我正在尝试使用 BigQuery 使用类似于 Wordle 的规则进行一些单词/字母分析。
你知道该怎么做:
- 信件在场但在其他位置
- 正确位置的字母
- ⬜ 字母不存在。
我有一个有效的查询,但出于三个原因我并不十分满意:
- 它看起来不太像 BigQuery-y
- 对于大量单词,查询不能很好地扩展
- 延伸到,比如说,7个字母的单词看起来真的,真的,真的很可怕,很乱,任何数量的讨厌的形容词都可以很容易地描述它
我这样做的方式模糊地让我想起了递归 CTE,但遗憾的是,这些对我来说完全难以捉摸......
让它更快并不是一个真正的问题,但我想知道是否有更高级/优雅的方式(因为缺乏更好的术语)来完成相同的结果:递归 CTE,TVF 我猜使用 Javascript UDF 是一种选择,但我'想远离他们,而是探索仅 SQL 的替代方案。
这是丑陋的查询:
with words as (
select normalized_word as word, 'solution' word_type
from unnest(["SCAPE","PETER","SMACK","MAMMA"]) as normalized_word
),
split_word as (
select word,letter,word_type,
row_number() over(partition by word) as pos
from words, unnest(split(word,'')) as letter
),
check_correct as (
select a.word word1,any_value(a.word_type) as word_type,b.word word2,
string_agg(if(a.letter=b.letter,'',a.letter),'' order by a.pos) w1,
string_agg(if(a.letter=b.letter,'❌',b.letter),'' order by a.pos) w2,
sum(if(a.letter=b.letter,1,0)) c,0 p
from split_word as a, split_word as b
where a.word <> b.word
and b.word_type = 'solution'
and a.pos = b.pos
group by a.word,b.word
),
check_first as (
select word1,word_type,word2,c,
if(instr(w2,substring(w1,1,1))>0,p+1,p) p,
if(instr(w2,substring(w1,1,1))>0,concat('',right(w1,4)),w1) w1,
if(instr(w2,substring(w1,1,1))>0,
concat(
left(w2,instr(w2,substring(w1,1,1))-1),
'❌',
right(w2,length(w2)-instr(w2,substring(w1,1,1)))),
w2) w2,
from check_correct
),
check_second as (
select word1,word_type,word2,c,
if(instr(w2,substring(w1,2,1))>0,p+1,p) p,
if(instr(w2,substring(w1,2,1))>0,concat(left(w1,1),'',right(w1,3)),w1) w1,
if(instr(w2,substring(w1,2,1))>0,
concat(
left(w2,instr(w2,substring(w1,2,1))-1),
'❌',
right(w2,length(w2)-instr(w2,substring(w1,2,1)))),
w2) w2,
from check_first
),
check_third as (
select word1,word_type,word2,c,
if(instr(w2,substring(w1,3,1))>0,p+1,p) p,
if(instr(w2,substring(w1,3,1))>0,concat(left(w1,2),'',right(w1,2)),w1) w1,
if(instr(w2,substring(w1,3,1))>0,
concat(
left(w2,instr(w2,substring(w1,3,1))-1),
'❌',
right(w2,length(w2)-instr(w2,substring(w1,3,1)))),
w2) w2,
from check_second
),
check_fourth as (
select word1,word_type,word2,c,
if(instr(w2,substring(w1,4,1))>0,p+1,p) p,
if(instr(w2,substring(w1,4,1))>0,concat(left(w1,3),'',right(w1,1)),w1) w1,
if(instr(w2,substring(w1,4,1))>0,
concat(
left(w2,instr(w2,substring(w1,4,1))-1),
'❌',
right(w2,length(w2)-instr(w2,substring(w1,4,1)))),
w2) w2,
from check_third
),
check_fifth as (
select word1,word_type,word2,c,
if(instr(w2,substring(w1,5,1))>0,p+1,p) p,
if(instr(w2,substring(w1,5,1))>0,concat(left(w1,4),''),w1) w1,
if(instr(w2,substring(w1,5,1))>0,
concat(
left(w2,instr(w2,substring(w1,5,1)) - 1),
'❌',
right(w2,length(w2)-instr(w2,substring(w1,5,1)))),
w2) w2, -- for completeness
from check_fourth
),
final_result as (
select word1 as guess, word_type as guess_type, word2 as solution, c as correct, p as present,
length(w1)-c-p as absent,
regexp_replace(w1, r'([A-Z])', '⬜') as wordle, w1, w2
from check_fifth
)
select *
from final_result
order by guess
注意一些有趣的边缘情况,猜测词中有重复的字母:2、3、5。我丑陋的查询通过在解决方案中“消耗”匹配的字母并用 ❌ 替换它们来避免进一步匹配来管理这个。这是 的唯一目的w2
:避免多于一场比赛。
更新