0

我正在尝试用 BigQuery 按照类似 Wordle 的规则做一些单词/字母分析。

规则你都知道:

  • 🟨 字母存在,但位置不对
  • 🟩 字母位置正确
  • ⬜ 字母不存在。

我有一个有效的查询,但出于三个原因我并不十分满意:

  1. 它看起来不太像 BigQuery-y
  2. 对于大量单词,查询不能很好地扩展
  3. 要扩展到比如 7 个字母的单词,查询会变得非常、非常、非常可怕和混乱——随便多少个难听的形容词都可以用来形容它

我这样做的方式模糊地让我想起了递归 CTE,但遗憾的是,这些对我来说完全难以捉摸......

让它更快并不是真正的重点,但我想知道是否有更高级/更优雅的方式(想不出更好的说法)来得到相同的结果:比如递归 CTE、TVF……我猜使用 JavaScript UDF 也是一种选择,但我想避开它们,转而探索纯 SQL 的替代方案。

这是丑陋的查询:

-- Scores every word against every other word with Wordle rules (BigQuery SQL).
--   🟩 letter in the correct position
--   🟨 letter present in the solution, but at another position
--   ⬜ letter not present
-- w1 is the guess with scored letters replaced by their marker; w2 is the
-- solution with every already-matched letter "consumed" (replaced by ❌) so a
-- solution letter can never satisfy more than one guess letter.
-- The position checks are unrolled by hand, so only the first five letters of
-- a guess are tested for 🟨.
with words as (
    -- Sample word list; each word acts both as a guess and as a solution.
    select normalized_word as word, 'solution' word_type
    from unnest(["SCAPE","PETER","SMACK","MAMMA"]) as normalized_word
),
split_word as (
    -- One row per letter. WITH OFFSET yields the real 0-based array index;
    -- row_number() without ORDER BY gives no guarantee about letter order.
    select word, letter, word_type,
    letter_offset + 1 as pos
    from words, unnest(split(word,'')) as letter with offset as letter_offset
),
check_correct as (
    -- Pass 1: exact position matches. c = number of 🟩, p = number of 🟨 (so far 0).
    select a.word word1,any_value(a.word_type) as word_type,b.word word2,
     string_agg(if(a.letter=b.letter,'🟩',a.letter),'' order by a.pos) w1,
     string_agg(if(a.letter=b.letter,'❌',b.letter),'' order by a.pos) w2,
     sum(if(a.letter=b.letter,1,0)) c,0 p
    from split_word as a
    inner join split_word as b
      on a.pos = b.pos
      and a.word <> b.word
    where b.word_type = 'solution'
    group by a.word,b.word
),
check_first as (
    -- Pass 2, letter 1: if it still occurs in w2, mark it 🟨 in w1 and
    -- consume its first occurrence in w2. A 🟩 never matches: w2 holds no 🟩.
    select word1,word_type,word2,c,
    if(instr(w2,substring(w1,1,1))>0,p+1,p) p,
    if(instr(w2,substring(w1,1,1))>0,concat('🟨',substr(w1,2)),w1) w1,
    if(instr(w2,substring(w1,1,1))>0,
      concat(
        left(w2,instr(w2,substring(w1,1,1))-1),
        '❌',
        right(w2,length(w2)-instr(w2,substring(w1,1,1)))),
      w2) w2
    from check_correct
),
check_second as (
    -- Pass 2, letter 2.
    select word1,word_type,word2,c,
    if(instr(w2,substring(w1,2,1))>0,p+1,p) p,
    if(instr(w2,substring(w1,2,1))>0,concat(left(w1,1),'🟨',substr(w1,3)),w1) w1,
    if(instr(w2,substring(w1,2,1))>0,
      concat(
        left(w2,instr(w2,substring(w1,2,1))-1),
        '❌',
        right(w2,length(w2)-instr(w2,substring(w1,2,1)))),
      w2) w2
    from check_first
),
check_third as (
    -- Pass 2, letter 3.
    select word1,word_type,word2,c,
    if(instr(w2,substring(w1,3,1))>0,p+1,p) p,
    if(instr(w2,substring(w1,3,1))>0,concat(left(w1,2),'🟨',substr(w1,4)),w1) w1,
    if(instr(w2,substring(w1,3,1))>0,
      concat(
        left(w2,instr(w2,substring(w1,3,1))-1),
        '❌',
        right(w2,length(w2)-instr(w2,substring(w1,3,1)))),
      w2) w2
    from check_second
),
check_fourth as (
    -- Pass 2, letter 4.
    select word1,word_type,word2,c,
    if(instr(w2,substring(w1,4,1))>0,p+1,p) p,
    if(instr(w2,substring(w1,4,1))>0,concat(left(w1,3),'🟨',substr(w1,5)),w1) w1,
    if(instr(w2,substring(w1,4,1))>0,
      concat(
        left(w2,instr(w2,substring(w1,4,1))-1),
        '❌',
        right(w2,length(w2)-instr(w2,substring(w1,4,1)))),
      w2) w2
    from check_third
),
check_fifth as (
    -- Pass 2, letter 5. w2 is not read afterwards; it is updated for completeness.
    select word1,word_type,word2,c,
    if(instr(w2,substring(w1,5,1))>0,p+1,p) p,
    if(instr(w2,substring(w1,5,1))>0,concat(left(w1,4),'🟨',substr(w1,6)),w1) w1,
    if(instr(w2,substring(w1,5,1))>0,
      concat(
        left(w2,instr(w2,substring(w1,5,1)) - 1),
        '❌',
        right(w2,length(w2)-instr(w2,substring(w1,5,1)))),
      w2) w2
    from check_fourth
),
final_result as (
    -- Letters still left in w1 were neither 🟩 nor 🟨, so they render as ⬜.
    select word1 as guess, word_type as guess_type, word2 as solution, c as correct, p as present,
    length(w1)-c-p as absent,
    regexp_replace(w1, r'([A-Z])', '⬜') as wordle, w1, w2
    from check_fifth
)

select guess, guess_type, solution, correct, present, absent, wordle, w1, w2
from final_result
order by guess, solution

结果如下所示: 全部 12 行

注意一些有趣的边缘情况,即猜测词中含有重复字母的情况:第 2、3、5 行。我丑陋的查询的处理方式是:在解决方案中“消耗”已匹配的字母,把它们替换为 ❌,从而避免重复匹配。这就是 w2 的唯一用途:避免同一个字母被匹配多次。

更新

米哈伊尔的解决方案几乎可以工作,但当猜测单词有重复字母时失败: 几乎可以工作!

4

1 回答 1

2

考虑以下方法 - 适用于任意数量的字母

-- Wordle scoring for every ordered pair of distinct words; works for any
-- word length (BigQuery SQL).
--   🟩 letter in the correct position
--   🟨 letter present in the solution, but at another position
--   ⬜ letter not present
-- Repeated letters: a non-green guess letter becomes 🟨 only while the solution
-- still has a copy of that letter that was not matched green; copies are handed
-- out to the guess letters from left to right.
with words as (
    select normalized_word as word, 'solution' word_type
    from unnest(["SHAPE","PETER","TAPES","JUMBO","NINJA","MAMMA"]) as normalized_word
), pairs as (
  -- every ordered (guess, solution) pair of two different words
  select t1.word as guess, t2.word as solution
  from words t1
  cross join words t2
  where t1.word != t2.word
), greens as (
  -- one row per guess letter x at 0-based position pos;
  -- 🟩 when the solution has the same letter at the same position, else ⬜
  select guess, solution, x, pos, color
  from pairs t, unnest(array(
      select as struct x, pos, if(x=y, '🟩', '⬜') color
      from unnest(split(guess, '')) x with offset as pos
      left join unnest(split(solution, '')) y with offset as pos
      using(pos)
    ))
), spare as (
  -- per pair and letter: how many copies the solution holds that were NOT
  -- matched green, i.e. copies still available for a 🟨
  select guess, solution, x, count(*) as copies
  from pairs t, unnest(split(solution, '')) x with offset as pos
  where x != substr(guess, pos + 1, 1)
  group by guess, solution, x
), ranked as (
  -- nth = running count of the same letter with the same color inside the
  -- guess; for ⬜ rows it says "this is the nth unmatched copy of x"
  select guess, solution, x, pos, color,
    row_number() over (partition by guess, solution, x, color order by pos) as nth
  from greens
)
select guess, solution,
  countif(r.color = '🟩') correct,
  -- counts 🟨 positions; a missing spare row makes the comparison NULL -> not counted
  countif(r.color = '⬜' and r.nth <= s.copies) present,
  string_agg(if(r.color = '⬜' and r.nth <= s.copies, '🟨', r.color), '' order by r.pos) as wordle,
  string_agg(if(r.color = '⬜' and r.nth <= s.copies, '🟨', if(r.color = '🟩', '🟩', r.x)), '' order by r.pos) as w1
from ranked r
left join spare s
using(guess, solution, x)
group by guess, solution
order by guess, solution

带输出

在此处输入图像描述

w2 就留给您自己完成了——以上面的查询为起点,应该相对简单 :o)

于 2022-02-12T06:23:00.603 回答