1

给定两个字符串uv我们可以使用流行的 Levenshtein 算法计算编辑距离。使用 Sim 等人在 [1] 中介绍的方法。我能够k使用以下代码在 Python 中计算字符串的近似周期

def wagnerFischerTable(a, b):
    D = [[0]]
    [D.append([i]) for i, s in enumerate(a, 1)]
    [D[0].append(j) for j, t in enumerate(b, 1)]

    for j, s in enumerate(b, 1):
        for i, t in enumerate(a, 1):
            if s == t:
                D[i].append(D[i-1][j-1])
            else:
                D[i].append(
                    min(
                        D[i-1][j] + 1,
                        D[i][j-1] + 1,
                        D[i-1][j-1] +1
                    )
                )
    return D

def simEtAlTables(s, p):
    D = []
    for i in xrange(len(s)):
        D.append(wagnerFischerTable(p, s[i:]))
    return D

def approx(s, p):
    D = simEtAlTables(s, p)
    t = [0]
    for i in xrange(1, len(s)+1):
        cmin = 9000
        for h in xrange(0, i):
            cmin = min(
                cmin,
                max(t[h], D[h][-1][i-h])
            )
        t.append(cmin)
    return t[len(s)]

我想把它移植到 F#,但是我还没有成功,我期待得到一些可能有问题的反馈。

let inline min3 x y z = 
    min (min x y) z

let wagnerFischerTable (u: string) (v: string) =
    let m = u.Length
    let n = v.Length
    let d = Array2D.create (m + 1) (n + 1) 0

    for i = 0 to m do d.[i, 0] <- i
    for j = 0 to n do d.[0, j] <- j    

    for j = 1 to n do
        for i = 1 to m do
            if u.[i-1] = v.[j-1] then
                d.[i, j] <- d.[i-1, j-1]
            else
                d.[i, j] <-
                    min3
                        (d.[i-1, j  ] + 1) // a deletion
                        (d.[i  , j-1] + 1) // an insertion
                        (d.[i-1, j-1] + 1) // a substitution
    d

let simEtAlTables (u: string) (v: string) =
    let rec tabulate n lst =
        if n <> u.Length then
            tabulate (n+1) (lst @ [wagnerFischerTable (u.Substring(n)) v])
        else
            lst
    tabulate 0 []

let approx (u: string) (v: string) =
    let tables = simEtAlTables u v
    let rec kApprox i (ks: int list) =
        if i = u.Length + 1 then
            ks
        else
            let mutable curMin = 9000
            for h = 0 to i-1 do
                curMin <- min curMin (max (ks.Item h) ((tables.Item h).[i-h, v.Length - 1]))
            kApprox (i+1) (ks @ [curMin])
    List.head (List.rev (kApprox 1 [0]))

它“不起作用”的原因只是我得到了错误的价值观。Python 代码通过了所有测试用例,而 F# 代码在所有测试中都失败了。我认为我在函数simEtAlTables和/或approx. 可能与索引有关,尤其是访问approx.

所以这里有三个测试用例应该涵盖不同的结果:

Test 1: approx "abcdabcabb" "abc" -> 1
Test 2: approx "abababababab" "ab" -> 0
Test 3: approx "abcdefghijklmn" "xyz" -> 3

[1] http://www.lirmm.fr/~rivals/ALGOSEQ/DOC/SimApprPeriodsTCS262.pdf

4

2 回答 2

3

这至少不起作用(您的 Python 解决方案也不是),但这是对 F# 的更直接的翻译。也许您可以将其用作起点,并从那里使它更具功能性(尽管我会冒险猜测它不会提高性能)。

let wagnerFischerTable (a: string) (b: string) =
  let d = ResizeArray([ResizeArray([0])])
  for i = 1 to a.Length do d.Add(ResizeArray([i]))
  for j = 1 to b.Length do d.[0].Add(j)
  for j = 1 to b.Length do
    for i = 1 to a.Length do
      let s, t = b.[j-1], a.[i-1]
      if s = t then
        d.[i].Add(d.[i-1].[j-1])
      else
        d.[i].Add(
          Seq.min [
            d.[i-1].[j] + 1
            d.[i].[j-1] + 1
            d.[i-1].[j-1] + 1
          ])
  d

let simEtAlTables (s: string) (p: string) =
  let d = ResizeArray()
  for i = 0 to s.Length - 1 do
    d.Add(wagnerFischerTable p s.[i..])
  d

let approx (s: string) (p: string) =
  let d = simEtAlTables s p
  let t = ResizeArray([0])
  for i = 1 to s.Length do
    let mutable cmin = 9000
    for h = 0 to i - 1 do
      let dh = d.[h]
      cmin <- min cmin (max t.[h] dh.[dh.Count-1].[i-h])
    t.Add(cmin)
  t.[s.Length]
于 2013-08-15T14:54:50.263 回答
0

代码可能会有所帮助:

let levenshtein word1 word2 =
    let preprocess = fun (str : string) -> str.ToLower().ToCharArray()
    let chars1, chars2 = preprocess word1, preprocess word2
    let m, n = chars1.Length, chars2.Length
    let table : int[,] = Array2D.zeroCreate (m + 1) (n + 1)
    for i in 0..m do
        for j in 0..n do
            match i, j with
            | i, 0 -> table.[i, j] <- i
            | 0, j -> table.[i, j] <- j
            | _, _ ->
                let delete = table.[i-1, j] + 1
                let insert = table.[i, j-1] + 1
                //cost of substitution is 2
                let substitute = 
                    if chars1.[i - 1] = chars2.[j - 1] 
                        then table.[i-1, j-1] //same character
                        else table.[i-1, j-1] + 2
                table.[i, j] <- List.min [delete; insert; substitute]
    table.[m, n], table //return tuple of the table and distance

//test
levenshtein "intention" "execution" //|> ignore

您可能还想查看Rick Minerich 的这篇博文。

于 2013-08-15T16:36:10.450 回答