给定两个字符串u
,v
我们可以使用流行的 Levenshtein 算法计算编辑距离。使用 Sim 等人在 [1] 中介绍的方法。我能够k
使用以下代码在 Python 中计算字符串的近似周期
def wagnerFischerTable(a, b):
D = [[0]]
[D.append([i]) for i, s in enumerate(a, 1)]
[D[0].append(j) for j, t in enumerate(b, 1)]
for j, s in enumerate(b, 1):
for i, t in enumerate(a, 1):
if s == t:
D[i].append(D[i-1][j-1])
else:
D[i].append(
min(
D[i-1][j] + 1,
D[i][j-1] + 1,
D[i-1][j-1] +1
)
)
return D
def simEtAlTables(s, p):
D = []
for i in xrange(len(s)):
D.append(wagnerFischerTable(p, s[i:]))
return D
def approx(s, p):
D = simEtAlTables(s, p)
t = [0]
for i in xrange(1, len(s)+1):
cmin = 9000
for h in xrange(0, i):
cmin = min(
cmin,
max(t[h], D[h][-1][i-h])
)
t.append(cmin)
return t[len(s)]
我想把它移植到 F#,但是我还没有成功,我期待得到一些可能有问题的反馈。
let inline min3 x y z =
min (min x y) z
let wagnerFischerTable (u: string) (v: string) =
let m = u.Length
let n = v.Length
let d = Array2D.create (m + 1) (n + 1) 0
for i = 0 to m do d.[i, 0] <- i
for j = 0 to n do d.[0, j] <- j
for j = 1 to n do
for i = 1 to m do
if u.[i-1] = v.[j-1] then
d.[i, j] <- d.[i-1, j-1]
else
d.[i, j] <-
min3
(d.[i-1, j ] + 1) // a deletion
(d.[i , j-1] + 1) // an insertion
(d.[i-1, j-1] + 1) // a substitution
d
let simEtAlTables (u: string) (v: string) =
let rec tabulate n lst =
if n <> u.Length then
tabulate (n+1) (lst @ [wagnerFischerTable (u.Substring(n)) v])
else
lst
tabulate 0 []
let approx (u: string) (v: string) =
let tables = simEtAlTables u v
let rec kApprox i (ks: int list) =
if i = u.Length + 1 then
ks
else
let mutable curMin = 9000
for h = 0 to i-1 do
curMin <- min curMin (max (ks.Item h) ((tables.Item h).[i-h, v.Length - 1]))
kApprox (i+1) (ks @ [curMin])
List.head (List.rev (kApprox 1 [0]))
它“不起作用”的原因只是我得到了错误的价值观。Python 代码通过了所有测试用例,而 F# 代码在所有测试中都失败了。我认为我在函数simEtAlTables
和/或approx
. 可能与索引有关,尤其是访问approx
.
所以这里有三个测试用例应该涵盖不同的结果:
Test 1: approx "abcdabcabb" "abc" -> 1
Test 2: approx "abababababab" "ab" -> 0
Test 3: approx "abcdefghijklmn" "xyz" -> 3
[1] http://www.lirmm.fr/~rivals/ALGOSEQ/DOC/SimApprPeriodsTCS262.pdf