1

我在这里找到了一篇关于使用 Tries 修复拼写错误的很酷的博客文章,但是它是用旧版本的 F# 编写的。
http://blog.lab49.com/archives/2841 我已经修复了大部分问题,除了最后我在我认为出现问题的所有大写字母中发表了评论。基本上在一些地方需要一个 Set 并给出一个 Map<> ,反之亦然。我已经盯着这段代码几个小时了,似乎无法弄清楚事情在哪里变得不匹配。

open System
open System.Linq


///'k represents the base key type, and 'a represents the element type.
type trie<'k,'a when 'k : comparison> = TNode of ('a option * Map<'k,trie<'k,'a>>)

///'ks represents a type that stores a sequence of the key elements, and 'k represents the type of the key elements.
type iter_module<'ks,'k> =
    {
        eos : ('ks -> bool) //whether or not a key-sequence is at its end.
        head : ('ks -> 'k) //gets the current key-value in a key sequence
        tail : ('ks -> 'ks) // increments the key-sequence iterator.
    }

let siterm : iter_module<string * int, char> =
    {
        eos = fun (s,i) -> i >= String.length s
        head = fun (s,i) -> s.[i]
        tail = fun (s,i) -> (s, i+1)
    }

///Extracts the option value associated with a node.
let node_value = function
| TNode (ov,_) -> ov

///Extracts the map representing connections from the current trie node to sub-trees.
let node_map = function
| TNode (_, m) -> m

///Determines whether a trie is empty
let is_empty tn = Map.isEmpty (node_map tn)

///Empty trie.
let empty_trie = TNode (None, Map.empty)

/// <summary>
/// Find a sub-trie.
/// </summary>
/// <param name="tn">The current trie node.</param>
/// <param name="k">The key.</param>
let find_subtrie tn k =
    try
        Map.find k (node_map tn)
    with
        not_found -> empty_trie

/// <summary>
/// Update the trie.
/// </summary>
/// <param name="m">The iteration module.</param>
/// <param name="tn">The trie node.</param>
/// <param name="ks">a sequence of the key elements</param>
/// <param name="v">Option containing the element</param>
let add m tn ks v =
    let rec upd tn' ks' =
        if(m.eos ks') then
            TNode(Some v, (node_map tn'))
        else
            let k = (m.head ks') in
            let ov = node_value tn' in
            let tn'' = upd(find_subtrie tn' k)(m.tail ks') in
                TNode(ov, Map.add k tn'' (node_map tn'))
    in
        upd tn ks

/// <summary>
/// Lookup the option value associated with a string
/// </summary>
/// <param name="m">The iteration module.</param>
/// <param name="tn">The trie node.</param>
/// <param name="ks">a sequence of the key elements</param>
let lookup m tn ks =
    let rec lv tn' ks' =
        if(m.eos ks') then
            node_value tn'
        else
            let k = (m.head ks') in
                lv(find_subtrie tn' k)(m.tail ks')

    in
        lv tn ks

/// <summary>
/// Determine whether or not a specific work is valid in the trie by a simple use of the lookup function.
/// </summary>
/// <param name="m">The iteration module.</param>
/// <param name="tn">The trie node.</param>
/// <param name="ks">a sequence of the key elements</param>
let mem m tn ks =
    match(lookup m tn ks) with
    | Some _ -> true
    | None -> false

/// <summary>
/// Determines the suffix of a string
/// </summary>
/// <param name="s">The string to find the suffix of.</param>
/// <param name="i">The starting point of the suffix.</param>
let suffix(s:String,i) = s.Substring(i,(String.length s - i))

/// <summary>
/// Computes the set of words defined by a trie node if we follow any valid character from that node, and then try to follow a specific char sequences.
/// </summary>
/// <param name="pfx">The prefix.</param>
/// <param name="trie">The trie node.</param>
/// <param name="wi">Word index.</param>
/// <param name="words">The words.</param>
let strings_with_suffix pfx trie wi words =
 let accumulate_paths c trie' ws =
  if (mem siterm trie' wi) then
   Set.add (pfx + (string c) + (suffix wi)) ws
  else
   ws
 in
 //THIS IS WHERE THINGS APPEAR TO BE GOING WRONG
 Map.fold accumulate_paths (node_map trie) words

/// <summary>
/// Spelling correction suggestions.
/// </summary>
/// <param name="trie">The trie.</param>
/// <param name="word">The mispelled word.</param>
let suggestions trie word =
  let rec paths_around_char pfx trie wi words =
   if (siterm.eos wi) then
    strings_with_suffix pfx trie ("", 0) words
   else if (is_empty trie) then
    words
   else
    let c      = siterm.head wi in
    let wi'    = siterm.tail wi in
    let trie'  = find_subtrie trie c in
    let ins_ws = strings_with_suffix pfx trie wi words in
    let rep_ws = strings_with_suffix pfx trie wi' ins_ws in
    let del_ws = if (mem siterm trie wi') then
                  Set.add (pfx + (suffix wi')) rep_ws
                 else
                  rep_ws
    in
     paths_around_char (pfx + (string c)) trie' wi' del_ws
  in
   paths_around_char "" trie (word, 0) Set.empty

提前致谢,

鲍勃

4

1 回答 1

2

似乎问题出在 strings_with_suffix 中 Map.fold 的不同签名版本的 F# 标准库具有不同的参数顺序)

折叠的签名: ('State -> 'Key -> 'Value -> 'State) -> 'State -> Map<'Key, 'Value> -> 'State

当前呼叫站点:

let strings_with_suffix pfx trie wi words =
    let accumulate_paths c trie' ws (* 2 *)=
        if (mem siterm trie' wi) then
           Set.add (pfx + (string c) + (suffix wi)) ws
        else
           ws
in
//THIS IS WHERE THINGS APPEAR TO BE GOING WRONG
Map.fold accumulate_paths (node_map trie) (* 1 *) words
  1. 调用者传递 'State 作为最后一个参数,而不是第一个
  2. accumulate_paths 接受 'State 作为最后一个参数,而不是第一个参数

正确的版本将如下所示:

let strings_with_suffix pfx trie wi words =
    let accumulate_paths ws c trie' =
        if (mem siterm trie' wi) then
            Set.add (pfx + (string c) + (suffix wi)) ws
        else
            ws
    in
    Map.fold accumulate_paths words (node_map trie) 
于 2011-06-26T07:27:42.893 回答