3

我有一个 trie(也称为前缀树)。给定一个前缀,我想得到一个以前缀开头的十个单词的列表。

这个问题的独特之处在于我只想要10 个以给定前缀开头的单词——而不是全部。鉴于此,可以进行优化。

我知道下面的代码可以正常工作。trie 中的每个节点都有一个children属性和一个this_is_the_end_of_a_word属性。例如,当您插入“hi”时,trie 如下所示:

特里.

问题:给定一个前缀,我想得到一个以该前缀开头的十个单词的列表。

我解决这个问题的方法是:沿着前缀树向下,跟随 的字符,prefix直到到达与 . 的最后一个字符相对应的节点prefix。现在您应该在该节点上执行 DFS,跟踪this_is_the_end_of_a_word === true列表中的节点。但是当列表的长度等于 10 时,您应该停止搜索,并返回列表。

我认为我的方法是合理的,但我在实现它时遇到了麻烦——特别是因为我正在尝试使用递归 DFS,所以我不确定如何在递归调用之间传递“全局”列表。我知道我应该使用闭包,但我是 javascript 新手,我不确定如何去做。我已经尝试过的一个例子如下。

我的 Trie 类(我知道这段代码有效,这只是为了让您了解我是如何组织我的数据结构的。)

var Trie = function() {

    var that = Object.create(Trie.prototype);
    that.children = {}; //mapping: next character -> child nodes
    that.this_is_the_end_of_a_word = false;

    that.insertWord = function(word) {

        var current_node = that;

        for (var i = 0; i < word.length; i++) {
            var c = word[i]
                //if character is not in the trie already, add it
            if (!(c in current_node.children)) {
                current_node.children[c] = Trie();
            }
            //update current_node
            current_node = current_node.children[c];
        };

        //after adding all the chars of the word, 
        //you are at the end of a word
        current_node.this_is_the_end_of_a_word = true;
    }

    that.insertWords = function(words) {
        for (var i = 0; i < words.length; i++) {
            that.insertWord(words[i]);
        }
    }

    that.contains = function(word) {
        //start at the root
        var current_node = that;
        for (var i = 0; i < word.length; i++) {
            var c = word[i];

            //if the word's character isn't a child of the current_node, 
            //the word isn't in the trie
            if (!(c in current_node.children)) {
                return false;
            }
            //move down the trie, update current_node
            current_node = current_node.children[c];
        };
        return current_node.this_is_the_end_of_a_word;
    }

    Object.freeze(that);
    return that;
}

我的第一种方法(有很多错误)

num_words_to_go = 10; 
//this global is bad practice; 
//I want to put this as the argument to a closure 
//so it's passed between recursive calls

that.getWords = function(start_node, prefix) {
   console.log(0);
   var words = [];

   //if start node is a word, add it
   if (start_node.this_is_the_end_of_a_word) {
       words.push(start_node);
       num_words_to_go--;
   }

   if (num_words_to_go <= 0 || !start_node.children) {
       return words;
   }

   return start_node.children.forEach(
                              currentValue.getWords(
                                    currentValue, prefix + <character for this child>)); 

   /*I can't think of a nice way to write this without going through all of the children. 
   I know I don't need to, because I only need to find 10 words and get out. 
   This is why I was leaning towards the recursive DFS. 
   */

}

第二种方法:我还发现了一个我正在查看的 python 示例:http: //v1v3kn.tumblr.com/post/18238156967/roll-your-own-autocomplete-solution-using-tries 我尝试将他的示例翻译成 JavaScript,但仍然有问题all_suffixes

that.all_suffixes = function (prefix){
    results = [];
    if (that.this_is_the_end_of_a_word) results.push(prefix);
    if (!(that.children)) return results;
    if (results.length > 2) return results;
    var callback = function(currentValue, i, array){
        return currentValue.all_suffixes(prefix+array[i]);
    }
    arr = that.children.forEach(callback, that);
        //[child.all_suffixes(prefix + char) for (char, child) in self.children.items()]
    return concat(reduce(concat, arr), results);        
}

 that.autocomplete = function(prefix){
    current_node = that;
    for(var i = 0; i < prefix.length; i++){
        var c = prefix[i];
        //if there is nothing in the trie with this prefix
        if (!(c in current_node.children)){
            return [];
        }
        current_node = current_node.children[c];
    }
    return list(current_node.all_suffixes(prefix))
 }
4

1 回答 1

0

基本上,我采用您的模型并将新方法getWords(word[, count])应用于Trie课程。我更改了方法contains,因为我也需要该功能getWords。所以我创建了一个新方法getNode,它返回找到单词或部分的节点。

该方法getWords首先查找单词(part),然后遍历数据结构。当找到一个单词时,它会被推送到结果集中。如果结果集长度大于或等于所需长度,则Array.prototype.some终止迭代(因此 )并停止递归调用fork

    that.getWords = function (word, count) {

        function fork(n, w) {

            function child(c) {
                return fork(n.children[c], w + c);
            }

            n.isWord && words.push(w);
            return words.length >= count || Object.keys(n.children).some(child);
        }

        var words = [],
            current_node = that.getNode(word);

        if (current_node) {
            fork(current_node, word);
            return words;
        }
    }

旁注:我this_is_the_end_of_a_word改为isWord.

输入

  1. 创建 的新实例Trie
  2. 插入一些单词进行测试。

输出

  1. 测试 trie 是否包含'motor',返回 false。
  2. 测试 trie 是否包含'te',返回 false。
  3. 测试 trie 是否包含'ten',返回 true。
  4. 获取所有'ind'以(8 个可用,显示 8 个)开头的单词。
  5. 'in'获取以(16 个可用,显示 10)开头的前 10 个单词。
  6. 整个尝试。

var Trie = function () {

    var that = Object.create(Trie.prototype);
    that.children = {}; //mapping: next character -> child nodes
    that.isWord = false;

    that.insertWord = function (word) {
        var current_node = that;
        for (var i = 0; i < word.length; i++) {
            var c = word[i]
            //if character is not in the trie already, add it
            if (!(c in current_node.children)) {
                current_node.children[c] = Trie();
            }
            //update current_node
            current_node = current_node.children[c];
        };

        //after adding all the chars of the word,
        //you are at the end of a word
        current_node.isWord = true;
    }

    that.insertWords = function (words) {
        for (var i = 0; i < words.length; i++) {
            that.insertWord(words[i]);
        }
    }

    that.getNode = function (word) {
        //start at the root
        var current_node = that;
        for (var i = 0; i < word.length; i++) {
            var c = word[i];

            //if the word's character isn't a child of the current_node,
            //the word isn't in the trie
            if (!(c in current_node.children)) {
                return;
            }
            //move down the trie, update current_node
            current_node = current_node.children[c];
        };
        return current_node;
    }

    that.contains = function (word) {
        var current_node = that.getNode(word);
        if (current_node) {
            return current_node.isWord;
        }
        return false;
    }

    that.getWords = function (word, count) {

        function fork(n, w) {

            function child(c) {
                return fork(n.children[c], w + c);
            }

            n.isWord && words.push(w);
            return words.length >= count || Object.keys(n.children).some(child);
        }

        var words = [],
            current_node = that.getNode(word);

        if (current_node) {
            fork(current_node, word);
            return words;
        }
    }

    // freeze does lock the isWord property, which is not required here
    //Object.freeze(that);
    return that;
}

var trie = new Trie();
trie.insertWords([
    'car', 'cool', 'i', 'in', 'indeed', 'independence', 'india', 'indoor', 'induction',
    'industrial', 'industry', 'indwell', 'inferior', 'informal', 'inhale', 'inn',
    'inside', 'instance', 'intrepid', 'of', 'off', 'other', 'tea', 'ted', 'ten',
    'to', 'zoo', 'zoom'
]);
document.write(trie.contains('motor') + '<br>'); // false
document.write(trie.contains('te') + '<br>'); // false
document.write(trie.contains('ten') + '<br>'); // true
document.write('<pre>' + JSON.stringify(trie.getWords('ind'), 0, 4) + '</pre>');
document.write('<pre>' + JSON.stringify(trie.getWords('in', 10), 0, 4) + '</pre>');
document.write('<pre>' + JSON.stringify(trie, 0, 4) + '</pre>');

于 2015-09-25T11:29:57.387 回答