简单地说,我想检查一个指定的单词是否存在。
查找需要非常快,这就是我决定将字典存储在 trie 中的原因。到目前为止,一切都很好!我的 trie 没有问题。问题是用字典填充树。我目前正在做的是遍历作为字典的纯文本文件的每一行,并将每个单词添加到我的 trie 中。
这是一个非常缓慢的过程,这是可以理解的。该文件仅包含大约 120 000 行。如果有人能指出我能做什么的正确方向,我将不胜感激!
这就是我向 trie 添加单词的方式(在 Boo 中):
trie = Trie()
saol = Resources.Load("saol") as TextAsset
text = saol.text.Split(char('\n'))
for new_word in text:
trie.Add(new_word)
这是我的特里(在 C# 中):
using System.Collections.Generic;
public class TrieNode {
public char letter;
public bool word;
public Dictionary<char, TrieNode> child;
public TrieNode(char letter) {
this.letter = letter;
this.word = false;
this.child = new Dictionary<char, TrieNode>();
}
}
public class Trie {
private TrieNode root;
public Trie() {
root = new TrieNode(' ');
}
public void Add(string word) {
TrieNode node = root;
bool found_letter;
int c = 1;
foreach (char letter in word) {
found_letter = false;
// if current letter is in child list, set current node and break loop
foreach (var child in node.child) {
if (letter == child.Key) {
node = child.Value;
found_letter = true;
break;
}
}
// if current letter is not in child list, add child node and set it as current node
if (!found_letter) {
TrieNode new_node = new TrieNode(letter);
if (c == word.Length) new_node.word = true;
node.child.Add(letter, new_node);
node = node.child[letter];
}
c ++;
}
}
public bool Find(string word) {
TrieNode node = root;
bool found_letter;
int c = 1;
foreach (char letter in word) {
found_letter = false;
// check if current letter is in child list
foreach (var child in node.child) {
if (letter == child.Key) {
node = child.Value;
found_letter = true;
break;
}
}
if (found_letter && node.word && c == word.Length) return true;
else if (!found_letter) return false;
c ++;
}
return false;
}
}