我为字典查找类构建了一个 trie。它似乎工作正常,只是这个 trie 相当大:大约占用 80 MB,而根据我读到的资料,它应该只有 5 MB 左右。我不确定是什么让 trie 膨胀到 80 MB,不过一旦加载完成,它运行得非常快。
Trie 类:
public class Trie {
// Root node of the trie; Insert(string) starts every walk from this node and returns it.
private TrieNode root = new TrieNode();
// Character code of 'a' (97). The Trie methods visible here use TrieNode.ASCIIA rather than this constant.
public const int ASCIIA = 97;
/// <summary>
/// Adds <paramref name="word"/> to the trie, creating any missing nodes along its path
/// and flagging the node reached by the last character as the end of a word.
/// </summary>
/// <param name="word">Word to add; it is lower-cased before insertion.</param>
/// <returns>The root node of the trie.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
public TrieNode Insert(string word) {
    if (word == null) {
        throw new System.ArgumentNullException("word");
    }

    // Lower-case with the invariant culture: the culture-sensitive ToLower() can map
    // ASCII letters to non-ASCII ones (e.g. 'I' to the dotless i under Turkish rules),
    // and such characters have no slot in a node's child array.
    string lowered = word.ToLowerInvariant();

    TrieNode node = root;
    // A string can be enumerated directly; no char[] copy is needed.
    foreach (char character in lowered) {
        node = Insert(character, node);
    }
    node.IsEnd = true;
    return root;
}
/// <summary>
/// Returns the child of <paramref name="node"/> that corresponds to
/// <paramref name="character"/>, creating and attaching it first when it does not exist.
/// </summary>
/// <param name="character">A lower-case letter in the range 'a' to 'z'.</param>
/// <param name="node">Parent node whose child is looked up or created.</param>
/// <returns>The existing or newly created child node.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="character"/> is not in the range 'a' to 'z'.
/// </exception>
private TrieNode Insert(char character, TrieNode node) {
    // Plain char arithmetic avoids Convert.ToByte, which overflows for characters above 255.
    int number = character - TrieNode.ASCIIA;

    // Reject anything without a slot (e.g. '\r', digits, apostrophes) with a clear error
    // instead of letting the array access fail with an opaque index error.
    if (number < 0 || number >= TrieNode.ENGL) {
        throw new System.ArgumentOutOfRangeException(
            "character",
            character,
            "Only the letters 'a' to 'z' can be stored in the trie.");
    }

    // Read the slot once instead of calling Contains and then GetChild.
    TrieNode child = node.nodes[number];
    if (child == null) {
        child = new TrieNode();
        child.Value = number;
        node.nodes[number] = child;
    }
    return child;
}
TrieNode 类:
public class TrieNode {
// Child slots, allocated with ENGL entries by the constructor; slot i belongs to the
// character whose code is ASCIIA + i, and a null slot means there is no such child.
public TrieNode[] nodes;
// Set to true by Trie.Insert on the node reached by the last character of an inserted word.
public bool IsEnd {get; set;}
// Slot index (character code minus ASCIIA) assigned by Trie.Insert when it creates this node.
public int Value {get; set;}
// Character code of 'a' (97); subtracted from a character code to obtain its slot index.
public const int ASCIIA = 97;
// Number of child slots allocated per node (26).
public const int ENGL = 26;
/// <summary>
/// Creates a node with no children. The full array of <see cref="ENGL"/> child slots
/// is allocated up front for every node, including nodes that never receive a child.
/// </summary>
public TrieNode() {
    this.nodes = new TrieNode[TrieNode.ENGL];
}
/// <summary>
/// Reports whether this node has a child for <paramref name="character"/>.
/// </summary>
/// <param name="character">Character to look up.</param>
/// <returns>
/// true when a child exists for the character; false when the slot is empty or the
/// character lies outside the range covered by the child array ('a' to 'z').
/// </returns>
public bool Contains(char character) {
    // Plain char arithmetic avoids Convert.ToByte, which overflows for characters above 255.
    int index = character - ASCIIA;

    // Valid slots are 0 .. ENGL - 1. Both bounds are checked: characters below 'a'
    // (including '\0') give a negative index, and index == ENGL is already out of range.
    if (index < 0 || index >= ENGL) {
        return false;
    }
    return nodes[index] != null;
}
/// <summary>
/// Reports whether this node has a child in the slot with index <paramref name="character"/>.
/// </summary>
/// <param name="character">Zero-based slot index (character code minus <see cref="ASCIIA"/>).</param>
/// <returns>
/// true when the slot holds a child; false when it is empty or the index is outside
/// the child array.
/// </returns>
public bool Contains(int character) {
    // Index 0 is a valid slot (the letter 'a'), so only indexes outside the array are rejected.
    if (character < 0 || character >= ENGL) {
        return false;
    }
    return nodes[character] != null;
}
/// <summary>
/// Gets the child of this node for <paramref name="character"/>.
/// </summary>
/// <param name="character">Character to look up.</param>
/// <returns>
/// The child node, or null when the slot is empty or the character lies outside the
/// range covered by the child array ('a' to 'z').
/// </returns>
public TrieNode GetChild(char character) {
    // Plain char arithmetic avoids Convert.ToByte, which overflows for characters above 255.
    int index = character - ASCIIA;

    // A character without a slot has no child; report that the same way as an empty slot.
    if (index < 0 || index >= ENGL) {
        return null;
    }
    return nodes[index];
}
/// <summary>
/// Gets the child stored in the slot with index <paramref name="character"/>.
/// </summary>
/// <param name="character">Zero-based index into the child array.</param>
/// <returns>The node stored in that slot, or null when the slot is empty.</returns>
public TrieNode GetChild(int character) {
    TrieNode child = this.nodes[character];
    return child;
}
然后使用包含 170,000 个单词的字典来生成 trie:
// Build the trie from the dictionary text, which holds one word per line.
string[] lines = fileTXT.Split('\n');
foreach (string line in lines) {
    // Files with Windows line endings leave a trailing '\r' on every entry after
    // splitting on '\n'; strip it so that only the word itself is inserted.
    string word = line.TrimEnd('\r');

    // Skip blank entries, such as the one produced by a newline at the end of the file.
    if (word.Length == 0) {
        continue;
    }
    trieDict.Insert(word);
}