由于所有的辩论,我运行了一个演示:
下载NASDAQ 股票代码并从该集合中获取 100 个随机样本,应用 gperf 如下:
gperf -e ' \015' -L C++ -7 -C -E -k '*,1,$' -m 100 selection > perfhash.cpp
产生一个哈希值 MAX_HASH_VALUE157
和一个包含尽可能多项目的直接字符串查找表。这里只是用于演示目的的哈希函数:
inline unsigned int Perfect_Hash::hash (register const char *str, register unsigned int len) {
static const unsigned char asso_values[] = {
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 64, 40, 1, 62, 1,
41, 18, 47, 0, 1, 11, 10, 57, 21, 7,
14, 13, 24, 3, 33, 89, 11, 0, 19, 5,
12, 0, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156, 156,
156, 156, 156, 156, 156, 156, 156, 156, 156
};
register int hval = len;
switch (hval) {
default: hval += asso_values[(unsigned char)str[4]]; /*FALLTHROUGH*/
case 4: hval += asso_values[(unsigned char)str[3]]; /*FALLTHROUGH*/
case 3: hval += asso_values[(unsigned char)str[2]+1]; /*FALLTHROUGH*/
case 2: hval += asso_values[(unsigned char)str[1]]; /*FALLTHROUGH*/
case 1: hval += asso_values[(unsigned char)str[0]]; break;
}
return hval;
}
它真的没有变得更有效率。请查看github 上的完整源代码:https://gist.github.com/sehe/5433535
请注意,这也是一个完美的哈希,所以不会有冲突