我在我的 Windows 机器上处理这个哈希表,一切都是肉汁。我刚刚将它转移到我的 linux 机器上,现在我在打印功能中的 cout 完全乱码了。在 test1 中,您可以看到它输出所有 i 值,而在 test2 中,您可以看到它仅在 i = 6 时输出 i。这是我在 processFile 函数中所做的事情吗?
int main ( ) {
Hash hashTable;
cout << setprecision ( 10 );
cout << "Test 1 - print empty table" << endl;
hashTable.print ();
cout << "-------------------------------------------------------------"
<< endl;
cout << "Test 2 - processing input file" << endl;
hashTable.processFile ("test1.txt");
hashTable.print ();
}
void Hash::print ()
{
for(int i=0;i<HASH_TABLE_SIZE;i++)
{
cout << i << ":\t";
for(list<string>::iterator it=hashTable[i].begin(); it != hashTable[i].end();++it)
cout << *it << ", ";
cout << endl;
}
}
void Hash::processFile (string filename)
{
ifstream fp(filename.c_str());
if (!fp.is_open()) {
cout << "can't open file " << endl;
}
string word;
while (getline(fp, word)) {
int key = hf(word);
if(!hashTable[key].empty())
collisions++;
hashTable[key].push_back(word);
int x = hashTable[key].size();
if(x > longestList)
longestList = x;
double totalLength = 0;
for(int i=0;i<HASH_TABLE_SIZE;i++)
totalLength+=hashTable[i].size();
double newAvgLength = totalLength/HASH_TABLE_SIZE;
avgLength = (newAvgLength + avgLength) / 2.0;
}
fp.close();
}
输出:
Test 1 - print empty table
0:
1:
2:
3:
4:
5:
6:
7:
8:
9:
Test 2 - processing input file
, ttttt ooooo
, rrrrr ppppp
, iiiii
, nnnnn heath
, jjjjj
, harps hello
6: uuuuu,
, qqqqq mmmmm
, lllll
, sssss kkkkk
预期输出:
Test 1 - print empty table
0:
1:
2:
3:
4:
5:
6:
7:
8:
9:
Test 2 - processing input file
0:
1:
2: kkkkk, lllll, ppppp, sssss,
3: hello, heath, harps,
4:
5: happy, jjjjj, nnnnn, ooooo,
6: iiiii, uuuuu,
7: ttttt,
8: mmmmm, rrrrr,
9: qqqqq,
散列函数
Hash::hf (string ins) {
return MurmurHash2(ins.c_str(),ins.size(),11);
}
unsigned int Hash::MurmurHash2 (const void *key, int len, unsigned int seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const unsigned int m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
unsigned int h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
unsigned int k = *(unsigned int *)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h % HASH_TABLE_SIZE;
}
哈希定义
#ifndef __HASH_H
#define __HASH_H
#include <string>
#include <list>
#include <iostream>
#include <sstream>
#include <vector>
using namespace std;
class Hash {
public:
void remove (string word);
void print ();
void processFile (string filename);
bool search (string word);
void output (string filename);
void printStats ();
private:
list<string> hashTable [HASH_TABLE_SIZE];
int collisions;
int longestList;
double avgLength;
private:
int hf (string word);
unsigned int MurmurHash2 (const void *key, int len, unsigned int seed);
public:
Hash(){collisions = 0; longestList = 0; avgLength = 0;}
};
#endif