2

我在我的 Windows 机器上处理这个哈希表,一切都是肉汁。我刚刚将它转移到我的 linux 机器上,现在我在打印功能中的 cout 完全乱码了。在 test1 中,您可以看到它输出所有 i 值,而在 test2 中,您可以看到它仅在 i = 6 时输出 i。这是我在 processFile 函数中所做的事情吗?

int main ( ) {

    Hash hashTable;
    cout << setprecision ( 10 );
    cout << "Test 1 - print empty table" << endl;
    hashTable.print ();
    cout << "-------------------------------------------------------------"
    << endl;
    cout << "Test 2 - processing input file" << endl;
    hashTable.processFile ("test1.txt");
    hashTable.print ();
}

void Hash::print ()
{
    for(int i=0;i<HASH_TABLE_SIZE;i++)
    {
        cout << i << ":\t";
        for(list<string>::iterator it=hashTable[i].begin(); it != hashTable[i].end();++it)
            cout << *it << ", ";
        cout << endl;
    }
}

void Hash::processFile (string filename)
{
    ifstream fp(filename.c_str());
    if (!fp.is_open()) {
        cout << "can't open file " << endl;
    }
    string word;
    while (getline(fp, word)) {

        int key = hf(word);
        if(!hashTable[key].empty())
            collisions++;

        hashTable[key].push_back(word);
        int x = hashTable[key].size();
        if(x > longestList)
            longestList = x;

        double totalLength = 0;
        for(int i=0;i<HASH_TABLE_SIZE;i++)
            totalLength+=hashTable[i].size();
        double newAvgLength = totalLength/HASH_TABLE_SIZE;
        avgLength = (newAvgLength + avgLength) / 2.0;

    }
    fp.close();
}

输出:

Test 1 - print empty table
0:  
1:  
2:  
3:  
4:  
5:  
6:  
7:  
8:  
9:  

Test 2 - processing input file
, ttttt ooooo
, rrrrr ppppp
,   iiiii
, nnnnn heath
,   jjjjj
, harps hello
6:  uuuuu, 
, qqqqq mmmmm
,   lllll
, sssss kkkkk

预期输出:

Test 1 - print empty table
0:
1:
2:
3:
4:
5:
6:
7:
8:
9:


Test 2 - processing input file
0:
1:
2:      kkkkk, lllll, ppppp, sssss,
3:      hello, heath, harps,
4:
5:      happy, jjjjj, nnnnn, ooooo,
6:      iiiii, uuuuu,
7:      ttttt,
8:      mmmmm, rrrrr,
9:      qqqqq,

散列函数

Hash::hf (string ins) {
return MurmurHash2(ins.c_str(),ins.size(),11); 
}
unsigned int Hash::MurmurHash2 (const void *key, int len, unsigned int seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const unsigned int m = 0x5bd1e995;
const int r = 24;

// Initialize the hash to a 'random' value

unsigned int h = seed ^ len;

// Mix 4 bytes at a time into the hash

const unsigned char * data = (const unsigned char *)key;

while(len >= 4)
{
    unsigned int k = *(unsigned int *)data;

    k *= m; 
    k ^= k >> r; 
    k *= m; 

    h *= m; 
    h ^= k;

    data += 4;
    len -= 4;
}

// Handle the last few bytes of the input array

switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
        h *= m;
};

// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.

h ^= h >> 13;
h *= m;
h ^= h >> 15;

return h % HASH_TABLE_SIZE;
}

哈希定义

#ifndef __HASH_H
#define __HASH_H

#include <string>
#include <list>
#include <iostream>
#include <sstream>
#include <vector>

using namespace std;

class Hash {

public:
void remove (string word);
void print ();
void processFile (string filename);
bool search (string word);
void output (string filename);
void printStats ();

private:
list<string> hashTable [HASH_TABLE_SIZE];
int collisions;
int longestList;
double avgLength;

private:
int hf (string word);
unsigned int MurmurHash2 (const void *key, int len, unsigned int seed);
public:
Hash(){collisions = 0; longestList = 0; avgLength = 0;}

};

#endif
4

1 回答 1

2

您的输入文件已损坏。具体来说,它使用 Windows 样式的行结尾而不是 UNIX 样式的行结尾。

要确认此运行,请在您的 Linux 计算机上file /tmp/xx(其中 /tmp/xx 替换为您的文件名)。输出将显示“ASCII 文本,带有 CRLF 行终止符”。

于 2013-11-06T17:33:20.333 回答