2

我想用无序映射实现完美的散列。我有一组编译时已知的字符串,它映射到某些东西。我想为他们生成完美的哈希函数。我发现如果我选择 unordered_map 的大小比已知字符串集的大小大 3 倍,我就能找到一个完美的散列函数(即种子数)。我想尽量减少这个数字。和相关的问题是,如果我使用更大的无序地图,我会得到更快的地图,这是真的吗?

我玩过 Google 的 CityHash 函数: http ://code.google.com/p/cityhash/

#include <sstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <city.h>

unsigned seed = 0;
const unsigned numberOfTestData = 100;
const unsigned sizeOfPreallocatedMap = 3 * numberOfTestData; // what is the minimum value of this
const unsigned chanceToFindPerfectHashFnSeed = 10000; // in number of iterations
bool foundPerfectHashSeed = false;

int minCollisionCount = 999;

class CityHash {
 public:
  uint64 operator()(const std::string& s) const {  
//  return CityHash64(s.c_str(), s.size());
    return CityHash64WithSeed(s.c_str(), s.size(), seed);
  }
};
class StringEqual {
 public:
  bool operator()(const std::string& left, const std::string& right) const {  
    return left == right;  
  }
};
template<typename T>
void mapTester(T& map)
{
  for (unsigned i = 0; i < numberOfTestData; ++i) {
    std::stringstream ss;
    ss << "TestData_" << i;
    map[ss.str()] = i;
  }

  int collisionCount = 0;
  unsigned maxBucketSize = 0;
  for (size_t i = 0; i < map.bucket_count(); ++i) {
    if (map.bucket_size(i) > 1) {
      collisionCount++;
      if (maxBucketSize <= map.bucket_size(i))
        maxBucketSize = map.bucket_size(i);
    }
  }
  if (collisionCount < minCollisionCount) {
    minCollisionCount = collisionCount;
    std::cout << maxBucketSize << " collision count is " << collisionCount << " with seed " << seed << std::endl;
  }
  if (maxBucketSize == 0 && collisionCount == 0)
    foundPerfectHashSeed = true;
}

int main() {
  std::unordered_map<std::string, int> map;
  mapTester(map);
  for (; seed < chanceToFindPerfectHashFnSeed; ++seed) {
    if (foundPerfectHashSeed)
      break;
    std::unordered_map<std::string, int, CityHash, StringEqual> cityMap(sizeOfPreallocatedMap);
    mapTester(cityMap);
  }
  std::cout << (foundPerfectHashSeed ? "Found!" : "Not found!")  << std::endl;

  return 0;
}
4

0 回答 0