0

查看https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp我不这么认为,但我想检查一下。

情况是这样的,如果我有一个 1、2、3 或 4 个字节的键,那么简单地将这些字节的数值而不是散列到 8 个字节是否可靠,或者这些会导致大于 4 的键发生冲突用 murmur3 散列的字节?

4

1 回答 1

1

这样的属性对于散列函数来说是一个不好的属性。它有效地缩小了函数共域,增加了碰撞机会,所以看起来不太可能。

此外,这篇博文还提供了 MurmurHash 的求逆函数:

uint64 murmur_hash_64(const void * key, int len, uint64 seed)
{
    const uint64 m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;

    uint64 h = seed ^ (len * m);

    const uint64 * data = (const uint64 *)key;
    const uint64 * end = data + (len / 8);

    while (data != end)
    {
#ifdef PLATFORM_BIG_ENDIAN
        uint64 k = *data++;
        char *p = (char *)&k;
        char c;
        c = p[0]; p[0] = p[7]; p[7] = c;
        c = p[1]; p[1] = p[6]; p[6] = c;
        c = p[2]; p[2] = p[5]; p[5] = c;
        c = p[3]; p[3] = p[4]; p[4] = c;
#else
        uint64 k = *data++;
#endif

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    const unsigned char * data2 = (const unsigned char*)data;

    switch (len & 7)
    {
    case 7: h ^= uint64(data2[6]) << 48;
    case 6: h ^= uint64(data2[5]) << 40;
    case 5: h ^= uint64(data2[4]) << 32;
    case 4: h ^= uint64(data2[3]) << 24;
    case 3: h ^= uint64(data2[2]) << 16;
    case 2: h ^= uint64(data2[1]) << 8;
    case 1: h ^= uint64(data2[0]);
        h *= m;
    };

    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}

uint64 murmur_hash_64_inverse(uint64 h, uint64 seed)
{
    const uint64 m = 0xc6a4a7935bd1e995ULL;
    const uint64 minv = 0x5f7a0ea7e59b19bdULL; // Multiplicative inverse of m under % 2^64
    const int r = 47;

    h ^= h >> r;
    h *= minv;
    h ^= h >> r;
    h *= minv;

    uint64 hforward = seed ^ (((uint64)8) * m);
    uint64 k = h ^ hforward;

    k *= minv;
    k ^= k >> r;
    k *= minv;

#ifdef PLATFORM_BIG_ENDIAN
    char *p = (char *)&k;
    char c;
    c = p[0]; p[0] = p[7]; p[7] = c;
    c = p[1]; p[1] = p[6]; p[6] = c;
    c = p[2]; p[2] = p[5]; p[5] = c;
    c = p[3]; p[3] = p[4]; p[4] = c;
#endif

    return k;
}

您可以根据需要找到任意数量的带有哈希值<2^32的输入。

您关于可靠性的问题没有多大意义:您必须始终准备好正确处理碰撞。根据我的实践,我不建议使用纯整数或指针值作为哈希,因为它们会产生不希望的模式。

于 2016-11-05T19:01:18.397 回答