c++ - 双探针哈希表

Question

我正在尝试编辑我的哈希表以形成一个双哈希类，但似乎无法正确处理。

我想知道是否有人有任何见解。有人告诉我，我需要做的就是编辑 findPos()，现在我必须使用新策略提供新的探针。

**我做了一些研究，它说在双重探测中你会使用 R-(x mod R) 其中 R >size 和一个小于表格大小的素数。那么我要制作一个新的重新散列函数吗？

这是我的代码：

template <typename HashedObj>
class HashTable
{
  public:
    explicit HashTable( int size = 101 ) : array( nextPrime( size ) )
      { makeEmpty( ); }

    bool contains( const HashedObj & x ) const
    {
        return isActive( findPos( x ) );
    }

    void makeEmpty( )
    {
        currentSize = 0;
        for( auto & entry : array )
            entry.info = EMPTY;
    }

    bool insert( const HashedObj & x )
    {
            // Insert x as active
        int currentPos = findPos( x );
        if( isActive( currentPos ) )
            return false;

        if( array[ currentPos ].info != DELETED )
            ++currentSize;

        array[ currentPos ].element = x;
        array[ currentPos ].info = ACTIVE;
            // Rehash; 
        if( currentSize > array.size( ) / 2 )
            rehash( );
        return true;
    }

    bool insert( HashedObj && x )
    {
            // Insert x as active
        int currentPos = findPos( x );
        if( isActive( currentPos ) )
            return false;

        if( array[ currentPos ].info != DELETED )
            ++currentSize;

        array[ currentPos ] = std::move( x );
        array[ currentPos ].info = ACTIVE;

            // Rehash; see Section 5.5
        if( currentSize > array.size( ) / 2 )
            rehash( );

        return true;
    }

    bool remove( const HashedObj & x )
    {
        int currentPos = findPos( x );
        if( !isActive( currentPos ) )
            return false;

        array[ currentPos ].info = DELETED;
        return true;
    }

    enum EntryType { ACTIVE, EMPTY, DELETED };

  private:
    struct HashEntry
    {
        HashedObj element;
        EntryType info;

        HashEntry( const HashedObj & e = HashedObj{ }, EntryType i = EMPTY )
          : element{ e }, info{ i } { }

        HashEntry( HashedObj && e, EntryType i = EMPTY )
          : element{ std::move( e ) }, info{ i } { }
    };

    vector<HashEntry> array;
    int currentSize;

    bool isActive( int currentPos ) const
      { return array[ currentPos ].info == ACTIVE; }

    int findPos( const HashedObj & x ) const
    {
        int offset = 1;
        int currentPos = myhash( x );

        while( array[ currentPos ].info != EMPTY &&
               array[ currentPos ].element != x )
        {
            currentPos += offset;  // Compute ith probe
            offset += 2;
            if( currentPos >= array.size( ) )
                currentPos -= array.size( );
        }

        return currentPos;
    }

    void rehash( )
    {
        vector<HashEntry> oldArray = array;

            // Create new double-sized, empty table
        array.resize( nextPrime( 2 * oldArray.size( ) ) );
        for( auto & entry : array )
            entry.info = EMPTY;

            // Copy table over
        currentSize = 0;
        for( auto & entry : oldArray )
            if( entry.info == ACTIVE )
                insert( std::move( entry.element ) );
    }

    size_t myhash( const HashedObj & x ) const
    {
        static hash<HashedObj> hf;
        return hf( x ) % array.size( );
    }
};

score 0 · Accepted Answer

看来您想为探测实现双重散列。这是一种通过使用输入键的第二个散列来解决冲突的技术。在原始二次函数中，您不断地向索引值添加一个递增的偏移量，直到您在哈希表中找到一个空白点。双散列函数的唯一重要区别是偏移量的值。

如果我是你，我会创建一个类似于第一个哈希函数的新哈希函数，但我会将 return 语句替换return R - ( hf(x) % R );为提供的 R 值。然后我将更改findPos设置为offset等于第二个哈希函数（还要记住删除该offset += 2;行，因为偏移量不再增加）。

score 0 · Accepted Answer

我不确定是否理解您的代码，但让我提出一些意见，它们不应被视为答案，但它们的大小大于允许评论的大小。

如果你使用二次探测，那么我认为在方法中findPos()你应该提前currentPos一些 as currentPos*currentPos % array.size()。目前，正如我所见，你增加currentPos一个单位（offset最初是 1），在 2 个单位之后，在 4 个单位之后等等
可能您正在尝试一种快速计算二次探针的方法。如果是这种情况，那么offset不应该增加二，而是乘以二。那将是一些offset *= 2，但因为你应该计算你应该增加的碰撞次数offset。
也许更简单的方法是：

currentPos += 2*offset++ - 1; // fast way of doing quadratic resolution

您的调整大小是可以的，因为它保证表至少有一半是空的，因此可以保证在执行插入时搜索可用条目。

祝你好运

c++ - 双探针哈希表

2 回答 2

Related

Reference