Cuckoo Hash 是一种解决hash冲突的方法, 其目的是即使使用简易的hash function, 也能够实现hash key的均匀分布。
基本思想是使用2个hash函数来处理碰撞,从而每个key都对应到2个位置。
插入操作如下:
1. 对key值hash,生成两个hash key值,hashk1和 hashk2, 如果对应的两个位置上有一个为空,那么直接把key插入即可。
2. 否则,任选一个位置,把key值插入,把已经在那个位置的key值踢出来。
3. 被踢出来的key值,需要重新插入,直到没有key被踢出为止。
查找思路比较简单: 只需检查key对应的两个位置(hashk1和hashk2), 其中任意一个位置存放着该key即表示查找成功。
代码实现如下:
// Cuckoo_hash.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <cmath>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Cuckoo hash set. Only the int specialization below is defined.
template<class KeyT>
class CuckooHash;

// Cuckoo hash set of int keys.
//
// Two tables of lnBucket slots each are kept; a key may live only in slot
// hHashOne(key) of the first table or slot hHashTwo(key) of the second.
// When both candidate slots are taken, Insert() tries to push one of the
// resident keys to that key's other slot (recursively, at most MaxLoop deep).
//
// Usage: construct with a requested slot count, call InitHashTable(), then
// Insert()/find(). Storage is held in std::vector, so copying and destroying
// instances is safe without hand-written special members.
template<>
class CuckooHash<int>
{
private:
    int lnBucket;                     // slots per table; rounded up to a prime by InitHashTable()
    std::vector<int> mKeyBucket1;     // keys addressed by hHashOne
    std::vector<int> mKeyBucket2;     // keys addressed by hHashTwo
    std::vector<char> mUsedBucket1;   // non-zero when the same slot of mKeyBucket1 holds a key
    std::vector<char> mUsedBucket2;   // non-zero when the same slot of mKeyBucket2 holds a key
    enum {MaxLoop = 1000};            // maximum eviction-chain depth tried per insert
    int lnCantInsertNum;              // number of inserts given up because no chain succeeded

private:
    // True once InitHashTable() has allocated the slot arrays.
    bool hIsReady() const
    {
        return !mKeyBucket1.empty();
    }

    // Remainder of inValue / inMod folded into [0, inMod). Built-in % keeps the
    // sign of the dividend, which would yield a negative slot for negative keys.
    static int hWrap(int inValue, int inMod)
    {
        const int lRem = inValue % inMod;
        return (lRem < 0) ? lRem + inMod : lRem;
    }

    // First hash function: key modulo the table size.
    int hHashOne(int iKey) const
    {
        return hWrap(iKey, lnBucket);
    }

    // Second hash function: (key / table size) modulo the table size.
    int hHashTwo(int iKey) const
    {
        return hWrap(iKey / lnBucket, lnBucket);
    }

    // Trial-division primality test. Values below 2 are not prime.
    static bool hIsPrime(int inN)
    {
        if (inN < 2) return false;
        // i <= inN / i is i * i <= inN without the risk of overflowing int.
        for (int i = 2; i <= inN / i; ++i)
        {
            if (inN % i == 0)
                return false;
        }
        return true;
    }

    // Smallest prime that is >= inNum (and >= 2).
    static int hGetMinPrime(int inNum)
    {
        if (inNum < 2) inNum = 2;
        while (!hIsPrime(inNum)) ++inNum;
        return inNum;
    }

    // Writes iKey into irKeys[inSlot]. If the slot is occupied, its resident
    // is first relocated through hReHash with one less level of depth.
    // Returns false, leaving the slot untouched, when that relocation fails.
    bool hMoveInto(std::vector<int>& irKeys, std::vector<char>& irUsed,
                   int inSlot, int iKey, int deeps)
    {
        if (irUsed[inSlot] && !hReHash(irKeys[inSlot], deeps - 1))
            return false;
        irKeys[inSlot] = iKey;
        irUsed[inSlot] = 1;
        return true;
    }

    // Copies a stored key into its alternate slot in the other table,
    // evicting along the way. The slot the key came from is NOT cleared:
    // the caller overwrites it on success. Nothing is written on failure.
    //   iKey  - a key currently stored in one of the two tables
    //   deeps - remaining recursion budget; <= 0 fails immediately
    // Returns true when iKey now also sits in its alternate slot.
    bool hReHash(int iKey, int deeps)
    {
        if (deeps <= 0) return false;
        const int lSlot1 = hHashOne(iKey);
        const int lSlot2 = hHashTwo(iKey);
        if (mUsedBucket1[lSlot1] && mKeyBucket1[lSlot1] == iKey)
            return hMoveInto(mKeyBucket2, mUsedBucket2, lSlot2, iKey, deeps);
        if (mUsedBucket2[lSlot2] && mKeyBucket2[lSlot2] == iKey)
            return hMoveInto(mKeyBucket1, mUsedBucket1, lSlot1, iKey, deeps);
        return false;
    }

public:
    // Records the requested slot count. No storage is allocated until
    // InitHashTable() runs (Insert() runs it on demand).
    CuckooHash(int inNum)
        : lnBucket(inNum), lnCantInsertNum(0)
    {
    }

    // Rounds the slot count up to a prime and (re)creates both tables empty.
    // Calling it again discards every stored key.
    void InitHashTable()
    {
        lnBucket = hGetMinPrime(lnBucket);
        const std::vector<int>::size_type lnSlots =
            static_cast<std::vector<int>::size_type>(lnBucket);
        mKeyBucket1.assign(lnSlots, 0);
        mKeyBucket2.assign(lnSlots, 0);
        mUsedBucket1.assign(lnSlots, static_cast<char>(0));
        mUsedBucket2.assign(lnSlots, static_cast<char>(0));
    }

    // Adds irKey unless it is already present. When neither candidate slot
    // can be freed, the key is dropped and the failure counter is bumped.
    // After every attempted insert of a new key the key, its two slot
    // indices and both tables are printed to std::cout.
    void Insert(const int& irKey)
    {
        if (!hIsReady()) InitHashTable();
        if (find(irKey)) return;

        const int lKey = irKey;
        const int lHashKey1 = hHashOne(lKey);
        const int lHashKey2 = hHashTwo(lKey);

        if (!mUsedBucket1[lHashKey1])
        {
            mKeyBucket1[lHashKey1] = lKey;
            mUsedBucket1[lHashKey1] = 1;
        }
        else if (!mUsedBucket2[lHashKey2])
        {
            mKeyBucket2[lHashKey2] = lKey;
            mUsedBucket2[lHashKey2] = 1;
        }
        else if (hReHash(mKeyBucket1[lHashKey1], MaxLoop))
        {
            // The resident now lives in its alternate slot; take over this one.
            mKeyBucket1[lHashKey1] = lKey;
        }
        else if (hReHash(mKeyBucket2[lHashKey2], MaxLoop))
        {
            mKeyBucket2[lHashKey2] = lKey;
        }
        else
        {
            ++lnCantInsertNum;
        }

        std::cout << "After insert : " << lKey << '\n';
        std::cout << lHashKey1 << " " << lHashKey2 << '\n';
        PrintBucket4Test();
    }

    // True when irKey is stored in either of its two candidate slots.
    // Always false before the table has been initialized.
    bool find(const int& irKey) const
    {
        if (!hIsReady()) return false;
        const int lHashKey1 = hHashOne(irKey);
        if (mUsedBucket1[lHashKey1] && mKeyBucket1[lHashKey1] == irKey)
            return true;
        const int lHashKey2 = hHashTwo(irKey);
        return mUsedBucket2[lHashKey2] && mKeyBucket2[lHashKey2] == irKey;
    }

    // Number of Insert() calls that had to drop their key.
    int GetCantInsertNum() const
    {
        return lnCantInsertNum;
    }

    // Prints each table on its own line, slots separated by spaces.
    // Empty slots print as 0.
    void PrintBucket4Test() const
    {
        for (std::vector<int>::size_type i = 0; i < mKeyBucket1.size(); ++i)
            std::cout << mKeyBucket1[i] << ' ';
        std::cout << '\n';
        for (std::vector<int>::size_type i = 0; i < mKeyBucket2.size(); ++i)
            std::cout << mKeyBucket2[i] << ' ';
        std::cout << '\n';
    }
};
int _tmain(int argc, _TCHAR* argv[])
{
CuckooHash<int> CKHash(12);
CKHash.InitHashTable();
int a[] = {20, 50, 53, 75, 100, 67, 105, 3, 36, 39, 6};
for(int i = 0; i< sizeof(a)/sizeof(int); i++)
{
CKHash.Insert(a[i]);
}
int b;
cin >> b;
return 0;
}
http://www.it-c.dk/people/pagh/papers/cuckoo-undergrad.pdf
http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
http://en.wikipedia.org/wiki/Cuckoo_hashing
http://hi.baidu.com/algorithms/blog/item/eb89b582add48f95f703a61e.html