杜鹃散列表通常通过一个拥有多个散列函数的大表来实现,这些散列函数探测整个大表。
插入时首先依次尝试所有散列函数,一旦找到空位便插入。如果试完所有散列函数仍未找到空位,便随机踢出一个,记录最后被踢出的位置(如果下次随机出来的位置与最后被踢出的位置相同,则重新随机)然后将被踢出去的项插入散列表,重复上述步骤。
如果经过一定次数的尝试,仍未成功插入,则更换一组散列函数(不改变表的大小)。若更换五组散列函数后仍未成功,则扩大表的大小(不改变散列函数),重新尝试。
生成一组散列函数的类接口
template <typename AnyType>
class CuckooHashFamily
{
public:
size_t hash(const AnyType & x,int which)const;
int getNumberOfFunction();
void generateNewFunction();
};
字符串散列
template <int count>
class StringHashFamily
{
public:
StringHashFamily():MULTIPLIERS(count)//每组散列函数个数count
{
generateNewFunctions();
}
int getNumberOfFunctions()const
{
return count;
}
void generateNewFunctions()
{
for(auto & mult:MULTIPLIERS)
mult=r.nextInt();
}
size_t hash(const std::string & x,int which)const//散列函数
{
const int multiplier=MULTIPLIERS[which];
size_t hashVal=0;
for(auto ch:x)
hashVal=multiplier*hashVal+ch;
return hashVal;
}
private:
std::vector<int> MULTIPLIERS:
UniformRandom r;
};
完整代码
#include <vector>
bool isPrime(int size)
{
for(int i=3;i*i<=size;i+=2)
{
if(size%i==0)
return false;
}
return true;
}
int nextPrime(int size)
{
if(size<=2)
return 2;
if(size%2==0)
size++;
while(!isPrime(size))
{
size+=2;
}
return size;
}
template <typename AnyType>
class CuckooHashFamily
{
public:
size_t hash(const AnyType & x,int which)const;
int getNumberOfFunction();
void generateNewFunction();
};
template <int count>
class StringHashFamily
{
public:
StringHashFamily():MULTIPLIERS(count)
{
generateNewFunctions();
}
int getNumberOfFunctions()const
{
return count;
}
void generateNewFunctions()
{
for(auto & mult:MULTIPLIERS)
mult=r.nextInt();
}
size_t hash(const std::string & x,int which)const
{
const int multiplier=MULTIPLIERS[which];
size_t hashVal=0;
for(auto ch:x)
hashVal=multiplier*hashVal+ch;
return hashVal;
}
private:
std::vector<int> MULTIPLIERS:
UniformRandom r;
};
template <typename AnyType,typename HashFamily>
class CuckooHashTable
{
public:
explicit CuckooHashTable(int size=101):array(nextPrime(size))
{
numHashFunctions=hashFunctions.getNumberOfFunctions();
rehashes=0;
makeEmpty();
}
void makeEmpty()
{
currentSize=0;
for(auto & entry:array)
{
entry.isActive=false;
}
}
bool contains(const AnyType & x)const
{
return findPos(x)!=-1;
}
bool remove(const AnyType & x)
{
int currentPos=findPos(x);
if(currentPos==-1)
return false;
array[currentPos].isActive=false;
--currentPos;
return true;
}
bool insert(const AnyType & x)
{
if(contains(x))
return false;
if(currentSize>=array.size()*MAX_lOAD)
expand();
return insertHelper1(x);
}
bool insert(AnyType && x);
private:
struct HashEntry
{
AnyType element;
bool isActive;
HashEntry(const AnyType & e=AnyType{},bool a=false):element(e),isActive(a){}
HashEntry(AnyType && e,bool a=false):element(std::move(e)),isActive(a){}
};
//插入例程
bool insertHelper1(const AnyType & xx)
{
const int COUNT_LIMIT=100;//踢出尝试次数
AnyType x=xx;
while(true)
{
int lastPos=-1;
int pos;
for(int count=0;count<COUNT_LIMIT;++count)
{
for(int i=0;i<numHashFunctions;++i)
{
pos=myhash(x, i);
if(!isActive(pos))
{
array[pos]=std::move(HashEntry{std::move(x),true});
++currentSize;
return true;
}
}
int i=0;//如果只有两个散列函数而且两个散列函数计算的位置相同,则可能陷入无限循环,因此需要限制循环次数。
do
{
pos=myhash(x,r.nextInt(numHashFunctions));//随机找出一项踢出
}while(pos==lastPos&&i++<5);
lastPos=pos;//记录最后被踢出的位置
std::swap(x,array[pos].element);//踢出一项
}
if(++rehashes>ALLOWED_REHASHS)
{
expand();
rehashes=0;
}
else
rehash();
}
}
bool insertHelper1(AnyType && xx);
bool isActive(int currentPos)const
{
return array[currentPos].isActive;
}
size_t myhash(const AnyType & x,int which)const
{
return hashFunctions.hash(x,which)%array.size();
}
int findPos(const AnyType & x)const
{
for(int i=0;i<numHashFunctions;++i)
{
int pos=myhash(x,i);
if(isActive(pos)&&array[pos].element==x)
return pos;
}
return -1;
}
void expand()
{
rehash(static_cast<int>(array.size()/MAX_lOAD));
}
void rehash()
{
hashFunctions.generateNewFunctions();
rehash(array.size());
}
void rehash(int newSize)
{
std::vector<HashEntry> oldArray=array;
array.resize(nextPrime(newSize));
for(auto & entry:array)
entry.isActive=false;
currentSize=0;
for(auto & entry:oldArray)
{
if(entry.isActive)
insert(std::move(entry.element));
}
}
constexpr static const double MAX_lOAD=0.40;
static const int ALLOWED_REHASHS=5;
std::vector<HashEntry> array;
int currentSize;
int numHashFunctions;
int rehashes;
UniformRandom r;
HashFamily hashFunctions;
};