djb2 算法:
/*
 * djb2 string hash.
 *
 * Hashes the NUL-terminated byte string `str`: starting from the seed 5381,
 * every byte updates the running value as hash = hash * 33 + byte.
 * Arithmetic is done in unsigned long, so overflow simply wraps around.
 *
 * Returns the hash value; for an empty string that is the seed itself (5381).
 */
unsigned long hash(unsigned char *str)
{
    /* Hash seed. */
    unsigned long hash = 5381;
    int c;

    /* Visit every character up to (not including) the terminating NUL. */
    while ((c = *str++) != 0) {
        /* (hash << 5) + hash is hash * 32 + hash, i.e. hash * 33;
         * then add the character code and move on to the next byte. */
        hash = ((hash << 5) + hash) + (unsigned long)c;
    }
    return hash;
}
至于种子为什么选择 5381,通过搜索得到以下结论,该数算一个魔法常量:
5381是个奇数
5381是质数
5381是缺数
二进制分布均匀:001/010/100/000/101
由于本人对算法是一窍不通,以上特点对hash结果会有什么影响实在不懂,希望高手们能解释一下。
Redis算法对djbhash的实现方法如下(以下代码在 src/dict.c ):
/* Hash seed shared through the accessors below; its default value is 5381. */
static uint32_t dict_hash_function_seed = 5381;

/* Install `seed` as the new hash seed. */
void dictSetHashFunctionSeed(uint32_t seed)
{
    dict_hash_function_seed = seed;
}

/* Report the hash seed currently in effect. */
uint32_t dictGetHashFunctionSeed(void)
{
    return dict_hash_function_seed;
}
/*
 * And a case insensitive hash function (based on djb hash).
 *
 * Hashes the first `len` bytes of `buf`. Each byte is lowercased with
 * tolower() before being mixed in, so inputs that differ only in letter
 * case hash to the same value. A `len` of zero or less returns the seed
 * unchanged.
 *
 * Requires <ctype.h> for tolower().
 */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    /* Start from the configured hash seed. */
    unsigned int hash = (unsigned int)dict_hash_function_seed;

    /* Walk the buffer. The `> 0` test keeps a negative length from
     * running past the end of the buffer. */
    while (len-- > 0) {
        /* djb step: hash * 33 + lowercased byte. */
        hash = ((hash << 5) + hash) + (unsigned int)tolower(*buf++);
    }
    return hash;
}
Redis对djbhash做了一个小小的修改,将需要处理的字符串进行了大小写的转换,使得hash算法的结果与大小写无关。
MurmurHash2算法:
/*
 * MurmurHash2, 32-bit variant.
 *
 * Hashes the `len` bytes starting at `key`, with `seed` folded into the
 * initial state. The input is consumed four bytes at a time; the one to
 * three bytes left over are mixed in separately, followed by a final
 * avalanche step.
 *
 * Full 4-byte groups are read in the machine's native byte order, so the
 * result for inputs of four or more bytes depends on host endianness.
 */
uint32_t MurmurHash2( const void * key, int len, uint32_t seed )
{
    /* 'm' and 'r' are mixing constants generated offline.
     * They're not really 'magic', they just happen to work well. */
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value. */
    uint32_t h = seed ^ (uint32_t)len;

    const unsigned char *data = (const unsigned char *)key;

    /* Mix 4 bytes at a time into the hash. */
    while (len >= 4) {
        /* Turn the next four bytes into one 32-bit word. Going through a
         * union keeps the native byte order of a direct load while staying
         * valid for a `key` that is not 4-byte aligned. */
        union {
            unsigned char bytes[4];
            uint32_t word;
        } chunk;
        chunk.bytes[0] = data[0];
        chunk.bytes[1] = data[1];
        chunk.bytes[2] = data[2];
        chunk.bytes[3] = data[3];

        uint32_t k = chunk.word;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array: shift each remaining
     * byte into position so together they form one integer. */
    switch (len) {
    case 3:
        h ^= (uint32_t)data[2] << 16;
        /* fallthrough */
    case 2:
        h ^= (uint32_t)data[1] << 8;
        /* fallthrough */
    case 1:
        h ^= data[0];
        h *= m;
        break;
    default:
        /* No trailing bytes to mix in. */
        break;
    }

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
参考资料:
Redis2.8.9源码 src/dict.h src/dict.c