哈希表是一种常用的数据结构。它由一个数组(哈希表)和一个映射(哈希函数)组成。哈希函数把关键码值映射到表中的一个位置。存储在哈希表中的每一项数据都必须有对应的键。哈希函数将该项的键映射为一个哈希值,这个哈希值被用作该项在哈希表中的索引。这使得数据能够被快速访问。
哈希算法的模型
initialize the internal state; for (each block of the input) { combine (the internal state, the current input block); mix( the internal state); } value = postprocess( the internal state ); return (value);
例如XORhash:
/*
 * XORhash - fold a key into a small bucket number by XOR-ing its bytes.
 *
 * key: the bytes to hash (not modified)
 * len: number of bytes to read from key; len <= 0 yields 0
 *
 * Returns a value in the range 0..100.
 *
 * The bytes are accumulated as unsigned char. With a plain (possibly
 * signed) char accumulator, any key byte >= 0x80 could make the running
 * value negative, and a negative value % 101 is negative -- unusable as
 * a table index. Results for 7-bit keys are unaffected.
 */
char XORhash(const char *key, int len)
{
    unsigned char hash = 0;
    int i;

    for (i = 0; i < len; ++i)
        hash ^= (unsigned char)key[i];

    return (char)(hash % 101); /* 101 is prime */
}
32位哈希
typedef unsigned int u4;  /* unsigned 4-byte type */
typedef unsigned char u1; /* unsigned 1-byte type */

/*
 * Compile-time guard: the hash below relies on u4 arithmetic wrapping at
 * exactly 32 bits. If u4 is any other width this array type gets a
 * negative size and the build fails.
 */
typedef char u4_must_be_32_bits[((u4)~(u4)0 == 0xffffffffUL) ? 1 : -1];

/*
 * The mixing step.
 *
 * Scrambles the three state words a, b and c in place. All three
 * arguments must be modifiable lvalues and each is evaluated several
 * times, so do not pass expressions with side effects.
 *
 * Wrapped in do { } while (0) so that `mix(a,b,c);` is a single statement
 * and can be used safely inside an unbraced if/else.
 */
#define mix(a,b,c) \
do { \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 13); \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 8);  \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 13); \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 12); \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 16); \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);  \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);  \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 10); \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 15); \
} while (0)

/*
 * hash - hash a variable-length key into a 32-bit value.
 *
 * k:       the key, read as raw bytes (not modified)
 * length:  the length of the key in bytes
 * initval: the previous hash, or an arbitrary value
 *
 * Returns the final value of the third state word.
 *
 * The key is consumed 12 bytes at a time; each group of four bytes is
 * assembled into a word with the lowest-indexed byte in the low 8 bits,
 * so the result does not depend on the host byte order.
 */
u4 hash(const u1 *k, u4 length, u4 initval)
{
    u4 a, b, c;       /* the internal state */
    u4 len = length;  /* how many key bytes still need mixing */

    /* Set up the internal state */
    a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
    c = initval;        /* variable initialization of internal state */

    /*---------------------------------------- handle most of the key */
    while (len >= 12) {
        a += (u4)k[0] + ((u4)k[1] << 8) + ((u4)k[2] << 16) + ((u4)k[3] << 24);
        b += (u4)k[4] + ((u4)k[5] << 8) + ((u4)k[6] << 16) + ((u4)k[7] << 24);
        c += (u4)k[8] + ((u4)k[9] << 8) + ((u4)k[10] << 16) + ((u4)k[11] << 24);
        mix(a, b, c);
        k += 12;
        len -= 12;
    }

    /*------------------------------------- handle the last 11 bytes */
    c += length;
    switch (len) { /* every case deliberately falls through */
    case 11: c += (u4)k[10] << 24; /* fallthrough */
    case 10: c += (u4)k[9] << 16;  /* fallthrough */
    case 9:  c += (u4)k[8] << 8;   /* fallthrough */
    /* the low byte of c is reserved for the length */
    case 8:  b += (u4)k[7] << 24;  /* fallthrough */
    case 7:  b += (u4)k[6] << 16;  /* fallthrough */
    case 6:  b += (u4)k[5] << 8;   /* fallthrough */
    case 5:  b += (u4)k[4];        /* fallthrough */
    case 4:  a += (u4)k[3] << 24;  /* fallthrough */
    case 3:  a += (u4)k[2] << 16;  /* fallthrough */
    case 2:  a += (u4)k[1] << 8;   /* fallthrough */
    case 1:  a += (u4)k[0];
        break;
    default: /* 0: nothing left to add */
        break;
    }
    mix(a, b, c);

    /*-------------------------------------------- report the result */
    return c;
}
The new hash deals with blocks of 12 bytes, rather than 1 byte at a time like most hashes. The whole hash, including the mix, takes about 6m+35 instructions to hash m bytes.
64位哈希
/*
 * The 64-bit mixing step.
 *
 * Scrambles the three state words a, b and c in place. The shift counts
 * go up to 49, so the arguments are expected to be unsigned 64-bit
 * lvalues. Each argument is evaluated several times; do not pass
 * expressions with side effects.
 *
 * Wrapped in do { } while (0) so that `mix64(a,b,c);` is a single
 * statement and can be used safely inside an unbraced if/else.
 */
#define mix64(a,b,c) \
do { \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 43); \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 9);  \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 8);  \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 38); \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 23); \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);  \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 35); \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 49); \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 11); \
  (a) -= (b); (a) -= (c); (a) ^= ((c) >> 12); \
  (b) -= (c); (b) -= (a); (b) ^= ((a) << 18); \
  (c) -= (a); (c) -= (b); (c) ^= ((b) >> 22); \
} while (0)
The whole 64-bit hash takes about 5m+41 instructions to hash m bytes.
细究慢慢看英文吧。
http://burtleburtle.net/bob/hash/evahash.html