最近我在做一个项目,其中要用到一个数据结构——Hash Table(哈希表)。以前只有理论知识,现在实际使用时却发现很不简单,所以写下来和大家一起分享。
我们知道,哈希表是一个固定大小的数组,数组的每个元素是一个链表(单向或双向)的头指针。哈希值相同的Key会被放进同一个链表,哈希值不同的Key则放在不同的链表中。哈希表的查询是飞快的,因为它不需要从头搜索,而是利用Key的“哈希算法”直接定位,各种数据库中的数据结构基本都是它。但随之而来的问题是:如何确定哈希表的尺寸,以及如何选择哈希算法。
哈希表的数组是定长的,如果太大,则浪费空间,如果太小,则体现不出效率。合适的数组大小是哈希表性能的关键。哈希表的尺寸最好是一个质数,下面的尺寸列表中最小的质数尺寸是17。
当然,根据不同的数据量,会有不同的哈希表的大小。对于数据量时多时少的应用,最好的设计是使用尺寸可动态变化的哈希表:如果你发现哈希表尺寸太小了,比如其中的元素个数达到哈希表尺寸的2倍时,我们就需要扩大哈希表尺寸,一般是扩大一倍。下面的数组是哈希表变化尺寸时可选尺寸大小的一个列表。
/* Candidate table sizes used when the hash table is resized.  Each entry is
   roughly double the previous one; the trailing comment on every row gives
   the index range of that row. */
static int prime_array[] = {
    17,         37,         79,         163,        /*  0 ..  3 */
    331,        673,        1361,       2729,       /*  4 ..  7 */
    5471,       10949,      21911,      43853,      /*  8 .. 11 */
    87719,      175447,     350899,     701819,     /* 12 .. 15 */
    1403641,    2807303,    5614657,    11229331,   /* 16 .. 19 */
    22458671,   44917381,   89834777,   179669557,  /* 20 .. 23 */
    359339171,  718678369,  1437356741,             /* 24 .. 26 */
    2147483647                                      /* 27 (largest signed int prime) */
};
/* Number of entries in prime_array. */
#define PRIME_ARRAY_SIZE (28)
要使用哈希表,就一定要用一个哈希算法,根据Key计算出它在数组中的位置(下标)。这似乎是个很难的事,下面是一个哈希算法:
/* Hash table descriptor used by getHashIndex().
   NOTE(review): hLinks is not declared in the visible source; it is
   presumably the chain (linked-list) node type -- confirm. */
typedef struct _hTab{
hLinks* link; /* a linked list */
int num; /* number of members */
int size; /* size of the table */
} hTab;
/* Compute the array index for a NUL-terminated string key.
 *
 * The key is folded byte by byte: ha = (ha * 128 + byte) % size, so the
 * result is always in the range [0, size).
 *
 * tabPtr  table whose `size` field gives the number of array slots.
 * key     NUL-terminated string; must not be NULL.
 *
 * Returns the slot index, or 0 when the table size is not positive
 * (avoids a division by zero).
 */
static unsigned int
getHashIndex(hTab *tabPtr, const char *key)
{
    /* Read the key as unsigned bytes so that characters >= 0x80 hash the
       same way whether plain `char` is signed or unsigned. */
    const unsigned char *p = (const unsigned char *)key;
    unsigned int buckets;
    unsigned int ha = 0;

    if (tabPtr->size <= 0)
        return 0;
    buckets = (unsigned int)tabPtr->size;

    while (*p)
        ha = (ha * 128 + *p++) % buckets;
    return ha;
}
(其中key是一个字符串,hTab就是一个哈希表结构, tabPtr->size是哈希表数组的大小)
请支持原创作者, 转载请说明出处!
/* Public interface of the hash table.
   The include guard avoids the `_HASHTABLE_H` spelling because identifiers
   that start with an underscore followed by an uppercase letter are
   reserved for the implementation. */
#ifndef HASHTABLE_H
#define HASHTABLE_H

/* Opaque table handle; the layout is private to the implementation. */
struct hashtable;

/* Create a table sized for at least `size` entries (the actual size is
   rounded up to a prime taken from an internal list).
   hash_func: maps a key to a hash value; NULL selects a built-in hash of
              the key pointer itself.
   test_func: returns non-zero when two keys are equal; NULL selects
              pointer equality. */
struct hashtable* hashtable_create(unsigned long size, unsigned long(*hash_func)(const void *key),
int(*test_func)(const void *key1, const void *key2));

/* Create a table whose keys are NUL-terminated strings (hashed and
   compared by content). */
struct hashtable* make_string_hashtable(unsigned long size);

/* Insert key -> value.  Only the pointers are stored, nothing is copied.
   Returns 1 if the entry was added, 0 if the key is already present (the
   existing value is left unchanged). */
int hashtable_put(struct hashtable *ht, const void *key, void *value);

/* Return the value stored for `key`, or NULL when the key is absent. */
void* hashtable_get(struct hashtable *ht, const void *key);

/* Remove the entry for `key`.  Returns 1 if removed, 0 if not found. */
int hashtable_remove(struct hashtable *ht, const void *key);

/* Return non-zero when `key` is present. */
int hashtable_contains(struct hashtable *ht, const void *key);

/* Replace the value of an existing key.  Returns 1 on success, 0 when the
   key is absent (nothing is inserted). */
int hashtable_set(struct hashtable *ht, const void *key, void *newvalue);

/* Number of entries currently stored. */
unsigned long hashtable_count(struct hashtable *ht);

/* Call mapfunc(key, value, maparg) for each stored entry; iteration stops
   as soon as mapfunc returns 0. */
void hashtable_map(struct hashtable *ht, int(*mapfunc)(void*, void*, void*), void *maparg);

/* Remove every entry but keep the table usable. */
void hashtable_clear(struct hashtable *ht);

/* Release the table's own storage.  Keys and values are not freed. */
void hashtable_close(struct hashtable *ht);

#endif /* HASHTABLE_H */
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "hashtable.h"
/* Maximum load factor: the table is grown once the entry count reaches
   size * HASH_MAX_FULLNESS. */
#define HASH_MAX_FULLNESS 0.75
/* Factor by which the requested size is multiplied when the table grows. */
#define HASH_RESIZE_FACTOR 2
/* Home slot of `key` in a table of `size` slots.  Every argument is
   parenthesized so that expressions such as `n + 1` can be passed safely. */
#define HASH_POSITION(key, hash_func, size) ((hash_func)(key) % (size))
/* 0/NULL is a legal key, so an empty slot is marked with an all-ones
   pointer instead.  The marker is built from size_t rather than
   unsigned long so that it is all-ones across the full pointer width even
   where pointers are wider than unsigned long; it must equal what
   memset(..., INVALID_PTR_BYTE, ...) writes into a slot. */
#define INVALID_PTR ((void*) ~(size_t)0)
#define INVALID_PTR_BYTE 0xff
/* Non-zero when the slot holds an entry. */
#define NON_EMPTY(mapping) ((mapping)->key != INVALID_PTR)
/* Hash callback: maps a key to an unsigned long hash value. */
typedef unsigned long (*hash_func_t)(const void *key);
/* Key comparison callback: returns non-zero when key1 and key2 are equal. */
typedef int (*test_func_t)(const void *key1, const void *key2);
/* One slot of the table.  A slot whose key equals INVALID_PTR is empty
   (see NON_EMPTY). */
struct hash_mapping {
void *key;
void *value;
};
/* Hash table state: a flat array of key/value slots plus bookkeeping. */
struct hashtable {
hash_func_t hash_func; /* Hash function pointer. */
test_func_t test_func; /* Hash key compare function pointer. */
struct hash_mapping *mappings; /* Hashtable data entries (array of `size` slots). */
unsigned long count; /* Number of non-empty entries currently stored. */
unsigned long size; /* Current number of slots in `mappings`. */
int prime_offset; /* Index into the prime list where the next size search starts. */
unsigned long resize_threshold; /* Resize threshold: once `count` reaches this value
the table is grown. */
};
/***********************************************************************
* Internal (file-local) helper functions.
***********************************************************************/
/* Prime the hashtable size. */
static unsigned long prime_size(unsigned long size, int *prime_offset) {
static const unsigned long primes[] = {
13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
1174703521, 1527114613, 1837299131, 2147483647};
int i;
for(i = *prime_offset; i < sizeof(primes) / sizeof(unsigned long); ++i) {
if(primes[i] >= size) {
*prime_offset = i + 1;
return primes[i];
}
}
abort(); /* Hash table range out. */
}
static int grow_hashtable(struct hashtable *ht) {
if(!ht)
return 0;
unsigned long newsize = prime_size(ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
ht->mappings = realloc(ht->mappings, newsize * sizeof(struct hash_mapping));
memset(ht->mappings + ht->size, INVALID_PTR_BYTE,
(newsize - ht->size) * sizeof(struct hash_mapping));
ht->size = newsize;
ht->resize_threshold = newsize * HASH_MAX_FULLNESS;
return 1;
}
/* Hash function. If not give customer hash function, use it.
This implementation is the Robert Jenkins' 32 bit Mix Function,
with a simple adaptation for 64-bit values.*/
static unsigned long hash_pointer(const void *key) {
unsigned long hashval = (unsigned long)key;
hashval += (hashval << 12);
hashval ^= (hashval >> 22);
hashval += (hashval << 4);
hashval ^= (hashval >> 9);
hashval += (hashval << 10);
hashval ^= (hashval >> 2);
hashval += (hashval << 7);
hashval ^= (hashval >> 12);
#if ULONG_MAX > 4294967295
hashval += (hashval << 44);
hashval ^= (hashval >> 54);
hashval += (hashval << 36);
hashval ^= (hashval >> 41);
hashval += (hashval << 42);
hashval ^= (hashval >> 34);
hashval += (hashval << 39);
hashval ^= (hashval >> 44);
#endif
return hashval;
}
/* Hash function for string tables: h = h * 31 + c over the bytes of a
   NUL-terminated string (taken from Gnome's glib, modified to use standard
   C types).  The empty string hashes to 0.  `key` must not be NULL.

   The loop stops at the terminator '\0'.  The earlier spelling '/0' is a
   multi-character constant that no char ever equals, so the scan ran past
   the end of the string.  Bytes are read as unsigned char so the result
   does not depend on whether plain `char` is signed. */
static unsigned long hash_string(const void *key) {
  const unsigned char *p = key;
  unsigned int h = *p;
  if (h)
    for (p += 1; *p != '\0'; p++)
      h = (h << 5) - h + *p;
  return h;
}
/* Default key comparison, used when the caller supplies none: two keys
   are equal only when they are the same pointer.  Returns 1 for equal,
   0 otherwise. */
static int cmp_pointer(const void *key1, const void *key2) {
  if(key1 == key2)
    return 1;
  return 0;
}
/* Key comparison for string tables: keys are NUL-terminated strings
   compared by content.  Returns 1 when they are equal, 0 otherwise. */
static int string_cmp_pointer(const void *key1, const void *key2) {
  const char *lhs = key1;
  const char *rhs = key2;
  return strcmp(lhs, rhs) == 0;
}
/* Hash table find mapping function, it is a linchpin in hash table. */
static struct hash_mapping* find_mapping(struct hashtable *ht, const void *key) {
struct hash_mapping *mapping = ht->mappings +
HASH_POSITION(key, ht->hash_func, ht->size);
if(NON_EMPTY(mapping) && !ht->test_func(mapping->key, key)) {
unsigned long i = 0;
for(; i < ht->size; ++i) {
struct hash_mapping *mp = ht->mappings + i;
if(!NON_EMPTY(mp))
mapping = mp;
if(NON_EMPTY(mp) && ht->test_func(mp->key, key)) {
mapping = mp;
break;
}
} /* Loop end. */
}
return mapping;
}
/**************************************************************************
* Hash table public functions.
**************************************************************************/
struct hashtable* hashtable_create(unsigned long size, hash_func_t hash_func,
test_func_t test_func) {
struct hashtable *ht=malloc(sizeof(struct hashtable));
unsigned long hsize = prime_size(size + 1, &ht->prime_offset);
ht->mappings = malloc(hsize * sizeof(struct hash_mapping));
memset(ht->mappings, INVALID_PTR_BYTE, hsize * sizeof(struct hash_mapping));
ht->hash_func = hash_func ? hash_func : hash_pointer;
ht->test_func = test_func ? test_func : cmp_pointer;
ht->count = 0;
ht->size = hsize;
ht->prime_offset = 0;
ht->resize_threshold = hsize * HASH_MAX_FULLNESS;
return ht;
}
struct hashtable* make_string_hashtable(unsigned long size) {
return hashtable_create(size, hash_string, string_cmp_pointer);
}
int hashtable_put(struct hashtable *ht, const void *key, void *value) {
if(ht->count >= ht->resize_threshold)
grow_hashtable(ht);
struct hash_mapping *mapping = find_mapping(ht, key);
if(NON_EMPTY(mapping))
return 0;
mapping->key = (void*)key;
mapping->value = value;
ht->count += 1;
return 1;
}
void* hashtable_get(struct hashtable *ht, const void *key) {
struct hash_mapping *mapping = find_mapping(ht, key);
return NON_EMPTY(mapping) ? mapping->value : NULL;
}
int hashtable_remove(struct hashtable *ht, const void *key) {
struct hash_mapping *mapping = find_mapping(ht, key);
if(!NON_EMPTY(mapping)) /* Not found. */
return 0;
/* Remove item. */
memset(mapping, INVALID_PTR_BYTE, sizeof(struct hash_mapping));
ht->count -= 1;
return 1;
}
/* Return non-zero (1) when `key` is present in the table, 0 otherwise. */
int hashtable_contains(struct hashtable *ht, const void *key) {
  struct hash_mapping *slot = find_mapping(ht, key);
  return NON_EMPTY(slot);
}
/* Replace the value of an existing entry.
   Returns 1 when the key was found and its value updated, 0 when the key
   does not exist (nothing is inserted in that case). */
int hashtable_set(struct hashtable *ht, const void *key, void *newvalue) {
  struct hash_mapping *slot = find_mapping(ht, key);
  if(NON_EMPTY(slot)) {
    /* Update the item. */
    slot->value = newvalue;
    return 1;
  }
  return 0; /* Not exist. */
}
/* Number of entries currently stored in the table. */
unsigned long hashtable_count(struct hashtable *ht) {
  const unsigned long entries = ht->count;
  return entries;
}
/* Call mapfunc(key, value, maparg) for every stored entry, in slot order.
   Iteration stops early as soon as mapfunc returns 0.  A NULL mapfunc is
   ignored.

   The index has the same type as ht->size (unsigned long), so the slot
   count is never narrowed, and the bound is re-read on each step so it
   always matches the current slot array. */
void hashtable_map(struct hashtable *ht,
int(*mapfunc)(void*, void*, void*), void* maparg) {
  unsigned long i;

  if(!mapfunc)
    return;
  for(i = 0; i < ht->size; ++i) {
    struct hash_mapping *mp = ht->mappings + i;
    if(NON_EMPTY(mp) && !mapfunc(mp->key, mp->value, maparg))
      return;
  }
}
/* Remove all entries: every slot is reset to the empty marker and the
   entry count drops to zero.  The slot array keeps its current size. */
void hashtable_clear(struct hashtable *ht) {
  const size_t nbytes = ht->size * sizeof(struct hash_mapping);
  ht->count = 0;
  memset(ht->mappings, INVALID_PTR_BYTE, nbytes);
}
/* Destroy the table: frees the slot array and the table structure itself.
   Keys and values are not freed.  Passing NULL is a no-op, mirroring
   free(). */
void hashtable_close(struct hashtable *ht) {
  if(!ht)
    return;
  free(ht->mappings);
  free(ht);
}
http://blog.csdn.net/haoel/article/details/2863
http://blog.csdn.net/aishen944/article/details/1483516(实现)
http://hi.baidu.com/zengzhaonong/blog/item/bb9ecd1b3f82d3d6ad6e759e.html(另外一种实现)