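/*
 * A conventional hash table whose size is always a prime number.
 * The probing scheme is simple and straightforward, so bugs are easy to keep under control.
 * Performance is surprisingly decent: 1.27 probes on average, while Python's is only about 1.24.
 * Keys and values are still the classic "string -> positive integer" pair.
 */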
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Status codes returned by the int-returning hash_* functions. */
#define ERROR -1
#define SUCCESS 0
/* Initial accumulator value used by hash(). */
#define HASH_NUMBER 5381
#define HASH_MINSIZE 7 // must be one of the entries of hash_sizes
#define HASH_DEFALUT_VALUE 0 // default value of the hash table (copied into hashtable.default_value)
/* True once used + deleted slots occupy at least two thirds of the table. */
#define HASH_NEED_RESIZE(tb) (((tb)->used+(tb)->deleted)*3>=(((tb)->size)*2))
/* Keys are C strings: compared with strcmp and copied with strdup. */
#define hash_keycmp strcmp
#define hash_keydup strdup
/*
 * Table capacities used when growing: entry k is no larger than 1 << k
 * (index 0 is for 1 << 0, index 16 for 1 << 16, index 31 for 1 << 31).
 * hash_resize() steps from one entry to the next.
 */
static const size_t hash_sizes[] = {
    /*  0 ..  3 */ 1, 2, 3, 7,
    /*  4 ..  7 */ 13, 31, 61, 127,
    /*  8 .. 11 */ 251, 509, 1021, 2039,
    /* 12 .. 15 */ 4093, 8191, 16381, 32749,
    /* 16 .. 19 */ 65521, 131071, 262139, 524287,
    /* 20 .. 23 */ 1048573, 2097143, 4194301, 8388593,
    /* 24 .. 27 */ 16777213, 33554393, 67108859, 134217689,
    /* 28 .. 31 */ 268435399, 536870909, 1073741789, 2147483647
};
/* Key type: a NUL-terminated C string (see hash_keycmp / hash_keydup). */
typedef char* hash_key;
/* Value type: an unsigned integer stored alongside each key. */
typedef size_t hash_value;
/*
 * State of one slot in the node array.
 *   BLANK   - freshly created slot (hash_create_nodes sets key to NULL);
 *   USED    - slot holds a live key/value pair;
 *   DELETED - entry was removed; the key string stays allocated until
 *             hash_free_nodes() releases it.
 */
typedef enum {
BLANK, USED, DELETED,
} hash_node_status;
/* One slot of the table. */
typedef struct {
hash_key key; /* heap copy of the key made with hash_keydup; NULL while BLANK */
hash_value value; /* value associated with the key */
hash_node_status status; /* BLANK, USED or DELETED */
} hash_node;
/* The hash table object: counters plus the slot array. */
typedef struct {
size_t used; // number of slots currently in use
size_t deleted; // number of slots marked as deleted
size_t size; // total number of slots, i.e. used + deleted + blank
size_t default_value; // value given to keys created by hash_force_search
hash_node *nodes; // the slot array itself
} hashtable;
/* Allocate `size` slots, all initialised to BLANK; NULL on allocation failure. */
hash_node *hash_create_nodes(size_t size) ;
/* Free the keys of all non-BLANK slots and then the array itself. */
int hash_free_nodes(hash_node *nds, size_t size) ;
hashtable *hash_create_table(size_t size) ; // create a table object
int hash_free_table(hashtable *tb) ;// destroy a table
hash_node *hash_search(hashtable *tb, hash_key key) ;// plain lookup: returns a node only if the key exists
int hash_update(hashtable *tb, hash_key key, hash_value value) ;// plain update: only performed if the key exists
int hash_insert(hashtable *tb, hash_key key, hash_value value) ;// plain insert: only performed if the key does not exist
/* Mark the entry for `key` as DELETED. */
int hash_delete(hashtable *tb, hash_key key) ;
hash_node *hash_force_search(hashtable *tb, hash_key key) ;// forced lookup: if the key is missing, insert it and return the new node
int hash_force_update(hashtable *tb, hash_key key, hash_value value) ;// forced update: if the key is missing, insert it and then set its value
static size_t hash(hash_key s) ;// hash function
static int hash_resize(hashtable *tb) ;// rebuild the table with the next size
static hash_node *hash_meta_search(hashtable *tb, hash_key key) ; // helper shared by the lookup/insert functions
/* Print size/used/deleted counters. */
static void hash_table_status(hashtable *t) ;
/* qsort comparator over hash_node values. */
static int hash_cmp(const void *a, const void *b) ;
static void hash_all_members(hashtable *t) ; // print all members in descending order of value
static void hash_probe_rate(hashtable *t) ;// look up every member of the table in turn and report the probe/collision ratio, to assess lookup performance
static size_t probe_number = 0; // accumulates the number of collisions while traversing the whole table
/*
 * Word-frequency driver: reads whitespace-separated words from stdin,
 * counts each one in a hash table, then prints the probe statistics and
 * every word with its count (highest count first).
 *
 * Returns SUCCESS, or ERROR if the table cannot be created or grown.
 */
int main(void) {
    hashtable *tb = hash_create_table(0);
    if (tb == NULL)
        return ERROR;

    char wd[100];
    /*
     * The field width keeps scanf from writing past wd[]; words longer
     * than 99 bytes are consumed in several pieces.  "%s" already skips
     * leading whitespace, so no trailing "\n" directive is needed.
     */
    while (scanf("%99s", wd) == 1) {
        hash_node *np = hash_force_search(tb, wd);
        if (np == NULL) {
            hash_free_table(tb);
            return ERROR;
        }
        np->value++;
    }

    hash_probe_rate(tb);
    hash_all_members(tb);
    hash_free_table(tb);
    return SUCCESS;
}
/*
 * String hash: start from HASH_NUMBER and, for every character up to the
 * terminating NUL, multiply the accumulator by 33 and add the character.
 * Arithmetic is done in size_t and wraps on overflow.
 */
size_t hash(hash_key s) {
    size_t acc = HASH_NUMBER;

    while (*s != '\0') {
        acc = acc * 33 + *s;
        s++;
    }
    return acc;
}
hash_node *hash_create_nodes(size_t size) {
hash_node *nds = (hash_node*) malloc(sizeof(hash_node) * size);
if (nds == NULL)
return NULL;
size_t i = 0;
for (; i < size; i++) {
nds[i].status = BLANK;
nds[i].key = NULL;
nds[i].value = 0;
}
return nds;
}
/*
 * Release a slot array created by hash_create_nodes().
 * USED and DELETED slots still own their key string, so those keys are
 * freed first; BLANK slots own nothing.  Always returns SUCCESS.
 */
int hash_free_nodes(hash_node *nds, size_t size) {
    for (hash_node *slot = nds, *end = nds + size; slot != end; ++slot) {
        switch (slot->status) {
        case USED:
        case DELETED:
            free(slot->key);
            break;
        default:
            break;
        }
    }
    free(nds);
    return SUCCESS;
}
/*
 * Create an empty hash table with `size` slots.
 *
 * size: requested slot count.  It should be one of the entries of
 *       hash_sizes, otherwise hash_resize() cannot find the next size.
 *       Values below 3 (including 0, meaning "use the default") are
 *       replaced by HASH_MINSIZE, because the probe step is computed as
 *       1 + h % (size - 2) and needs size > 2.
 *
 * Returns the new table, or NULL on allocation failure.  The caller
 * releases it with hash_free_table().
 */
hashtable *hash_create_table(size_t size) {
    if (size < 3)
        size = HASH_MINSIZE;

    hashtable *tb = (hashtable *) malloc(sizeof(hashtable));
    if (tb == NULL)
        return NULL;

    tb->nodes = hash_create_nodes(size);
    if (tb->nodes == NULL) {
        free(tb); /* do not leak the header when the slot array fails */
        return NULL;
    }

    tb->size = size;
    tb->deleted = 0;
    tb->used = 0;
    tb->default_value = HASH_DEFALUT_VALUE;
    return tb;
}
/*
 * Destroy a table created by hash_create_table(): frees every stored key,
 * the slot array and the table object itself.
 * Passing NULL is allowed and does nothing.  Always returns SUCCESS.
 */
int hash_free_table(hashtable *tb) {
    if (tb == NULL)
        return SUCCESS;
    hash_free_nodes(tb->nodes, tb->size);
    free(tb);
    return SUCCESS;
}
/*
 * Grow the table to the next entry of hash_sizes and re-insert every
 * USED entry; DELETED entries are dropped.
 *
 * Returns SUCCESS, or ERROR when the current size is not in hash_sizes,
 * is already the largest entry, or memory runs out.  On ERROR the table
 * is left exactly as it was before the call.
 */
static int hash_resize(hashtable *tb) {
    const size_t n_sizes = sizeof hash_sizes / sizeof hash_sizes[0];
    const size_t old_size = tb->size;
    const size_t old_used = tb->used;
    const size_t old_deleted = tb->deleted;
    hash_node *old_nds = tb->nodes;
    size_t new_size = 0;
    size_t i;

    /* Find the successor of the current size (the last entry has none). */
    for (i = 0; i + 1 < n_sizes; i++) {
        if (hash_sizes[i] == old_size) {
            new_size = hash_sizes[i + 1];
            break;
        }
    }
    if (new_size == 0)
        return ERROR;

    hash_node *new_nds = hash_create_nodes(new_size);
    if (new_nds == NULL)
        return ERROR;

    /* Switch to the empty array so hash_insert() fills the new table. */
    tb->nodes = new_nds;
    tb->size = new_size;
    tb->deleted = 0;
    tb->used = 0;

    for (i = 0; i < old_size; i++) {
        if (old_nds[i].status != USED)
            continue;
        if (hash_insert(tb, old_nds[i].key, old_nds[i].value) == ERROR) {
            /* Roll back: discard the partial copy, restore the old table. */
            hash_free_nodes(tb->nodes, tb->size);
            tb->nodes = old_nds;
            tb->size = old_size;
            tb->used = old_used;
            tb->deleted = old_deleted;
            return ERROR;
        }
    }

    /* hash_insert() duplicated the keys, so the old ones can go. */
    hash_free_nodes(old_nds, old_size);
    return SUCCESS;
}
/*
 * Core probe routine shared by the lookup/insert functions.
 *
 * Starting at hash(key) % size and stepping backwards by
 * 1 + hash(key) % (size - 2), returns the first slot that is either
 *   - BLANK (the key is not in the table; this is where it would go), or
 *   - USED or DELETED and holds a key equal to `key`.
 * Callers inspect the returned slot's status to tell these cases apart.
 *
 * Returns NULL if the table has no slots or if `size` probes found
 * neither a match nor a BLANK slot.
 *
 * probe_number is incremented once for every probe after the first.
 */
static hash_node *hash_meta_search(hashtable *tb, hash_key key) {
    size_t h = hash(key);
    size_t h_size = tb->size;
    hash_node *np0 = tb->nodes;

    if (h_size == 0)
        return NULL;

    size_t i = h % h_size;
    /* size - 2 would be zero or wrap around for tiny tables; step by 1 then. */
    size_t step = (h_size > 2) ? 1 + h % (h_size - 2) : 1;

    /* Bounded so that a table without BLANK slots cannot loop forever. */
    for (size_t probes = 0; probes < h_size; probes++) {
        hash_node *np = &np0[i];

        if (np->status == BLANK)
            return np;
        /* Non-BLANK slots (USED or DELETED) always carry a key string. */
        if (hash_keycmp(np->key, key) == 0)
            return np;

        probe_number++;
        /* i = (i - step) mod size, done entirely in unsigned arithmetic. */
        i = (i >= step) ? i - step : i + h_size - step;
    }
    return NULL;
}
/*
 * Plain lookup.  Returns the node holding `key` when it is present and
 * live (status USED); returns NULL when the key is absent or deleted.
 */
hash_node *hash_search(hashtable *tb, hash_key key) {
    hash_node *slot = hash_meta_search(tb, key);

    return (slot != NULL && slot->status == USED) ? slot : NULL;
}
/*
 * Plain update.  Overwrites the value of an existing, live key.
 * Returns SUCCESS when the key was found with status USED, ERROR
 * otherwise (nothing is inserted).
 */
int hash_update(hashtable *tb, hash_key key, hash_value value) {
    hash_node *slot = hash_meta_search(tb, key);

    if (slot == NULL || slot->status != USED)
        return ERROR;

    slot->value = value;
    return SUCCESS;
}
/*
 * Plain insert.  Adds key -> value only when the key is not already live.
 *
 * - BLANK slot: the key is duplicated with hash_keydup and stored; if the
 *   load threshold is then reached the table is resized and the result of
 *   hash_resize() is returned (so ERROR here can mean "stored, but the
 *   resize failed").
 * - DELETED slot with the same key: the slot is revived, reusing its key.
 * - USED slot (key already present) or failed lookup: ERROR.
 */
int hash_insert(hashtable *tb, hash_key key, hash_value value) {
    hash_node *slot = hash_meta_search(tb, key);

    if (slot == NULL)
        return ERROR;

    switch (slot->status) {
    case BLANK:
        slot->key = hash_keydup(key);
        if (slot->key == NULL)
            return ERROR;
        slot->value = value;
        slot->status = USED;
        tb->used++;
        return HASH_NEED_RESIZE(tb) ? hash_resize(tb) : SUCCESS;

    case DELETED:
        slot->value = value;
        slot->status = USED;
        tb->used++;
        tb->deleted--;
        return SUCCESS;

    default:
        return ERROR;
    }
}
/*
 * Forced lookup.  Returns the live node for `key`, creating it first when
 * necessary.  A newly created (or revived) node gets tb->default_value.
 *
 * Creating a node in a BLANK slot may trigger a resize; the node is then
 * looked up again in the rebuilt table before being returned.
 *
 * Returns NULL if the lookup fails, the key cannot be duplicated, or the
 * resize fails.
 */
hash_node *hash_force_search(hashtable *tb, hash_key key) {
    hash_node *slot = hash_meta_search(tb, key);

    if (slot == NULL)
        return NULL;

    switch (slot->status) {
    case USED:
        return slot;

    case BLANK:
        slot->key = hash_keydup(key);
        if (slot->key == NULL)
            return NULL;
        slot->value = tb->default_value;
        slot->status = USED;
        tb->used++;
        if (!HASH_NEED_RESIZE(tb))
            return slot;
        if (hash_resize(tb) == ERROR)
            return NULL;
        /* The slot array was replaced; find the key's new home. */
        return hash_meta_search(tb, key);

    case DELETED:
        slot->value = tb->default_value;
        slot->status = USED;
        tb->used++;
        tb->deleted--;
        return slot;

    default:
        return NULL;
    }
}
/*
 * Forced update.  Sets key -> value, inserting the key first if it is not
 * present (via hash_force_search).  Returns SUCCESS, or ERROR when the
 * node could not be obtained.
 */
int hash_force_update(hashtable *tb, hash_key key, hash_value value) {
    hash_node *slot = hash_force_search(tb, key);

    if (slot != NULL) {
        slot->value = value;
        return SUCCESS;
    }
    return ERROR;
}
/*
 * Delete `key` by marking its slot DELETED.  The key string stays in the
 * slot so that later probes can still walk past it.
 *
 * The probe sequence is the same as in hash_meta_search(): start at
 * hash(key) % size and step backwards by 1 + hash(key) % (size - 2).
 * probe_number is incremented once for every probe after the first.
 *
 * Returns SUCCESS when a live entry was deleted; ERROR when the key is
 * absent or already deleted.
 */
int hash_delete(hashtable *tb, hash_key key) {
    size_t h = hash(key);
    size_t h_size = tb->size;
    hash_node *np0 = tb->nodes;

    if (h_size == 0)
        return ERROR;

    size_t i = h % h_size;
    /* size - 2 would be zero or wrap around for tiny tables; step by 1 then. */
    size_t step = (h_size > 2) ? 1 + h % (h_size - 2) : 1;

    /* Bounded so that a table without BLANK slots cannot loop forever. */
    for (size_t probes = 0; probes < h_size; probes++) {
        hash_node *np = &np0[i];

        /* BLANK ends the chain; its key is NULL and must not be compared. */
        if (np->status == BLANK)
            return ERROR;

        /* Compare against the key of the slot being probed right now. */
        if (hash_keycmp(np->key, key) == 0) {
            if (np->status != USED)
                return ERROR; /* already deleted */
            np->status = DELETED;
            tb->deleted++;
            tb->used--;
            return SUCCESS;
        }

        probe_number++;
        /* i = (i - step) mod size, done entirely in unsigned arithmetic. */
        i = (i >= step) ? i - step : i + h_size - step;
    }
    return ERROR;
}
/*
 * Print the table's slot counters (size, used, deleted) to stdout.
 * The fields are size_t, so they are printed with %zu.
 */
static void hash_table_status(hashtable *t) {
    printf("hashtable size:%zu, used:%zu, deleted:%zu\n", t->size, t->used,
           t->deleted);
}
static int hash_cmp(const void *a, const void *b) {
return (*(hash_node *) a).value > (*(hash_node *) b).value ? -1 : 1;
}
/*
 * Print every live entry as "key<TAB>value", sorted by value in
 * descending order.
 *
 * The entries are copied into a temporary heap array (shallow copies:
 * the key strings still belong to the table) so the table itself is not
 * reordered.  Prints nothing for an empty table; reports to stderr and
 * returns if the temporary array cannot be allocated.
 */
static void hash_all_members(hashtable *t) {
    size_t count = t->used;

    if (count == 0)
        return;

    hash_node *es = malloc(count * sizeof *es);
    if (es == NULL) {
        fprintf(stderr, "hash_all_members: out of memory\n");
        return;
    }

    size_t j = 0;
    for (size_t i = 0; i < t->size && j < count; i++)
        if (t->nodes[i].status == USED)
            es[j++] = t->nodes[i];

    qsort(es, j, sizeof es[0], hash_cmp);

    for (size_t i = 0; i < j; i++)
        printf("%s\t%zu\n", es[i].key, es[i].value);

    free(es);
}
/*
 * Measure lookup cost: reset probe_number, look up every live key once,
 * then print the average number of probes per lookup
 * ((extra probes + lookups) / lookups) together with the table counters.
 * An empty table is reported with a rate of 0.00.
 */
static void hash_probe_rate(hashtable *t) {
    extern size_t probe_number;
    probe_number = 0;

    hash_node *nds = t->nodes;
    for (size_t i = 0; i < t->size; i++)
        if (nds[i].status == USED)
            hash_search(t, nds[i].key);

    /* Avoid 0/0 when there is nothing to look up. */
    float rate = 0.0f;
    if (t->used != 0)
        rate = (probe_number + t->used) / (float) t->used;

    printf("probe rate: %.2f (hashtable size: %zu, used: %zu, deleted: %zu)\n",
           rate, t->size, t->used, t->deleted);
}