C语言散列表普通探测版

最新推荐文章于 2022-07-21 17:06:45 发布
lispythonic
最新推荐文章于 2022-07-21 17:06:45 发布
阅读量850
点赞数
分类专栏： C 算法
本文链接：https://blog.csdn.net/napo1987/article/details/37780505
版权
算法同时被 2 个专栏收录
11 篇文章 0 订阅
订阅专栏
4 篇文章 0 订阅
订阅专栏
这个是常规的散列表,大小总是为素数.探测方法是简单明了的,出了BUG也好掌控.性能竟然还不错,平均1.27次探测.而python的也就1.24.
键值的类型依然是经典的'字符串-正整数'.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Status codes returned by the int-returning hash_* functions and by main. */
#define ERROR -1
#define SUCCESS 0
/* Starting value of the accumulator in hash(). */
#define HASH_NUMBER 5381
#define HASH_MINSIZE  7 // must be one of the entries of hash_sizes
#define HASH_DEFALUT_VALUE 0 // default value of the hash table
/* True when used + deleted slots occupy at least two thirds of the table. */
#define HASH_NEED_RESIZE(tb) (((tb)->used+(tb)->deleted)*3>=(((tb)->size)*2))
/* Keys are C strings: compared with strcmp, copied with strdup. */
#define hash_keycmp strcmp
#define hash_keydup strdup

/*
 * Table capacities in increasing order; hash_resize() takes a table's next
 * capacity from this list. The "For 1 << k" notes mark the power of two an
 * entry corresponds to.
 * NOTE(review): apart from the first entry the values look like the largest
 * prime not exceeding each power of two -- not verified for every entry.
 */
static const size_t hash_sizes [] = {
    1,          /* For 1 << 0 */
    2,
    3,
    7,
    13,
    31,
    61,
    127,
    251,
    509,
    1021,
    2039,
    4093,
    8191,
    16381,
    32749,
    65521,      /* For 1 << 16 */
    131071,
    262139,
    524287,
    1048573,
    2097143,
    4194301,
    8388593,
    16777213,
    33554393,
    67108859,
    134217689,
    268435399,
    536870909,
    1073741789,
    2147483647  /* For 1 << 31 */
};
/* Keys are NUL-terminated strings; values are unsigned counters. */
typedef char* hash_key;
typedef size_t hash_value;
/*
 * State of one slot.
 *   BLANK   - as initialised by hash_create_nodes(); key is NULL.
 *   USED    - holds a live key/value pair.
 *   DELETED - marked by hash_delete(); the node still owns its key string
 *             (hash_free_nodes() frees it).
 */
typedef enum {
    BLANK, USED, DELETED,
} hash_node_status;
/* One slot of the table. */
typedef struct {
    hash_key key;
    hash_value value;
    hash_node_status status;
} hash_node;
/* The table itself. */
typedef struct {
    size_t used; // number of slots in use
    size_t deleted; // number of deleted slots
    size_t size; // total number of slots, i.e. used + deleted + blank
    size_t default_value; // value hash_force_search() gives to a key it creates
    hash_node *nodes; // the slot array
} hashtable;

/* Public interface. */
hash_node *hash_create_nodes(size_t size) ;
int hash_free_nodes(hash_node *nds, size_t size) ;
hashtable *hash_create_table(size_t size) ; // create a table object
int hash_free_table(hashtable *tb) ;// destroy a table
hash_node *hash_search(hashtable *tb, hash_key key) ;// plain lookup: returns a node only when key exists
int hash_update(hashtable *tb, hash_key key, hash_value value) ;// plain update: performed only when key exists
int hash_insert(hashtable *tb, hash_key key, hash_value value) ;// plain insert: performed only when key does not exist
int hash_delete(hashtable *tb, hash_key key) ;
hash_node *hash_force_search(hashtable *tb, hash_key key) ;// forced lookup: if key is missing, insert it first, then return its node
int hash_force_update(hashtable *tb, hash_key key, hash_value value) ;// forced update: if key is missing, insert it first, then update it
/* File-local helpers. */
static size_t hash(hash_key s) ;// hash function
static int hash_resize(hashtable *tb) ;// rebuild the table at a larger size
static hash_node *hash_meta_search(hashtable *tb, hash_key key) ; // probing helper shared by the lookup/insert functions
static void hash_table_status(hashtable *t) ;
static int hash_cmp(const void *a, const void *b) ;
static void hash_all_members(hashtable *t) ; // print all members ordered by value, descending
static void hash_probe_rate(hashtable *t) ;// look up every member of the table in turn to obtain the probe/collision ratio; used to assess lookup performance
static size_t probe_number = 0; // accumulates the number of collisions seen while probing

/*
 * Word-frequency driver: reads whitespace-separated words from stdin, counts
 * each word in a hash table, then prints the probe statistics followed by
 * every word with its count.
 *
 * Returns SUCCESS, or ERROR when the table cannot be created or a word cannot
 * be stored.
 */
int main() {
    hashtable *tb = hash_create_table(0);
    if (tb == NULL)
        return ERROR;
    hash_node *np;
    char wd[100];
    /* The field width stops scanf from writing past wd (99 chars + NUL); a
       longer word is read in pieces. %s skips leading whitespace by itself. */
    while (scanf("%99s", wd) == 1) {
        np = hash_force_search(tb, wd);
        if (np == NULL) {
            hash_free_table(tb);
            return ERROR;
        }
        np->value++;
    }
    hash_probe_rate(tb);
    hash_all_members(tb);
    hash_free_table(tb);
    return SUCCESS;
}

/*
 * String hash. Starts from HASH_NUMBER and, for every character of s up to
 * the terminating NUL, multiplies the running value by 33 and adds the
 * character. The arithmetic is done in size_t, so overflow wraps around.
 */
size_t hash(hash_key s) {
    size_t acc = HASH_NUMBER;
    while (*s != '\0') {
        acc = acc * 33 + *s;
        s++;
    }
    return acc;
}

hash_node *hash_create_nodes(size_t size) {
    hash_node *nds = (hash_node*) malloc(sizeof(hash_node) * size);
    if (nds == NULL)
        return NULL;
    size_t i = 0;
    for (; i < size; i++) {
        nds[i].status = BLANK;
        nds[i].key = NULL;
        nds[i].value = 0;
    }
    return nds;
}

/*
 * Release a node array of `size` entries: frees the key string of every
 * USED or DELETED node, then the array itself. Always returns SUCCESS.
 */
int hash_free_nodes(hash_node *nds, size_t size) {
    size_t idx;
    for (idx = 0; idx != size; ++idx) {
        hash_node *slot = &nds[idx];
        switch (slot->status) {
        case USED:
        case DELETED:
            /* both states still own their key string */
            free(slot->key);
            break;
        default:
            break;
        }
    }
    free(nds);
    return SUCCESS;
}

/*
 * Create an empty table with `size` slots; a size of 0 selects HASH_MINSIZE.
 *
 * Returns the new table, or NULL when either allocation fails. Release the
 * table with hash_free_table().
 */
hashtable *hash_create_table(size_t size) {
    if (size == 0)
        size = HASH_MINSIZE;
    hashtable *tb = malloc(sizeof *tb);
    if (tb == NULL)
        return NULL;
    tb->nodes = hash_create_nodes(size);
    if (tb->nodes == NULL) {
        /* do not leak the table header when the slot array cannot be made */
        free(tb);
        return NULL;
    }
    tb->size = size;
    tb->deleted = 0;
    tb->used = 0;
    tb->default_value = HASH_DEFALUT_VALUE;
    return tb;
}

/*
 * Destroy a table: releases its node array (including the key strings) and
 * the table header. A NULL table is accepted and ignored, like free(NULL).
 * Always returns SUCCESS.
 */
int hash_free_table(hashtable *tb) {
    if (tb == NULL)
        return SUCCESS;
    hash_free_nodes(tb->nodes,tb->size);
    free(tb);
    return SUCCESS;
}

/*
 * Grow the table to the next capacity in hash_sizes and re-insert every USED
 * entry; DELETED entries are dropped.
 *
 * The new array is built on a scratch table and only swapped into `tb` once
 * every entry has been copied, so on failure `tb` is left exactly as it was.
 *
 * Returns SUCCESS, or ERROR when hash_sizes has no larger capacity, when the
 * new array cannot be allocated, or when copying an entry fails.
 */
static int hash_resize(hashtable *tb) {
    const size_t n_sizes = sizeof(hash_sizes) / sizeof(hash_sizes[0]);
    size_t new_size = 0;
    size_t i = 0;
    /* First capacity larger than the current one; this also works for a
       table created with a size that is not itself listed in hash_sizes. */
    for (i = 0; i < n_sizes; i++)
        if (hash_sizes[i] > tb->size) {
            new_size = hash_sizes[i];
            break;
        }
    if (new_size==0)
        return ERROR;

    hashtable grown;
    grown.nodes = hash_create_nodes(new_size);
    if (grown.nodes == NULL)
        return ERROR;
    grown.size = new_size;
    grown.used = 0;
    grown.deleted = 0;
    grown.default_value = tb->default_value;

    for (i = 0; i < tb->size; i++) {
        if (tb->nodes[i].status != USED)
            continue;
        /* hash_insert stores its own copy of the key */
        if (hash_insert(&grown, tb->nodes[i].key, tb->nodes[i].value)==ERROR) {
            hash_free_nodes(grown.nodes, grown.size);
            return ERROR;
        }
    }

    /* Everything copied: drop the old array and commit the new one. */
    hash_free_nodes(tb->nodes, tb->size);
    tb->nodes = grown.nodes;
    tb->size = grown.size;
    tb->used = grown.used;
    tb->deleted = grown.deleted;
    return SUCCESS;
}

/*
 * Probe the table for `key`.
 *
 * Probing starts at hash(key) % size and then moves backwards by a
 * key-dependent step, wrapping around the array. Every probe after the first
 * is counted in probe_number.
 *
 * Returns
 *   - the node whose key equals `key` (its status is USED or DELETED), or
 *   - the first BLANK node met on the probe path when the key is absent, or
 *   - NULL when the table has no slots or `size` probes found neither.
 */
static hash_node *hash_meta_search(hashtable *tb, hash_key key) {
    size_t h = hash(key);
    size_t h_size = tb->size;
    hash_node *np0 = tb->nodes;
    if (h_size == 0)
        return NULL;
    /* Step lies in [1, size-2]; tables of one or two slots fall back to a
       step of 1 so the modulus below is never taken by zero. */
    size_t step = h_size > 2 ? 1+h%(h_size-2) : 1;
    size_t i = h % h_size;
    size_t probes = 0;
    /* Bounding the walk by the slot count guarantees termination even when
       no BLANK slot is left. If the size is prime (as the entries of
       hash_sizes are intended to be) `size` probes visit every slot once. */
    for (; probes < h_size; probes++) {
        hash_node *np = &np0[i];
        if (np->status == BLANK)
            return np;
        /* non-BLANK nodes always carry a key, USED and DELETED alike */
        if (hash_keycmp(np->key, key) == 0)
            return np;
        probe_number++;
        /* i = (i - step) mod size, without leaving unsigned arithmetic */
        i = i >= step ? i - step : i + h_size - step;
    }
    return NULL;
}

/*
 * Look up `key`. Returns its node when the key is present with status USED;
 * returns NULL otherwise (key absent, key deleted, or the probe failed).
 */
hash_node *hash_search(hashtable *tb, hash_key key) {
    hash_node *hit = hash_meta_search(tb,key);
    return (hit != NULL && hit->status == USED) ? hit : NULL;
}

/*
 * Overwrite the value of an existing key. Returns SUCCESS when `key` is
 * present with status USED; returns ERROR and changes nothing otherwise.
 */
int hash_update(hashtable *tb, hash_key key, hash_value value) {
    hash_node *hit = hash_meta_search(tb,key);
    if (hit == NULL || hit->status != USED)
        return ERROR;
    hit->value = value;
    return SUCCESS;
}

/*
 * Insert `key` with `value`; the table stores its own copy of the key.
 *
 * Returns SUCCESS when the key was added (into a BLANK slot, or by reviving
 * its DELETED slot). Returns ERROR when the key is already present, the
 * probe fails, the key cannot be copied, or the table has to grow and
 * hash_resize() fails.
 */
int hash_insert(hashtable *tb, hash_key key, hash_value value) {
    hash_node *np = hash_meta_search(tb, key);
    if (np == NULL)
        return ERROR;
    if (np->status == DELETED) {
        /* the deleted node still holds this key, so only the state and
           the counters change */
        np->status = USED;
        tb->used++;
        tb->deleted--;
        np->value = value;
        return SUCCESS;
    }
    if (np->status != BLANK)
        return ERROR;               /* key already present */

    hash_node *nodes_before = tb->nodes;
    if ((np->key = hash_keydup(key))==NULL)
        return ERROR;
    np->status = USED;
    tb->used++;
    np->value = value;
    if (!HASH_NEED_RESIZE(tb))
        return SUCCESS;
    if (hash_resize(tb) == SUCCESS)
        return SUCCESS;

    /* Growing failed. Take the key back out so that ERROR really means "not
       inserted" and the table stays below its load limit. np is only
       trusted while the array it points into is still the live one. */
    if (tb->nodes == nodes_before) {
        free(np->key);
        np->key = NULL;
        np->value = 0;
        np->status = BLANK;
        tb->used--;
    }
    return ERROR;
}

/*
 * Look up `key`, creating it with the table's default_value when it is
 * absent or deleted.
 *
 * Returns the key's node, or NULL when the probe fails, the key cannot be
 * copied, or the table has to grow and hash_resize() fails.
 */
hash_node *hash_force_search(hashtable *tb, hash_key key) {
    hash_node *np = hash_meta_search(tb,key);
    if (np==NULL)
        return NULL;
    switch (np->status) {
    case USED:
        return np;
    case DELETED:
        /* the deleted node still holds this key: revive it */
        tb->deleted--;
        tb->used++;
        np->status = USED;
        np->value = tb->default_value;
        return np;
    case BLANK:
        break;
    default:
        return NULL;
    }

    hash_node *nodes_before = tb->nodes;
    if ((np->key = hash_keydup(key))==NULL)
        return NULL;
    tb->used++;
    np->status = USED;
    np->value = tb->default_value;
    if (!HASH_NEED_RESIZE(tb))
        return np;
    if (hash_resize(tb) != ERROR)
        /* the node moved to the new array; find it again */
        return hash_meta_search(tb, key);

    /* Growing failed. Take the key back out so that NULL really means "not
       stored" and the table stays below its load limit. np is only trusted
       while the array it points into is still the live one. */
    if (tb->nodes == nodes_before) {
        free(np->key);
        np->key = NULL;
        np->value = 0;
        np->status = BLANK;
        tb->used--;
    }
    return NULL;
}

/*
 * Set `key` to `value`, creating the key first when necessary (via
 * hash_force_search). Returns SUCCESS, or ERROR when no node could be
 * obtained for the key.
 */
int hash_force_update(hashtable *tb, hash_key key, hash_value value) {
    hash_node *slot = hash_force_search(tb, key);
    if (slot != NULL) {
        slot->value = value;
        return SUCCESS;
    }
    return ERROR;
}

/*
 * Mark `key` as deleted. The node keeps its key string and changes status
 * from USED to DELETED; the used/deleted counters are adjusted.
 *
 * The probe sequence is the one used for lookups: start at
 * hash(key) % size, then step backwards by a key-dependent amount with
 * wrap-around. Every probe after the first is counted in probe_number.
 *
 * Returns SUCCESS when a USED node with this key was found, ERROR when the
 * key is absent or already deleted.
 */
int hash_delete(hashtable *tb, hash_key key) {
    size_t h = hash(key);
    size_t h_size = tb->size;
    hash_node *np0 = tb->nodes;
    if (h_size == 0)
        return ERROR;
    /* Step lies in [1, size-2]; tiny tables fall back to a step of 1 so the
       modulus is never taken by zero. */
    size_t step = h_size > 2 ? 1+h%(h_size-2) : 1;
    size_t i = h % h_size;
    size_t probes = 0;
    for (; probes < h_size; probes++) {
        hash_node *np = &np0[i];
        /* A BLANK slot ends the probe path: the key is not in the table.
           It must be tested before the key, which is NULL for BLANK nodes. */
        if (np->status == BLANK)
            return ERROR;
        /* compare against the key of the slot being visited */
        if (hash_keycmp(np->key, key) == 0) {
            if (np->status != USED)
                return ERROR;       /* already deleted */
            np->status = DELETED;
            tb->deleted++;
            tb->used--;
            return SUCCESS;
        }
        probe_number++;
        /* i = (i - step) mod size, without leaving unsigned arithmetic */
        i = i >= step ? i - step : i + h_size - step;
    }
    return ERROR;
}

/*
 * Print the table's slot count and its used/deleted counters to stdout.
 * The counters are size_t, hence the %zu conversions.
 */
static void hash_table_status(hashtable *t) {
    printf("hashtable size:%zu, used:%zu, deleted:%zu\n", t->size, t->used,
           t->deleted);
}

static int hash_cmp(const void *a, const void *b) {
    return (*(hash_node *) a).value > (*(hash_node *) b).value ? -1 : 1;
}

/*
 * Print every USED entry as "key<TAB>value", ordered by value, largest
 * first. Prints nothing for an empty table.
 *
 * The entries are copied into a heap buffer for sorting; the copies share
 * the key strings with the table, so only the buffer is freed. If the buffer
 * cannot be allocated a message goes to stderr and nothing is printed.
 */
static void hash_all_members(hashtable *t) {
    hash_node *nds = t->nodes;
    size_t count = t->used;
    if (count == 0)
        return;
    /* heap instead of a variable-length array: the element count is only
       bounded by the table size */
    hash_node *es = malloc(count * sizeof *es);
    if (es == NULL) {
        fprintf(stderr, "hash_all_members: out of memory\n");
        return;
    }
    size_t i = 0, j = 0, size = t->size;
    for (; i < size && j < count; i++)
        if (nds[i].status == USED)
            es[j++] = nds[i];
    qsort(es, j, sizeof(es[0]), hash_cmp);
    for (i = 0; i < j; i++)
        printf("%s\t%zu\n", es[i].key, es[i].value);
    free(es);
}

/*
 * Measure lookup cost: resets probe_number, looks up every USED key once,
 * and prints the average number of probes per key -- (extra probes + keys)
 * / keys -- together with the table counters. An empty table reports a rate
 * of 0.00 rather than dividing by zero.
 */
static void hash_probe_rate(hashtable *t) {
    extern size_t probe_number;
    probe_number = 0;
    hash_node *nds = t->nodes;
    size_t i = 0;
    for (; i < t->size; i++)
        if (nds[i].status == USED)
            hash_search(t, nds[i].key);
    float rate = 0.0f;
    if (t->used != 0)
        rate = (probe_number + t->used) / (float) t->used;
    printf("probe rate: %.2f (hashtable size: %zu, used: %zu, deleted: %zu)\n",
           rate, t->size, t->used,
           t->deleted);
}