Hash函数与取模运算

最新推荐文章于 2022-09-11 11:53:10 发布

百云在飘

最新推荐文章于 2022-09-11 11:53:10 发布

阅读量1.2w

点赞数 2

分类专栏： C语言编程文章标签： struct iterator null table associations function

本文链接：https://blog.csdn.net/wzhwho/article/details/5561410

版权

C语言编程专栏收录该内容

27 篇文章 0 订阅

订阅专栏

1. hash 函数

哈希表也称散列表，是一种数据结构，它可以提供快速的插入操作和查找操作：不论有多少数据项，插入与删除都只需要接近常量的时间，即 O(1) 时间级。但哈希表也有缺点：它是基于数组的，数组一旦被创建，就难以扩展；某些哈希表被填满时，性能会急剧下降。所以程序员必须清楚表中要存储多少数据。

哈希表操作的平均时间是基于统计特性而不是随机输入的期望值。

哈希表的重要特性就是哈希函数。

我们用一个将大数 ( 或解释为数的字符串 ) 映射成一个较小的，更容易管理的数的函数来达到这个目的。将一个项映射成一个较小的下标的函数称为哈希函数。

需要哈希函数，是因为哈希表是基于数组的，而且关键字值的范围通常比数组容量大，所以关键字值要通过哈希函数映射为数组的下标。

哈希函数一般是通过取模运算来实现( key = tag % constant) ，这就是一个简单的哈希函数。但也带来了不可避免的麻烦 : 两个或更多的不同项可能被散列到同一个位置，引起冲突。

2. 减少冲突

2.1. 开放地址法

开放地址法 : 让指定的数组大小两倍于需要存储的数据量。因此，可能一半的单元是空的。当冲突发生时，一个方法是通过系统的方法找到数组的一个空位，并把这个数据填入，而不再用哈希函数得到的数组下标。

在开放地址法中，如果数据不能直接放在由哈希函数计算出的下标位置，就得寻找空白位置，其中有三种简单的探索方法：

线性探索法，二次探索法，再哈希法。

2.2. 链地址法

链地址法 : 创建一个存放数据链表的数组，数组内不直接存储数据。这样，当发生冲突的时候，新的数据项直接接到这个数组下标所指的链表中。

在链地址法中，数据项的关键字值还是映射到数组下标，而数据本身保存在每个单元的链表中。

当然如果链表中有许多项，存取时间变长，找到初始单元的时间为 O(1) ，而搜索链表的时间与链表中的数据项数目 M 成正比，为 O(M) 。因此并不希望链表太满。

3. 经验

专家发现以下哈希函数形式很好 :

key = tag % constant，其中 constant 为质数，并且不大于哈希表容量。这样哈希值就分布得比较均匀，效率就不会受影响。

4. 开源项目哈希实现

hashtable.h

/*
 * A hash table (hashtab) maintains associations between
 * key values and datum values.  The type of the key values
 * and the type of the datum values is arbitrary.  The
 * functions for hash computation and key comparison are
 * provided by the creator of the table.
 *
 * Collisions are resolved by chaining: every slot of the table
 * holds a singly linked list of hashtab_node entries.
 *
 * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
 */
#ifndef _SS_HASHTAB_H_
#define _SS_HASHTAB_H_

/* Upper bound on the number of entries; hashtab_insert refuses to grow past it. */
#define HASHTAB_MAX_NODES 0xffffffff

/* One entry of a slot's chain. */
struct hashtab_node {
    void *key;                  /* caller-supplied key pointer (stored, not copied) */
    void *datum;                /* caller-supplied datum pointer (stored, not copied) */
    struct hashtab_node *next;  /* next entry in the same slot, or NULL */
};

struct hashtab {
    struct hashtab_node **htable;   /* hash table: array of `size` chain heads */
    unsigned long size;             /* number of slots in hash table */
    unsigned long nel;              /* number of elements in hash table */
    /*
     * Hash function.  The returned value is used as an index into
     * htable, so it is expected to be smaller than `size`.
     */
    unsigned long (*hash_value)(struct hashtab *h, void *key);
    /*
     * Key comparison function.  Returns 0 when the keys are equal;
     * insertion and search walk a chain for as long as the result
     * of comparing the wanted key with the stored key is > 0.
     */
    int (*keycmp)(struct hashtab *h, void *key1, void *key2);
};

/* Statistics reported by hashtab_stat. */
struct hashtab_info {
    unsigned long slots_used;       /* number of slots with at least one entry */
    unsigned long max_chain_len;    /* length of the longest chain */
};

/*
 * Creates a new hash table with the specified characteristics.
 *
 * Returns NULL if the table cannot be created (for example when
 * insufficient space is available) or the new hash table otherwise.
 */
struct hashtab *hashtab_create(unsigned long (*hash_value)(struct hashtab *h, void *key),
                               int (*keycmp)(struct hashtab *h, void *key1, void *key2),
                               unsigned long size);

/*
 * Inserts the specified (key, datum) pair into the specified hash table.
 * The pointers themselves are stored; nothing they point to is copied.
 *
 * Returns -ENOMEM on memory allocation error,
 * -EEXIST if there is already an entry with the same key,
 * -EINVAL for general errors (including a NULL table or a full table) or
 * 0 otherwise.
 */
int hashtab_insert(struct hashtab *h, void *k, void *d);

/*
 * Deletes the entry with the specified key from the specified hash table.
 *
 * Returns
 *     pointer to datum on success
 *     NULL if key not found
 */
void * hashtab_delete(struct hashtab *h, void *k);

/*
 * Searches for the entry with the specified key in the hash table.
 *
 * Returns NULL if no entry has the specified key or
 * the datum of the entry otherwise.
 */
void *hashtab_search(struct hashtab *h, void *k);

/*
 * Destroys the specified hash table.
 *
 * NOTE(review): key_free and datum_free are presumably meant to release
 * the stored keys and datums; confirm against hashtable.c how (and
 * whether) they are applied before relying on them.
 */
void hashtab_destroy(struct hashtab *h,
                     void (*key_free)(void *ptr),
                     void (*datum_free)(void *ptr) );

/*
 * Applies the specified apply function to (key,datum,args)
 * for each entry in the specified hash table.
 *
 * The order in which the function is applied to the entries
 * is dependent upon the internal structure of the hash table.
 *
 * If apply returns a non-zero status, then hashtab_map will cease
 * iterating through the hash table and will propagate the error
 * return to its caller.
 */
int hashtab_map(struct hashtab *h,
                int (*apply)(void *k, void *d, void *args),
                void *args);

/* Fill info with some hash table statistics */
void hashtab_stat(struct hashtab *h, struct hashtab_info *info);

/*
 * Print out the hash table nodes, slot by slot, on standard output.
 *
 * NOTE(review): `print` looks like a per-entry formatter for
 * (key, data); confirm against hashtable.c how it is used.
 */
void hashtab_print(struct hashtab *h, void (*print)(void *key, void *data));

/*
 * Iterate through the hashtable.
 *
 * slot_id is the slot of the entry the iterator currently refers to and
 * node_ptr is that entry (NULL before the first entry has been visited).
 */
typedef struct hashtab_iterator {
    int slot_id;
    struct hashtab_node *node_ptr;
} hashtab_iterator;

/*
 * Advances to the next entry of the table.
 *
 * Pass NULL as iterator to start a traversal: an iterator is then
 * allocated with malloc.  Each call returns the iterator positioned on
 * the next entry.  When no entries remain, the iterator is released
 * with free() and NULL is returned, so an iterator supplied by the
 * caller must be heap-allocated.
 */
hashtab_iterator * hashtab_iterate(struct hashtab *h, hashtab_iterator *iterator);

#endif /* _SS_HASHTAB_H_ */

hashtable.c

/* * Implementation of the hash table type. * * Author : Stephen Smalley, <sds@epoch.ncsc.mil> */ #include <stdio.h> #include <stdlib.h> #include <errno.h> #include <hashtable.h> struct hashtab *hashtab_create(unsigned long (*hash_value)(struct hashtab *h, void *key), int (*keycmp)(struct hashtab *h, void *key1, void *key2), unsigned long size) { struct hashtab *p; unsigned long i; p = (struct hashtab *) malloc(sizeof(*p)); if (p == NULL) return p; p->size = size; p->nel = 0; p->hash_value = hash_value; p->keycmp = keycmp; p->htable = malloc(sizeof(*(p->htable)) * size); if (p->htable == NULL) { perror("malloc"); free(p); return NULL; } for (i = 0; i < size; i++) p->htable[i] = NULL; return p; } int hashtab_insert(struct hashtab *h, void *key, void *datum) { unsigned long hvalue; struct hashtab_node *prev, *cur, *newnode; if (!h || h->nel == HASHTAB_MAX_NODES) return -EINVAL; hvalue = h->hash_value(h, key); prev = NULL; cur = h->htable[hvalue]; while (cur && h->keycmp(h, key, cur->key) > 0) { //fprintf(stdout, "keycmp: key1: (%x) %s key2: (%x) %s/n", key, key, cur->key, cur->key); prev = cur; cur = cur->next; } if (cur && (h->keycmp(h, key, cur->key) == 0)) return -EEXIST; newnode = malloc(sizeof(*newnode)); if (newnode == NULL) return -ENOMEM; newnode->key = key; newnode->datum = datum; if (prev) { newnode->next = prev->next; prev->next = newnode; } else { newnode->next = h->htable[hvalue]; h->htable[hvalue] = newnode; } h->nel++; return 0; } void * hashtab_delete(struct hashtab *h, void *key) { unsigned long hvalue; void *d; struct hashtab_node *prev; struct hashtab_node *cur; if(!h) return NULL; hvalue = h->hash_value(h, key); cur = h->htable[hvalue]; /* if need to rem first node */ if(cur != NULL && h->keycmp(h, key, cur->key) == 0) { h->htable[hvalue] = cur->next; cur->next = 0; d = cur->datum; free(cur); h->nel--; return d; } /* some node after first node */ /* trying to avoid SEGFAULT : Gokul */ if(cur != NULL) { prev = cur; cur = cur->next; while(cur != 
NULL) { if(h->keycmp(h, key, cur->key) == 0) { prev->next = cur->next; cur->next = NULL; d = cur->datum; h->nel--; return d; break; } else { prev = cur; cur = cur->next; } } // end while } else { fprintf(stderr, "cur is null where it shouldn't be !!!!!/n"); } return NULL; } void *hashtab_search(struct hashtab *h, void *key) { unsigned long hvalue; struct hashtab_node *cur; if (!h) return NULL; hvalue = h->hash_value(h, key); cur = h->htable[hvalue]; while (cur != NULL && h->keycmp(h, key, cur->key) > 0) cur = cur->next; if (cur == NULL || (h->keycmp(h, key, cur->key) != 0)) return NULL; return cur->datum; } void hashtab_destroy(struct hashtab *h, void (*key_free)(void *ptr), void (*datum_free)(void *ptr) ) { unsigned long i; struct hashtab_node *cur, *temp; if (!h) return; for (i = 0; i < h->size; i++) { cur = h->htable[i]; while (cur != NULL) { temp = cur; cur = cur->next; free(temp); } h->htable[i] = NULL; } free(h->htable); h->htable = NULL; free(h); } int hashtab_map(struct hashtab *h, int (*apply)(void *k, void *d, void *args), void *args) { unsigned long i; int ret; struct hashtab_node *cur; if (!h) return 0; for (i = 0; i < h->size; i++) { cur = h->htable[i]; while (cur != NULL) { ret = apply(cur->key, cur->datum, args); if (ret) return ret; cur = cur->next; } } return 0; } void hashtab_stat(struct hashtab *h, struct hashtab_info *info) { unsigned long i, chain_len, slots_used, max_chain_len; struct hashtab_node *cur; slots_used = 0; max_chain_len = 0; for (slots_used = max_chain_len = i = 0; i < h->size; i++) { cur = h->htable[i]; if (cur) { slots_used++; chain_len = 0; while (cur) { chain_len++; cur = cur->next; } if (chain_len > max_chain_len) max_chain_len = chain_len; } } info->slots_used = slots_used; info->max_chain_len = max_chain_len; } void hashtab_print(struct hashtab *h, void (*print)(void *key, void *data)) { unsigned long i; struct hashtab_node *cur, *temp; int count = 0; if (!h) return; for (i = 0; i < h->size; i++) { cur = h->htable[i]; 
printf("SLOT [%lu]:", i); while (cur != NULL) { printf("[%x]", (unsigned) cur); count++; temp = cur; cur = cur->next; } printf("/n"); } fprintf(stdout, "Total items: %d/n", count); } hashtab_iterator * hashtab_iterate(struct hashtab *h, hashtab_iterator *iterator) { struct hashtab_node *cur = NULL; if(iterator == NULL) { iterator = (hashtab_iterator *) malloc(sizeof(hashtab_iterator)); iterator->slot_id = 0; iterator->node_ptr = NULL; } /* find the next node */ if(iterator->node_ptr != NULL) cur = iterator->node_ptr->next; else cur = h->htable[0]; int level = iterator->slot_id; while(level < h->size) { if(cur != NULL) { iterator->slot_id = level; iterator->node_ptr = cur; return iterator; } //fprintf(stdout, "checking level %d/n", level); level ++; if(level < h->size) { cur = h->htable[level]; } else cur = NULL; } free(iterator); return NULL; } /*********************************************** * 驱动 ************************************************/ static uint32_t hash(struct hashtable *h, void * key) { (void)h; return (uint32_t)(uint64_t)key % 257; } static int cmp(struct hashtable *h, void * key1, void * key2) { (void)h; return (int)((long)key1 - (long)key2); }

百云在飘

关注

2
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
Hash函数与取模运算

1. hash函数哈希表也称散列表，是一种数据结构，它可以提供快速的插入操作和查找操作，不论有多少数据项，插入与删除只需要接近常量的时间:O(1)时间级。但哈希表也有缺点，它是基于数组的，数组一旦被创建，就难以扩展。某些哈希表被填满时，性能急剧下降。，所以程序员必
复制链接

扫一扫