hash table相关2

前面讲述了如何用链地址法实现一个哈希表,今天来分析另一种解决哈希冲突的做法:为每个Hash值建立一个Hash桶(Bucket),桶的容量是固定的,也就是只能处理固定次数的冲突。例如有1048576个Hash桶,每个桶中有4个表项(Entry),总计4M个表项。其实这两种做法的实现思路类似,都是为Hash表中的每个Hash值建立一个冲突表,将冲突的几个记录以表的形式存储在其中。

大致的思路是这样的:

首先,哈希桶的个数是固定的,由用户在构建时输入,一旦构建完成,个数就不再改变;查找时先将key值通过哈希函数计算出哈希值,根据哈希值找到对应的哈希桶,然后遍历哈希桶内的pairs数组,找到与key匹配的键值对。

主要的数据结构:

/* One key-value association stored inside a bucket. */
struct Pair {
    char *key;      /* null-terminated key string */
    char *value;    /* null-terminated value string */
};

/* All pairs whose keys hash to the same slot. */
struct Bucket {
    unsigned int count;     /* number of entries in pairs */
    struct Pair *pairs;     /* array holding count pairs */
};

/* The map itself: an array of buckets indexed by hash value. */
struct StrMap {
    unsigned int count;         /* number of buckets */
    struct Bucket *buckets;     /* array holding count buckets */
};
  • 本小节主要是学习一下国外大牛是如何实现哈希表的。完整的代码,请看: 这里,一位圣安德鲁斯大学的讲师: KRISTENSSON博客

strmap.h

/*
 * strmap.h - public interface of a string-to-string hash map.
 *
 * The map type is opaque; clients only ever hold a StrMap pointer obtained
 * from sm_new() and must release it with sm_delete().
 */
#ifndef STRMAP_H
#define STRMAP_H

/* Standard headers are included before the extern "C" block so that their
 * declarations are never wrapped in a C linkage specification. */
#include <stdlib.h>
#include <string.h>

#ifdef __cplusplus
extern "C"
{
#endif

typedef struct StrMap StrMap;

/*
 * This callback function is called once per key-value when iterating over
 * all keys associated to values.
 *
 * Parameters:
 *
 * key: A pointer to a null-terminated C string. The string must not
 * be modified by the client.
 *
 * value: A pointer to a null-terminated C string. The string must
 * not be modified by the client.
 *
 * obj: A pointer to a client-specific object. This parameter may be
 * null.
 *
 * Return value: None.
 */
typedef void (*sm_enum_func)(const char *key, const char *value, const void *obj);

/*
 * Creates a string map.
 *
 * Parameters:
 *
 * capacity: The number of top-level slots this string map
 * should allocate. This parameter must be > 0.
 *
 * Return value: A pointer to a string map object,
 * or null if a new string map could not be allocated.
 */
StrMap * sm_new(unsigned int capacity);

/*
 * Releases all memory held by a string map object.
 *
 * Parameters:
 *
 * map: A pointer to a string map. This parameter cannot be null.
 * If the supplied string map has been previously released, the
 * behaviour of this function is undefined.
 *
 * Return value: None.
 */
void sm_delete(StrMap *map);

/*
 * Returns the value associated with the supplied key.
 *
 * Parameters:
 *
 * map: A pointer to a string map. This parameter cannot be null.
 *
 * key: A pointer to a null-terminated C string. This parameter cannot
 * be null.
 *
 * out_buf: A pointer to an output buffer which will contain the value,
 * if it exists and fits into the buffer.
 *
 * n_out_buf: The size of the output buffer in bytes.
 *
 * Return value: If out_buf is set to null and n_out_buf is set to 0 the return
 * value will be the number of bytes required to store the value (if it exists)
 * and its null-terminator. For all other parameter configurations the return value
 * is 1 if an associated value was found and completely copied into the output buffer,
 * 0 otherwise.
 */
int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf);

/*
 * Queries the existence of a key.
 *
 * Parameters:
 *
 * map: A pointer to a string map. This parameter cannot be null.
 *
 * key: A pointer to a null-terminated C string. This parameter cannot
 * be null.
 *
 * Return value: 1 if the key exists, 0 otherwise.
 */
int sm_exists(const StrMap *map, const char *key);

/*
 * Associates a value with the supplied key. If the key is already
 * associated with a value, the previous value is replaced.
 *
 * Parameters:
 *
 * map: A pointer to a string map. This parameter cannot be null.
 *
 * key: A pointer to a null-terminated C string. This parameter
 * cannot be null. The string must have a string length > 0. The
 * string will be copied.
 *
 * value: A pointer to a null-terminated C string. This parameter
 * cannot be null. The string must have a string length > 0. The
 * string will be copied.
 *
 * Return value: 1 if the association succeeded, 0 otherwise.
 */
int sm_put(StrMap *map, const char *key, const char *value);

/*
 * Returns the number of associations between keys and values.
 *
 * Parameters:
 *
 * map: A pointer to a string map. This parameter cannot be null.
 *
 * Return value: The number of associations between keys and values.
 */
int sm_get_count(const StrMap *map);

/*
 * An enumerator over all associations between keys and values.
 *
 * Parameters:
 *
 * map: A pointer to a string map. This parameter cannot be null.
 *
 * enum_func: A pointer to a callback function that will be
 * called by this procedure once for every key associated
 * with a value. This parameter cannot be null.
 *
 * obj: A pointer to a client-specific object. This parameter will be
 * passed back to the client's callback function. This parameter can
 * be null.
 *
 * Return value: 1 if enumeration completed, 0 otherwise.
 */
int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj);

#ifdef __cplusplus
}
#endif

#endif /* STRMAP_H */

strmap.c

001 #include "strmap.h"
002  
typedef struct Pair Pair;

typedef struct Bucket Bucket;

/* One key-value association stored inside a bucket. */
struct Pair {
    char *key;      /* null-terminated key string */
    char *value;    /* null-terminated value string */
};

/* All pairs whose keys hash to the same slot. */
struct Bucket {
    unsigned int count;     /* number of entries in pairs */
    Pair *pairs;            /* array holding count pairs */
};

/* The map itself: an array of buckets indexed by hash value. */
struct StrMap {
    unsigned int count;     /* number of buckets */
    Bucket *buckets;        /* array holding count buckets */
};

/* Finds the pair matching key inside one bucket, or returns NULL. */
static Pair * get_pair(Bucket *bucket, const char *key);
/* Computes the hash code of a null-terminated string. */
static unsigned long hash(const char *str);
024  
025 StrMap * sm_new(unsigned int capacity)
026 {
027     StrMap *map;
028      
029     map = malloc(sizeof(StrMap));
030     if (map == NULL) {
031         return NULL;
032     }
033     map->count = capacity;
034     map->buckets = malloc(map->count * sizeof(Bucket));
035     if (map->buckets == NULL) {
036         free(map);
037         return NULL;
038     }
039     memset(map->buckets, 0, map->count * sizeof(Bucket));
040     return map;
041 }
042  
043 void sm_delete(StrMap *map)
044 {
045     unsigned int i, j, n, m;
046     Bucket *bucket;
047     Pair *pair;
048  
049     if (map == NULL) {
050         return;
051     }
052     n = map->count;
053     bucket = map->buckets;
054     i = 0;
055     while (i < n) {
056         m = bucket->count;
057         pair = bucket->pairs;
058         j = 0;
059         while(j < m) {
060             free(pair->key);
061             free(pair->value);
062             pair++;
063             j++;
064         }
065         free(bucket->pairs);
066         bucket++;
067         i++;
068     }
069     free(map->buckets);
070     free(map);
071 }
072  
073 int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned intn_out_buf)
074 {
075     unsigned int index;
076     Bucket *bucket;
077     Pair *pair;
078  
079     if (map == NULL) {
080         return 0;
081     }
082     if (key == NULL) {
083         return 0;
084     }
085     index = hash(key) % map->count;
086     bucket = &(map->buckets[index]);
087     pair = get_pair(bucket, key);
088     if (pair == NULL) {
089         return 0;
090     }
091     if (out_buf == NULL && n_out_buf == 0) {
092         return strlen(pair->value) + 1;
093     }
094     if (out_buf == NULL) {
095         return 0;
096     }
097     if (strlen(pair->value) >= n_out_buf) {
098         return 0;
099     }
100     strcpy(out_buf, pair->value);
101     return 1;
102 }
103  
104 int sm_exists(const StrMap *map, const char *key)
105 {
106     unsigned int index;
107     Bucket *bucket;
108     Pair *pair;
109  
110     if (map == NULL) {
111         return 0;
112     }
113     if (key == NULL) {
114         return 0;
115     }
116     index = hash(key) % map->count;
117     bucket = &(map->buckets[index]);
118     pair = get_pair(bucket, key);
119     if (pair == NULL) {
120         return 0;
121     }
122     return 1;
123 }
124  
125 int sm_put(StrMap *map, const char *key, const char *value)
126 {
127     unsigned int key_len, value_len, index;
128     Bucket *bucket;
129     Pair *tmp_pairs, *pair;
130     char *tmp_value;
131     char *new_key, *new_value;
132  
133     if (map == NULL) {
134         return 0;
135     }
136     if (key == NULL || value == NULL) {
137         return 0;
138     }
139     key_len = strlen(key);
140     value_len = strlen(value);
141     /* Get a pointer to the bucket the key string hashes to */
142     index = hash(key) % map->count;
143     bucket = &(map->buckets[index]);
144     /* Check if we can handle insertion by simply replacing
145      * an existing value in a key-value pair in the bucket.
146      */
147     if ((pair = get_pair(bucket, key)) != NULL) {
148         /* The bucket contains a pair that matches the provided key,
149          * change the value for that pair to the new value.
150          */
151         if (strlen(pair->value) < value_len) {
152             /* If the new value is larger than the old value, re-allocate
153              * space for the new larger value.
154              */
155             tmp_value = realloc(pair->value, (value_len + 1) *sizeof(char));
156             if (tmp_value == NULL) {
157                 return 0;
158             }
159             pair->value = tmp_value;
160         }
161         /* Copy the new value into the pair that matches the key */
162         strcpy(pair->value, value);
163         return 1;
164     }
165     /* Allocate space for a new key and value */
166     new_key = malloc((key_len + 1) * sizeof(char));
167     if (new_key == NULL) {
168         return 0;
169     }
170     new_value = malloc((value_len + 1) * sizeof(char));
171     if (new_value == NULL) {
172         free(new_key);
173         return 0;
174     }
175     /* Create a key-value pair */
176     if (bucket->count == 0) {
177         /* The bucket is empty, lazily allocate space for a single
178          * key-value pair.
179          */
180         bucket->pairs = malloc(sizeof(Pair));
181         if (bucket->pairs == NULL) {
182             free(new_key);
183             free(new_value);
184             return 0;
185         }
186         bucket->count = 1;
187     }
188     else {
189         /* The bucket wasn't empty but no pair existed that matches the provided
190          * key, so create a new key-value pair.
191          */
192         tmp_pairs = realloc(bucket->pairs, (bucket->count + 1) *sizeof(Pair));
193         if (tmp_pairs == NULL) {
194             free(new_key);
195             free(new_value);
196             return 0;
197         }
198         bucket->pairs = tmp_pairs;
199         bucket->count++;
200     }
201     /* Get the last pair in the chain for the bucket */
202     pair = &(bucket->pairs[bucket->count - 1]);
203     pair->key = new_key;
204     pair->value = new_value;
205     /* Copy the key and its value into the key-value pair */
206     strcpy(pair->key, key);
207     strcpy(pair->value, value);
208     return 1;
209 }
210  
211 int sm_get_count(const StrMap *map)
212 {
213     unsigned int i, j, n, m;
214     unsigned int count;
215     Bucket *bucket;
216     Pair *pair;
217  
218     if (map == NULL) {
219         return 0;
220     }
221     bucket = map->buckets;
222     n = map->count;
223     i = 0;
224     count = 0;
225     while (i < n) {
226         pair = bucket->pairs;
227         m = bucket->count;
228         j = 0;
229         while (j < m) {
230             count++;
231             pair++;
232             j++;
233         }
234         bucket++;
235         i++;
236     }
237     return count;
238 }
239  
240 int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj)
241 {
242     unsigned int i, j, n, m;
243     Bucket *bucket;
244     Pair *pair;
245  
246     if (map == NULL) {
247         return 0;
248     }
249     if (enum_func == NULL) {
250         return 0;
251     }
252     bucket = map->buckets;
253     n = map->count;
254     i = 0;
255     while (i < n) {
256         pair = bucket->pairs;
257         m = bucket->count;
258         j = 0;
259         while (j < m) {
260             enum_func(pair->key, pair->value, obj);
261             pair++;
262             j++;
263         }
264         bucket++;
265         i++;
266     }
267     return 1;
268 }
269  
270 /*
271  * Returns a pair from the bucket that matches the provided key,
272  * or null if no such pair exist.
273  */
274 static Pair * get_pair(Bucket *bucket, const char *key)
275 {
276     unsigned int i, n;
277     Pair *pair;
278  
279     n = bucket->count;
280     if (n == 0) {
281         return NULL;
282     }
283     pair = bucket->pairs;
284     i = 0;
285     while (i < n) {
286         if (pair->key != NULL && pair->value != NULL) {
287             if (strcmp(pair->key, key) == 0) {
288                 return pair;
289             }
290         }
291         pair++;
292         i++;
293     }
294     return NULL;
295 }
296  
/*
 * Returns a hash code for the provided null-terminated string.
 *
 * Starts from 5381 and, for each byte, multiplies the running value by 33
 * (written as shift-and-add) before adding the byte. Bytes are read as
 * unsigned char so the result does not depend on whether plain char is
 * signed on the target platform.
 */
static unsigned long hash(const char *str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    unsigned long h = 5381;

    while (*cursor != '\0') {
        h = ((h << 5) + h) + *cursor;
        cursor++;
    }
    return h;
}

前一节与这节这两种实现方法看似比较类似,但也有差异:

基于哈希桶的情况下,由于Hash桶容量的限制,有可能发生Hash表填不满的情况:虽然Hash表里面还有空位,但是新建的表项由于冲突过多而不能装入Hash表中。不过,这样的实现也有其好处,就是查表的最大开销是可以确定的,因为最多处理的冲突数是确定的,所以算法的时间复杂度为O(1)+O(m),其中m为Hash桶容量。(需要注意:上面的strmap实现中,每个桶的pairs数组是通过realloc按需增长的,并没有固定的容量上限;这里讨论的是桶容量固定的情形。)

而另一种通过链表的实现,由于Hash桶的容量是无限的,因此,只要没有超出Hash表的最大容量,就能够容纳新建的表项。但是,一旦发生了Hash冲突严重的情况,就会造成Hash桶的链表过长,大大降低查找效率。在最坏的情况下,时间复杂度退化为O(n),其中n为Hash表的总容量。当然,这种情况的概率小之又小,几乎是可以忽略的。


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值