前面讲述了如何用链地址法实现一个哈希表,那么今天来分析一下另一种解决哈希冲突的做法,即为每个Hash值建立一个Hash桶(Bucket),桶的容量是固定的,也就是只能处理固定次数的冲突,如1048576个Hash桶,每个桶中有4个表项(Entry),总计4M个表项。其实这两种做法的实现思路雷同,就是对Hash表中每个Hash值建立一个冲突表,即将冲突的几个记录以表的形式存储在其中。
大致的思路是这样的:
![](https://i-blog.csdnimg.cn/blog_migrate/b7bb422d0cd2303115d09911e3386648.png)
首先,哈希桶的个数是固定的,由用户在构建的时候输入,一旦构建,个数就已经固定;查找的时候,首先将key值通过哈希函数计算出哈希值,根据哈希值找到对应的哈希桶,然后遍历哈希桶内的pairs数组,获取与key对应的value。
主要的数据结构:
/* Forward typedefs so the short names below are valid C type names. */
typedef struct Pair Pair;
typedef struct Bucket Bucket;

/* One key/value association stored inside a bucket. */
struct Pair {
    char *key;
    char *value;
};

/* All pairs whose keys hash to the same slot. */
struct Bucket {
    unsigned int count;   /* number of entries in pairs */
    Pair *pairs;          /* array of count pairs */
};

/* The map itself: an array of buckets. */
struct StrMap {
    unsigned int count;   /* number of entries in buckets */
    Bucket *buckets;      /* array of count buckets */
};
strmap.h
#ifndef STRMAP_H
#define STRMAP_H

/* Standard headers are included outside the extern "C" block below. */
#include <stdlib.h>
#include <string.h>

#ifdef __cplusplus
extern "C"
{
#endif

/* Opaque string-to-string map; the layout is private to strmap.c. */
typedef struct StrMap StrMap;

/*
 * Callback invoked by sm_enum() once per stored key/value pair.
 *
 * Parameters:
 *
 * key: A pointer to a null-terminated C string. The string must not
 * be modified by the client.
 *
 * value: A pointer to a null-terminated C string. The string must
 * not be modified by the client.
 *
 * obj: The client-specific pointer that was handed to sm_enum().
 * This parameter may be null.
 *
 * Return value: None.
 */
typedef void (*sm_enum_func)(const char *key, const char *value, const void *obj);

/*
 * Creates a string map.
 *
 * Parameters:
 *
 * capacity: The number of top-level slots (buckets) this string map
 * should allocate. This parameter must be > 0.
 *
 * Return value: A pointer to a string map object,
 * or null if a new string map could not be allocated.
 */
StrMap * sm_new(unsigned int capacity);

/*
 * Releases all memory held by a string map object: every stored key
 * and value, the per-bucket pair arrays, the bucket array and the map.
 *
 * Parameters:
 *
 * map: A pointer to a string map. A null pointer is ignored.
 * If the supplied string map has been previously released, the
 * behaviour of this function is undefined.
 *
 * Return value: None.
 */
void sm_delete(StrMap *map);

/*
 * Returns the value associated with the supplied key.
 *
 * Parameters:
 *
 * map: A pointer to a string map. If null, 0 is returned.
 *
 * key: A pointer to a null-terminated C string. If null, 0 is returned.
 *
 * out_buf: A pointer to an output buffer which will contain the value,
 * if it exists and fits into the buffer.
 *
 * n_out_buf: The size of the output buffer in bytes.
 *
 * Return value: If out_buf is null and n_out_buf is 0, the return value
 * is the number of bytes required to store the value and its
 * null-terminator, or 0 if the key has no value. For all other parameter
 * configurations the return value is 1 if an associated value was found
 * and completely copied into the output buffer, 0 otherwise.
 */
int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf);

/*
 * Queries the existence of a key.
 *
 * Parameters:
 *
 * map: A pointer to a string map. If null, 0 is returned.
 *
 * key: A pointer to a null-terminated C string. If null, 0 is returned.
 *
 * Return value: 1 if the key exists, 0 otherwise.
 */
int sm_exists(const StrMap *map, const char *key);

/*
 * Associates a value with the supplied key. If the key is already
 * associated with a value, the previous value is replaced.
 *
 * Parameters:
 *
 * map: A pointer to a string map. If null, 0 is returned.
 *
 * key: A pointer to a null-terminated C string. If null, 0 is
 * returned. The string must have a string length > 0. The
 * string will be copied.
 *
 * value: A pointer to a null-terminated C string. If null, 0 is
 * returned. The string must have a string length > 0. The
 * string will be copied.
 *
 * Return value: 1 if the association succeeded, 0 otherwise
 * (invalid argument or allocation failure).
 */
int sm_put(StrMap *map, const char *key, const char *value);

/*
 * Returns the number of associations between keys and values.
 *
 * Parameters:
 *
 * map: A pointer to a string map. If null, 0 is returned.
 *
 * Return value: The number of associations between keys and values.
 */
int sm_get_count(const StrMap *map);

/*
 * An enumerator over all associations between keys and values.
 *
 * Parameters:
 *
 * map: A pointer to a string map. If null, 0 is returned.
 *
 * enum_func: A pointer to a callback function that will be
 * called by this procedure once for every key associated
 * with a value. If null, 0 is returned.
 *
 * obj: A pointer to a client-specific object. This parameter will be
 * passed back to the client's callback function. This parameter can
 * be null.
 *
 * Return value: 1 if enumeration completed, 0 otherwise.
 */
int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj);

#ifdef __cplusplus
}
#endif

#endif /* STRMAP_H */
strmap.c
001 | #include "strmap.h" |
002 |
/* One key/value association; both members point to null-terminated strings. */
typedef struct Pair {
    char *key;
    char *value;
} Pair;

/* The pairs that share one hash slot. */
typedef struct Bucket {
    unsigned int count;   /* number of entries in pairs */
    Pair *pairs;          /* array of count pairs */
} Bucket;

/* Definition of the map type: an array of buckets. */
struct StrMap {
    unsigned int count;   /* number of entries in buckets */
    Bucket *buckets;      /* array of count buckets */
};

/* File-local helpers, defined at the end of this file. */
static Pair * get_pair(Bucket *bucket, const char *key);
static unsigned long hash(const char *str);
024 |
025 | StrMap * sm_new(unsigned int capacity) |
026 | { |
027 | StrMap *map; |
028 | |
029 | map = malloc ( sizeof (StrMap)); |
030 | if (map == NULL) { |
031 | return NULL; |
032 | } |
033 | map->count = capacity; |
034 | map->buckets = malloc (map->count * sizeof (Bucket)); |
035 | if (map->buckets == NULL) { |
036 | free (map); |
037 | return NULL; |
038 | } |
039 | memset (map->buckets, 0, map->count * sizeof (Bucket)); |
040 | return map; |
041 | } |
042 |
043 | void sm_delete(StrMap *map) |
044 | { |
045 | unsigned int i, j, n, m; |
046 | Bucket *bucket; |
047 | Pair *pair; |
048 |
049 | if (map == NULL) { |
050 | return ; |
051 | } |
052 | n = map->count; |
053 | bucket = map->buckets; |
054 | i = 0; |
055 | while (i < n) { |
056 | m = bucket->count; |
057 | pair = bucket->pairs; |
058 | j = 0; |
059 | while (j < m) { |
060 | free (pair->key); |
061 | free (pair->value); |
062 | pair++; |
063 | j++; |
064 | } |
065 | free (bucket->pairs); |
066 | bucket++; |
067 | i++; |
068 | } |
069 | free (map->buckets); |
070 | free (map); |
071 | } |
072 |
073 | int sm_get( const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf) |
074 | { |
075 | unsigned int index; |
076 | Bucket *bucket; |
077 | Pair *pair; |
078 |
079 | if (map == NULL) { |
080 | return 0; |
081 | } |
082 | if (key == NULL) { |
083 | return 0; |
084 | } |
085 | index = hash(key) % map->count; |
086 | bucket = &(map->buckets[index]); |
087 | pair = get_pair(bucket, key); |
088 | if (pair == NULL) { |
089 | return 0; |
090 | } |
091 | if (out_buf == NULL && n_out_buf == 0) { |
092 | return strlen (pair->value) + 1; |
093 | } |
094 | if (out_buf == NULL) { |
095 | return 0; |
096 | } |
097 | if ( strlen (pair->value) >= n_out_buf) { |
098 | return 0; |
099 | } |
100 | strcpy (out_buf, pair->value); |
101 | return 1; |
102 | } |
103 |
104 | int sm_exists( const StrMap *map, const char *key) |
105 | { |
106 | unsigned int index; |
107 | Bucket *bucket; |
108 | Pair *pair; |
109 |
110 | if (map == NULL) { |
111 | return 0; |
112 | } |
113 | if (key == NULL) { |
114 | return 0; |
115 | } |
116 | index = hash(key) % map->count; |
117 | bucket = &(map->buckets[index]); |
118 | pair = get_pair(bucket, key); |
119 | if (pair == NULL) { |
120 | return 0; |
121 | } |
122 | return 1; |
123 | } |
124 |
125 | int sm_put(StrMap *map, const char *key, const char *value) |
126 | { |
127 | unsigned int key_len, value_len, index; |
128 | Bucket *bucket; |
129 | Pair *tmp_pairs, *pair; |
130 | char *tmp_value; |
131 | char *new_key, *new_value; |
132 |
133 | if (map == NULL) { |
134 | return 0; |
135 | } |
136 | if (key == NULL || value == NULL) { |
137 | return 0; |
138 | } |
139 | key_len = strlen (key); |
140 | value_len = strlen (value); |
141 | /* Get a pointer to the bucket the key string hashes to */ |
142 | index = hash(key) % map->count; |
143 | bucket = &(map->buckets[index]); |
144 | /* Check if we can handle insertion by simply replacing |
145 | * an existing value in a key-value pair in the bucket. |
146 | */ |
147 | if ((pair = get_pair(bucket, key)) != NULL) { |
148 | /* The bucket contains a pair that matches the provided key, |
149 | * change the value for that pair to the new value. |
150 | */ |
151 | if ( strlen (pair->value) < value_len) { |
152 | /* If the new value is larger than the old value, re-allocate |
153 | * space for the new larger value. |
154 | */ |
155 | tmp_value = realloc (pair->value, (value_len + 1) * sizeof ( char )); |
156 | if (tmp_value == NULL) { |
157 | return 0; |
158 | } |
159 | pair->value = tmp_value; |
160 | } |
161 | /* Copy the new value into the pair that matches the key */ |
162 | strcpy (pair->value, value); |
163 | return 1; |
164 | } |
165 | /* Allocate space for a new key and value */ |
166 | new_key = malloc ((key_len + 1) * sizeof ( char )); |
167 | if (new_key == NULL) { |
168 | return 0; |
169 | } |
170 | new_value = malloc ((value_len + 1) * sizeof ( char )); |
171 | if (new_value == NULL) { |
172 | free (new_key); |
173 | return 0; |
174 | } |
175 | /* Create a key-value pair */ |
176 | if (bucket->count == 0) { |
177 | /* The bucket is empty, lazily allocate space for a single |
178 | * key-value pair. |
179 | */ |
180 | bucket->pairs = malloc ( sizeof (Pair)); |
181 | if (bucket->pairs == NULL) { |
182 | free (new_key); |
183 | free (new_value); |
184 | return 0; |
185 | } |
186 | bucket->count = 1; |
187 | } |
188 | else { |
189 | /* The bucket wasn't empty but no pair existed that matches the provided |
190 | * key, so create a new key-value pair. |
191 | */ |
192 | tmp_pairs = realloc (bucket->pairs, (bucket->count + 1) * sizeof (Pair)); |
193 | if (tmp_pairs == NULL) { |
194 | free (new_key); |
195 | free (new_value); |
196 | return 0; |
197 | } |
198 | bucket->pairs = tmp_pairs; |
199 | bucket->count++; |
200 | } |
201 | /* Get the last pair in the chain for the bucket */ |
202 | pair = &(bucket->pairs[bucket->count - 1]); |
203 | pair->key = new_key; |
204 | pair->value = new_value; |
205 | /* Copy the key and its value into the key-value pair */ |
206 | strcpy (pair->key, key); |
207 | strcpy (pair->value, value); |
208 | return 1; |
209 | } |
210 |
211 | int sm_get_count( const StrMap *map) |
212 | { |
213 | unsigned int i, j, n, m; |
214 | unsigned int count; |
215 | Bucket *bucket; |
216 | Pair *pair; |
217 |
218 | if (map == NULL) { |
219 | return 0; |
220 | } |
221 | bucket = map->buckets; |
222 | n = map->count; |
223 | i = 0; |
224 | count = 0; |
225 | while (i < n) { |
226 | pair = bucket->pairs; |
227 | m = bucket->count; |
228 | j = 0; |
229 | while (j < m) { |
230 | count++; |
231 | pair++; |
232 | j++; |
233 | } |
234 | bucket++; |
235 | i++; |
236 | } |
237 | return count; |
238 | } |
239 |
240 | int sm_enum( const StrMap *map, sm_enum_func enum_func, const void *obj) |
241 | { |
242 | unsigned int i, j, n, m; |
243 | Bucket *bucket; |
244 | Pair *pair; |
245 |
246 | if (map == NULL) { |
247 | return 0; |
248 | } |
249 | if (enum_func == NULL) { |
250 | return 0; |
251 | } |
252 | bucket = map->buckets; |
253 | n = map->count; |
254 | i = 0; |
255 | while (i < n) { |
256 | pair = bucket->pairs; |
257 | m = bucket->count; |
258 | j = 0; |
259 | while (j < m) { |
260 | enum_func(pair->key, pair->value, obj); |
261 | pair++; |
262 | j++; |
263 | } |
264 | bucket++; |
265 | i++; |
266 | } |
267 | return 1; |
268 | } |
269 |
270 | /* |
271 | * Returns a pair from the bucket that matches the provided key, |
272 | * or null if no such pair exist. |
273 | */ |
274 | static Pair * get_pair(Bucket *bucket, const char *key) |
275 | { |
276 | unsigned int i, n; |
277 | Pair *pair; |
278 |
279 | n = bucket->count; |
280 | if (n == 0) { |
281 | return NULL; |
282 | } |
283 | pair = bucket->pairs; |
284 | i = 0; |
285 | while (i < n) { |
286 | if (pair->key != NULL && pair->value != NULL) { |
287 | if ( strcmp (pair->key, key) == 0) { |
288 | return pair; |
289 | } |
290 | } |
291 | pair++; |
292 | i++; |
293 | } |
294 | return NULL; |
295 | } |
296 |
/*
 * Computes a hash code for a null-terminated string: the accumulator
 * starts at 5381 and, for each character, is multiplied by 33 and the
 * character value is added.
 */
static unsigned long hash(const char *str)
{
    unsigned long h = 5381;
    const char *p;

    for (p = str; *p != '\0'; p++) {
        int c = *p;

        h = h * 33 + c;
    }
    return h;
}
前一节与这节这两种实现方法看似比较类似,但也有差异:
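基于哈希桶的情况下,由于Hash桶容量的限制,所以,有可能发生Hash表填不满的情况,也就是,虽然Hash表里面还有空位,但是新建的表项由于冲突过多,而不能装入Hash表中。不过,这样的实现也有其好处,就是查表的最大开销是可以确定的,因为最多处理的冲突数是确定的,所以算法的时间复杂度为O(1)+O(m),其中m为Hash桶容量。需要注意的是,上面给出的strmap.c中,每个桶的pairs数组是通过realloc按需增长的,并没有限制桶的容量;如果要获得这里所说的固定开销,需要在sm_put中对bucket->count设置上限,超过上限时返回失败。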
而另一种通过链表的实现,由于Hash桶的容量是无限的,因此,只要没有超出Hash表的最大容量,就能够容纳新建的表项。但是,一旦发生了Hash冲突严重的情况,就会造成Hash桶的链表过长,大大降低查找效率。在最坏的情况下,时间复杂度退化为O(n),其中n为Hash表的总容量。当然,这种情况的概率小之又小,几乎是可以忽略的。