引言
哈希表是一种存储 key-value 键值对的结构,查询和插入的平均效率很高。这里先给出一个 hash_map 的 C 语言初步实现,后面再慢慢完善。目前的实现方式是动态数组 + 链表,也就是常说的拉链法(separate chaining):hash 函数(这里使用 murmur_hash2)把 key 映射到数组的 index 位置,映射到同一位置的 key-value 节点则用链表串起来储存。
代码部分
hash_map.h
lf 代表负载因子(load factor),即 节点数 / 数组长度。
// implement a dynamic hash_map
// lf=nodes/ arr_len;
#ifndef CPP_LEARNING_HASH_MAP_H
#define CPP_LEARNING_HASH_MAP_H
#include <stdbool.h>
#include <stdint.h>
/* Load-factor threshold: hash_map_put grows the bucket array when
 * (node_num + 1) / capacity would exceed this value. */
#define LOAD_FACTOR 0.7
/* Key type: a NUL-terminated C string (the map hashes it over strlen bytes
 * and compares keys with strcmp). */
typedef char* K;
/* Value type: a char pointer that the map stores and hands back unchanged. */
typedef char* V;
/* One entry of a bucket's singly linked chain. The node stores the key and
 * value pointers it was given; it does not copy the strings they point to. */
typedef struct kvnode{
/* key pointer as passed to hash_map_put */
K key;
/* value pointer currently associated with key */
V value;
/* next node in the same bucket, or NULL at the end of the chain */
struct kvnode* next;
}KVNode;
/* The hash map itself: a dynamically allocated bucket array plus bookkeeping. */
typedef struct {
/* number of buckets in hash_pointer */
int capacity;
/* number of key-value nodes currently stored */
int node_num;
/* seed passed to the hash function; set from time(NULL) at creation */
int hash_seed;
KVNode ** hash_pointer; // bucket array: capacity entries, each the head of a chain (or NULL)
}HashMap;
/*
 * Public API of the hash map.
 *
 * The internal helpers (calc_load_factor, hash, expand_hash_map, rehash,
 * del_node_link) are `static` in hash_map.c and are defined there before
 * their first use, so they are intentionally not declared in this header:
 * a `static` prototype in a header hands every including translation unit
 * a declaration of a function it can neither define nor call.
 */

/**
 * Create an empty map.
 *
 * @param init_capacity initial number of buckets; should be positive.
 * @return the new map, or a null pointer if the map object itself cannot be
 *         allocated. If the bucket array cannot be allocated the process
 *         is terminated with exit(-9).
 */
HashMap* create_hash_map(int init_capacity);

/**
 * Destroy a map: frees every node, the bucket array and the map object.
 * The key and value strings are not freed; they remain owned by the caller.
 *
 * NOTE(review): do not rely on the returned value until the implementation
 * is confirmed to return one on every path.
 */
bool del_hash_map(HashMap* map);

/**
 * Insert key -> value, or replace the value of an existing key.
 * The map stores the key and value pointers as given (no copy), so both
 * must stay valid for as long as the entry is in the map.
 *
 * @return the previous value if key was already present, otherwise NULL.
 */
V hash_map_put(HashMap* map,K key, V value);

/**
 * Remove the entry for key and free its node.
 *
 * @return true when the entry was removed. If key is not present, an error
 *         is printed and the process is terminated with exit(-5).
 */
bool hash_map_remove(HashMap* map, K key);

/**
 * Look up the value stored for key.
 *
 * @return the stored value. If key is not present, an error is printed and
 *         the process is terminated with exit(-5).
 */
V hash_map_get(HashMap* map,K key);
#endif //CPP_LEARNING_HASH_MAP_H
hash_map.c
数组中每个存放 key-value 链表的位置也叫哈希桶(hash bucket)。
// implement a dynamic hash_map
// lf=nodes/ arr_len;
#include "hash_map.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
/*
 * Load factor of a table: node_num divided by capacity, computed in
 * single-precision floating point.
 */
static float calc_load_factor(int node_num, int capacity){
    const float nodes = (float)node_num;
    const float buckets = (float)capacity;
    return nodes / buckets;
}
/* murmur_hash2 */
/*
 * MurmurHash2 (32-bit) of the first len bytes at key, mixed with seed.
 *
 * key  : bytes to hash; any alignment is accepted.
 * len  : number of bytes to read from key.
 * seed : initial state; equal (bytes, len, seed) always give an equal hash.
 *
 * Each 4-byte group is loaded in the host's native byte order, so the
 * result for inputs of 4 bytes or more depends on the machine's endianness.
 */
static uint32_t hash(const void* key, int len, uint32_t seed) {
    const uint32_t m = 0x5bd1e995;
    const int r = 24;
    uint32_t h = seed ^ (uint32_t)len;
    const unsigned char* data = (const unsigned char*)key;

    while (len >= 4) {
        /* Load through memcpy rather than *(uint32_t*)data: the pointer cast
         * is undefined behavior when data is not 4-byte aligned and breaks
         * the strict-aliasing rule. Compilers turn this into a plain load. */
        uint32_t k;
        memcpy(&k, data, sizeof k);

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Mix in the 1-3 trailing bytes; the cases cascade on purpose. */
    switch (len) {
    case 3:
        h ^= (uint32_t)data[2] << 16;
        /* fallthrough */
    case 2:
        h ^= (uint32_t)data[1] << 8;
        /* fallthrough */
    case 1:
        h ^= data[0];
        h *= m;
        break;
    default:
        break;
    }

    /* Final avalanche so the last bytes affect every output bit. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;
    return h;
}
/*
 * Allocate an empty map with init_capacity buckets.
 *
 * init_capacity : requested bucket count; values below 1 are raised to 1,
 *                 because a table with no buckets would make every
 *                 `hash % capacity` a division by zero.
 *
 * Returns the new map, or NULL (after printing a message) when the map
 * object cannot be allocated. Failure to allocate the bucket array prints a
 * message and terminates the process with exit(-9).
 *
 * The hash seed is taken from the current time, so bucket placement differs
 * between runs.
 */
HashMap* create_hash_map(int init_capacity){
    if (init_capacity < 1) {
        init_capacity = 1;
    }

    HashMap* map = calloc(1, sizeof *map);
    if (map == NULL) {
        printf("allocate error in create_hash_map.\n");
        return NULL;   /* was `return false`: wrong type for a pointer result */
    }

    map->capacity = init_capacity;
    map->hash_seed = (int)time(NULL);
    /* calloc leaves every bucket NULL, i.e. every chain empty */
    map->hash_pointer = calloc((size_t)map->capacity, sizeof *map->hash_pointer);
    if (map->hash_pointer == NULL) {
        printf("allocate error in create_hash_map(hash_bucket).\n");
        exit(-9);
    }
    return map;
}
/*
 * Free every node of the chain starting at head (head may be NULL).
 * Only the nodes are released; the key and value pointers they hold are
 * left untouched.
 */
static void del_node_link(KVNode* head){
    KVNode *node = head;
    while (node != NULL) {
        /* read the successor before the node is released */
        KVNode *following = node->next;
        free(node);
        node = following;
    }
}
/*
 * Destroy a map: free every chain, the bucket array and the map object.
 * Key and value strings are not freed; they stay owned by the caller.
 *
 * Returns false when map is NULL (nothing to do), true once everything has
 * been released. The original fell off the end of a bool function, which
 * is undefined behavior as soon as a caller reads the result.
 */
bool del_hash_map(HashMap* map){
    if (map == NULL) {
        return false;
    }

    KVNode **buckets = map->hash_pointer;
    if (buckets != NULL) {
        for (int i = 0; i < map->capacity; ++i) {
            del_node_link(buckets[i]);
        }
    }

    free(buckets);
    free(map);
    return true;
}
/*
 * Bucket index of key in a table with `capacity` buckets: the seeded hash of
 * the key's bytes (terminating NUL excluded) reduced modulo capacity.
 * Keys that land in the same bucket share this index only, not necessarily
 * the full 32-bit hash value.
 */
static int rehash(K key,int capacity, int seed){
    const uint32_t digest = hash(key, (int)strlen(key), (uint32_t)seed);
    return (int)(digest % (uint32_t)capacity);
}
/*
 * Replace the bucket array with one of dest_capacity buckets and move every
 * stored node into its bucket in the new array.
 *
 * Returns true on success, false when dest_capacity is not positive (the
 * map is left unchanged). Allocation failure prints a message and
 * terminates the process with exit(-2).
 *
 * Every node is re-indexed individually. Nodes that shared a bucket under
 * the old capacity do not have to share one under the new capacity, so
 * moving a whole chain by its head's key (as the previous version did)
 * misplaces entries, and assigning a chain into an already occupied new
 * bucket drops the nodes that were there.
 */
static bool expand_hash_map(HashMap* map, int dest_capacity){
    if (dest_capacity <= 0) {
        return false;
    }

    KVNode** new_buckets = calloc((size_t)dest_capacity, sizeof *new_buckets);
    if (new_buckets == NULL) {
        printf("allocation error in expand_hash_map.\n");
        exit(-2);
    }

    KVNode** old_buckets = map->hash_pointer;
    for (int i = 0; i < map->capacity; i++) {
        KVNode* node = old_buckets[i];
        while (node != NULL) {
            /* remember the rest of the old chain before relinking node */
            KVNode* rest = node->next;
            int new_index = rehash(node->key, dest_capacity, map->hash_seed);
            /* push onto the front of the destination chain */
            node->next = new_buckets[new_index];
            new_buckets[new_index] = node;
            node = rest;
        }
    }

    /* only the old array is released; the nodes now live in new_buckets */
    free(old_buckets);
    map->hash_pointer = new_buckets;
    map->capacity = dest_capacity;
    return true;
}
/*
 * Insert key -> value, or replace the value of an existing key.
 *
 * The key and value pointers are stored as given (no copy), so both must
 * remain valid while the entry is in the map.
 *
 * Returns the previous value when key was already present, NULL when a new
 * entry was created.
 *
 * A new entry grows the table to twice its capacity first when
 * (node_num + 1) / capacity would exceed LOAD_FACTOR. Replacing the value
 * of an existing key never grows the table, since the node count does not
 * change. Allocation failure for the new node prints a message and
 * terminates the process with exit(-3); a failed expansion terminates with
 * exit(-7).
 */
V hash_map_put(HashMap* map,K key, V value){
    int index = rehash(key, map->capacity, map->hash_seed);

    /* existing key: swap the value in place */
    for (KVNode* node = map->hash_pointer[index]; node != NULL; node = node->next) {
        if (strcmp(node->key, key) == 0) {
            V old_value = node->value;
            node->value = value;
            return old_value;
        }
    }

    /* new key: allocate the node before touching the table */
    KVNode* new_node = calloc(1, sizeof *new_node);
    if (new_node == NULL) {
        printf("allocation error in hash_map_put.\n");
        exit(-3);
    }
    new_node->key = key;
    new_node->value = value;

    if (calc_load_factor(map->node_num + 1, map->capacity) > LOAD_FACTOR) {
        if (!expand_hash_map(map, map->capacity << 1)) {
            printf("expand hash_map error!!\n");
            exit(-7);
        }
        /* the capacity changed, so the bucket index has to be recomputed */
        index = rehash(key, map->capacity, map->hash_seed);
    }

    /* push onto the front of the bucket's chain */
    new_node->next = map->hash_pointer[index];
    map->hash_pointer[index] = new_node;
    map->node_num++;
    return NULL;
}
/*
 * Look up key and return the value stored for it.
 * A missing key is fatal: a KeyError line is printed and the process ends
 * with exit(-5), so this function only returns when the key is found.
 */
V hash_map_get(HashMap* map,K key){
    const uint32_t digest = hash(key, strlen(key), map->hash_seed);
    const int bucket = digest % map->capacity;

    for (KVNode *node = map->hash_pointer[bucket]; node != NULL; node = node->next) {
        if (strcmp(node->key, key) != 0) {
            continue;
        }
        return node->value;
    }

    printf("hash_map_get KeyError: %s\n",key);
    exit(-5);
}
/*
 * Unlink and free the node stored for key; returns true once it is removed.
 * Only the node is freed, not the key or value it points to.
 * A missing key is fatal: a KeyError line is printed and the process ends
 * with exit(-5).
 */
bool hash_map_remove(HashMap* map, K key){
    const uint32_t digest = hash(key, strlen(key), map->hash_seed);
    const int bucket = digest % map->capacity;

    /* Counted down up front: the only path that removes nothing ends the
     * process, so the count is never observed out of step. */
    map->node_num--;

    /* `slot` is the pointer that leads to the current node: the bucket head
     * first, then each predecessor's next field. Rewriting *slot unlinks the
     * node without a separate head case. */
    KVNode **slot = &map->hash_pointer[bucket];
    while (*slot != NULL) {
        KVNode *candidate = *slot;
        if (strcmp(candidate->key, key) == 0) {
            *slot = candidate->next;
            free(candidate);
            return true;
        }
        slot = &candidate->next;
    }

    printf("hash_map_remove KeyError: %s\n",key);
    exit(-5);
}