散列表
散列表一般也称为哈希表,是根据关键码值而直接进行访问的数据结构,是key,value类型的数据存储结构,一般的就是通过将key进行映射到一个位置来访问记录,以加快查找的速度,这个映射函数也称为散列函数,存放记录的数组称为散列表。
常用的散列函数
常用的散列函数有如下:
- 直接寻址法:取关键字或关键字的某个线性函数值为散列地址。即H(key)=key或H(key) = a•key + b,其中a和b为常数,此时得出的计算值都是线性的。
- 平方折中法:取关键字平方后的中间几位作为散列地址。
- 除留余数法:选取关键字之后对该值求余得到下标位置。
还有其他方法都可以作为散列函数,本文主要是讲述简单的数组,配置一个移动的散列函数,数据的增加与删除。
简单的散列表的操作示意图
)
本文就按照大概这个思路去实现字典,该字典参考了redis早期的beta版本的实现。
示例代码
c语言dict.c内容如下;
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dict.h"
static unsigned int arraydictHashFunction(const void *key);
static int arraydictKeyCompare(void *privdata, const void *key1, const void *key2);
struct dictType dict_type = {
arraydictHashFunction,
arraydictKeyCompare
};
static unsigned int arraydictHashFunction(const void *key){
unsigned int hash = 5381;
char *base = (char *)key;
if (!base){
return -1;
}
int lenght = strlen(base);
while (lenght--){
hash = ((hash<<5) + hash) + (*base++);
}
return hash;
}
static unsigned int _dictNextPower(unsigned int size){
unsigned int i = DICT_HT_INITIAL_SIZE;
if (size >= UINT_MAX) return UINT_MAX;
while (1){
if (i >= size) return i;
i *= 2;
}
}
static void _dictInit(dict *ht){
ht->table = NULL;
ht->size = 0;
ht->sizemask = 0;
ht->used = 0;
ht->type = &dict_type;
}
static void *_dictAlloc(int size){
void *p = malloc(size);
if (p == NULL){
return NULL;
}
return p;
}
static void _dictFree(void *ptr){
free(ptr);
}
dict *createDict(){
dict *d;
d = (dict *)_dictAlloc(sizeof(dict));
_dictInit(d);
return d;
}
static int dictExpand(dict *ht, int size){
printf("resize \n");
dict n;
unsigned int realsize = _dictNextPower(size), i;
if (ht->used > size){
return DICT_ERR;
}
_dictInit(&n);
n.size = realsize;
n.sizemask = realsize - 1;
// 申请内存
n.table = _dictAlloc(realsize*sizeof(dictEntry*));
memset(n.table, 0, realsize*sizeof(dictEntry*));
n.used = ht->used;
for (int i = 0; (i< ht->size) && (ht->used > 0); i++){
dictEntry *he, *nextHe;
if (ht->table[i] == NULL) continue;
he = ht->table[i];
while (he){
// 将以前指向的内容重新指导新的地址,原来的dictEntry内容不释放
unsigned int h;
nextHe = he->next;
// 计算hash值 即位置
h = dictHashKey(ht, he->key) & n.sizemask;
// 标记当前的元素的下一个是该位置
he->next = n.table[h];
// 讲当前的表位置地址进行赋值
n.table[h] = he;
ht->used--;
// 进行下一个元素
he = nextHe;
}
}
printf("end reseize\n");
if (ht->table) _dictFree(ht->table);
*ht = n;
return DICT_OK;
}
static int _dictExpandIfNeed(dict *ht){
if (ht->size == 0){
return dictExpand(ht, DICT_HT_INITIAL_SIZE);
}
if (ht->used == ht->size){
return dictExpand(ht, ht->size*2);
}
return DICT_OK;
}
static int arraydictKeyCompare(void *privdata, const void *key1, const void *key2){
(void *)privdata;
return strcmp(key1, key2);
}
int _dictKeyIndex(dict *ht, void *key){
char *key_s = key;
int res, com_res;
unsigned int h;
dictEntry *head;
res = _dictExpandIfNeed(ht);
if (res == DICT_ERR) return DICT_ERR;
// 计算hash值
h = dictHashKey(ht, key) & ht->sizemask;
head = ht->table[h];
while (head){
com_res = dictCompareHashKeys(ht, key_s, head->key);
if (com_res == 0){
return DICT_ERR;
}
head = head->next;
}
return h;
}
int dictAdd(dict *ht, void *key, void *val){
int index;
dictEntry *entry;
char *key_s = key;
char *val_s = val;
// 遍历ht 查看是否已经存在
if ((index = _dictKeyIndex(ht, key)) == DICT_ERR){
return DICT_ERR;
}
// 申请内存存放数据
entry = (dictEntry *)malloc(sizeof(dictEntry));
// 设置值
entry->hash = index;
entry->key = key;
entry->val = val;
entry->next = NULL;
if (ht->table[index]){
entry->next = ht->table[index]->next;
ht->table[index]->next = entry;
} else {
ht->table[index] = entry;
}
ht->used++;
return DICT_OK;
}
dictEntry *dictFind(dict *ht, const void *key){
dictEntry *head;
unsigned int h;
int res ;
char *key_s = (char *)key;
if (ht->size == 0) return NULL;
h = dictHashKey(ht, key) & ht->sizemask;
head = ht->table[h];
while(head) {
res = dictCompareHashKeys(ht, key_s, head->key);
if (res == 0){
return head;
}
head = head->next;
}
return NULL;
}
int dictReplace(dict *ht, void *key, void *val){
dictEntry *entry;
if (dictAdd(ht, key, val) == DICT_OK){
return DICT_OK;
}
entry = dictFind(ht, key);
// 替代旧值
if (entry) {
entry->val = val;
return DICT_OK;
}
return DICT_ERR;
}
int dictDelete(dict *ht, const void *key){
unsigned int h;
int res;
dictEntry *he, *prevHe;
if (ht->size == 0){
return DICT_ERR;
}
h = dictHashKey(ht, key) & ht->sizemask;
he = ht->table[h];
prevHe = NULL;
while (he) {
res = dictCompareHashKeys(ht, key, he->key);
if (res == 0){
if (prevHe)
prevHe->next = he->next;
else
ht->table[h] = he->next;
// 释放dictentry
// 释放key val 对应的申请的内存的数据
_dictFree(he);
ht->used--;
return DICT_OK;
}
}
}
void show_dict(dict *d){
printf("used : %d\n", d->used);
printf("size : %d\n", d->size);
dictEntry *he;
for (int i = 0; i< d->size; i++){
if (d->table[i]) {
he = d->table[i];
while(he){
char *key_s = he->key;
char *val_s = he->val;
printf("start show key\n");
printf("entry : key : %s val : %s hash : %d\n", key_s, val_s, he->hash);
printf("end show key\n");
he = he->next;
}
}
}
}
int main(int argc, char const *argv[])
{
dict d;
_dictInit(&d);
printf("start\n");
dictAdd(&d, "123", "val123");
dictAdd(&d, "123", "val1234");
dictAdd(&d, "1234", "val1234");
show_dict(&d);
dictAdd(&d, "12345", "val1234");
show_dict(&d);
char *v_k = "1231";
printf("find : %s\n", v_k);
dictEntry *en = NULL;
en = dictFind(&d, v_k);
if (en) {
v_k = en->key;
printf("find : key %s\n", v_k);
}
int res = dictReplace(&d, "123", "val");
printf("replace : %d\n", res);
show_dict(&d);
printf("delete\n");
dictDelete(&d, "123");
dictDelete(&d, "1234");
dictDelete(&d, "12345");
show_dict(&d);
return 0;
}
相应的头文件dict.h内容如下:
#ifndef __DICT_H__
#define __DICT_H__
#define DICT_ERR -1
#define DICT_OK 1
typedef struct dictEntry {
void *key;
void *val;
unsigned int hash;
struct dictEntry *next;
} dictEntry;
typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
} dictType;
typedef struct dict {
dictEntry **table;
dictType *type;
unsigned int used;
unsigned int sizemask;
unsigned int size;
void *privdata;
} dict;
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 16
#define UINT_MAX 1024
#define dictHashKey(ht, key) (ht)->type->hashFunction(key)
#define dictCompareHashKeys(ht, key1, key2) \
((ht)->type->keyCompare) ?\
(ht)->type->keyCompare((ht)->privdata, key1, key2) :\
((key1) == (key2))
#define dictGetEntryKey(he) ((he->key))
#define dictGetEntryVal(he) ((he->val))
dict *createDict();
#endif
此时在该目录下的终端中编译该dict.c文件,并运行;
gcc -o dict dict.c
./dict
示例代码的输出结果如下所示;
start
resize
end reseize
used : 2
size : 16
start show key
entry : key : 123 val : val123 hash : 11
end show key
start show key
entry : key : 1234 val : val1234 hash : 15
end show key
used : 3
size : 16
start show key
entry : key : 12345 val : val1234 hash : 4
end show key
start show key
entry : key : 123 val : val123 hash : 11
end show key
start show key
entry : key : 1234 val : val1234 hash : 15
end show key
find : 1231
replace : 1
used : 3
size : 16
start show key
entry : key : 12345 val : val1234 hash : 4
end show key
start show key
entry : key : 123 val : val hash : 11
end show key
start show key
entry : key : 1234 val : val1234 hash : 15
end show key
delete
used : 0
size : 16
这就是本文的散列表的大概实现。
总结
本文描述的相关的散列表,是对前文Python中的字典的实现的一个补充,也是参考redis字典的实现思路,大家可继续探索,由于本人才疏学浅,如有错误请批评指正。