概述
redis内部的字典实现,字典使用了hash表作为dict底层实现。
数据结构
一个dict中包含两个hash表(ht[2]),其中ht[1]主要是在对dict进行rehash时使用。hash表使用链地址法解决key冲突,即包含相同key的value用链表存储,入下图所示。
api
dict *dictCreate(dictType *type, void *privDataPtr); //创建hash表
void dictRelease(dict *d); //清空并释放hash表
void dictEmpty(dict *d); //清空hash表
int dictResize(dict *d); //将hash表大小重新调整为可以容纳当前所有元素的最小大小,在redis中,当hash表使用率小于10%时,会执行resize以节省内存
void dictEnableResize(void); //允许resize
void dictDisableResize(void); //不允许resize
int dictExpand(dict *d, unsigned long size); //为dict扩容
int dictAdd(dict *d, void *key, void *val); //添加元素
dictEntry *dictAddRaw(dict *d, void *key); //添加元素但不设置value,主要为方便存储非指针value(比如整数)
int dictReplace(dict *d, void *key, void *val); //添加元素,如果key存在,则更新它
int dictDelete(dict *d, const void *key); //删除元素
int dictDeleteNoFree(dict *d, const void *key); //删除元素,但是不需要为key和value调用keyDestructor和valDestructor
dictEntry * dictFind(dict *d, const void *key); //查找元素,返回dictEntry结构体指针
void *dictFetchValue(dict *d, const void *key); //查找元素,返回val指针
dictEntry *dictGetRandomKey(dict *d); //随机返回一个元素
dictIterator *dictGetIterator(dict *d); //创建一个不安全的迭代器
dictIterator *dictGetSafeIterator(dict *d); //创建一个安全的迭代器
dictEntry *dictNext(dictIterator *iter); //指向下一个节点,如果迭代完毕,返回NULL
void dictReleaseIterator(dictIterator *iter); //释放迭代器
unsigned int dictGenHashFunction(const void *key, int len); //MurmurHash2 hash函数
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len); //djb hash函数
int dictRehash(dict *d, int n); //对dict执行n次rehash
int dictRehashMilliseconds(dict *d, int ms); //在ms毫秒内,对dict进行rehash
void dictSetHashFunctionSeed(unsigned int initval); //设置hash种子,若不设置,默认为5381
unsigned int dictGetHashFunctionSeed(void); //获取hash种子
应用
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <iostream>
#include <string>
#include "dict.h"
using namespace std;
unsigned int DictStringHashFunc(const void *key) {
assert(key != NULL);
return dictGenHashFunction(key, strlen((char *)key));
}
void *DictStringDup(void *privdata, const void *key) {
assert(key != NULL);
return strdup((char *)key);
}
int DictStringCompare(void *privdata, const void *key1, const void *key2) {
return strcmp((char *)key1, (char *)key2) == 0;
}
void StringDestructor(void *privdata, void *key) {
free(key);
}
dictType g_dict_type = {
DictStringHashFunc, /* hash function */
DictStringDup, /* key dup */
DictStringDup, /* val dup */
DictStringCompare, /* key compare */
StringDestructor, /* key destructor */
StringDestructor /* val destructor */
};
int main(int argc, char *argv[]) {
dict *dic = dictCreate(&g_dict_type,NULL);
string key_base = "key";
string value_base = "value";
for (char ch = 'a'; ch < 'k'; ++ch) {
string key = key_base + ch;
string value = value_base + ch;
dictAdd(dic, (void *)key.c_str(), (void *)value.c_str());
}
cout << "-----------------dict initial stat------------------------------------" << endl;
dictPrintStats(dic);
cout << "-----------------dictIterator-----------------------------------------" << endl;
dictIterator *iter = dictGetIterator(dic);
while (dictNext(iter) != NULL) {
cout << (char *)iter->entry->key << "\t" << (char *)iter->entry->v.val << endl;
}
dictReleaseIterator(iter);
cout << "-----------------dictFind---------------------------------------------" << endl;
string fkey = "keyh";
dictEntry *entry = dictFind(dic, "keyh");
if (entry != NULL) {
cout << "key: " << fkey << "\tvalue:" << (char *)entry->v.val << endl;
}
string new_val = "new_valueh";
dictReplace(dic, (void *)fkey.c_str(), (void *)new_val.c_str());
cout << "-----------------after dictReplace, dictFetchValue--------------------" << endl;
void *val = dictFetchValue(dic, "keyh");
if (val != NULL) {
cout << "key: " << fkey << "\tvalue:" << (char *)val << endl;
}
dictDelete(dic, (void *)fkey.c_str());
val = dictFetchValue(dic, "keyh");
if (val != NULL) {
cout << "key: " << fkey << "\tvalue:" << (char *)val << endl;
}
dictDelete(dic, dictGetRandomKey(dic)->key);
dictDelete(dic, dictGetRandomKey(dic)->key);
cout << "-----------------before dictResize, dict stat-----------------------" << endl;
dictPrintStats(dic);
dictResize(dic);
cout << "-----------------after dictResize, dict stat------------------------" << endl;
dictPrintStats(dic);
dictRehash(dic, 8);
cout << "-----------------after rehash 8 times, dict stat--------------------" << endl;
dictPrintStats(dic);
dictEmpty(dic);
cout << "-----------------after dictEmpty, dict stat-------------------------" << endl;
dictPrintStats(dic);
dictRelease(dic);
return 0;
}
运行结果:
-----------------dict initial stat------------------------------------
Hash table stats:
table size: 8
number of elements: 7
different slots: 5
max chain length: 2
avg chain length (counted): 1.40
avg chain length (computed): 1.40
Chain length distribution:
0: 3 (37.50%)
1: 3 (37.50%)
2: 2 (25.00%)
-- Rehashing into ht[1]:
Hash table stats:
table size: 16
number of elements: 3
different slots: 3
max chain length: 1
avg chain length (counted): 1.00
avg chain length (computed): 1.00
Chain length distribution:
0: 13 (81.25%)
1: 3 (18.75%)
-----------------dictIterator-----------------------------------------
keya valuea
keyb valueb
keyh valueh
keye valuee
keyd valued
keyc valuec
keyg valueg
keyj valuej
keyf valuef
keyi valuei
-----------------dictFind---------------------------------------------
key: keyh value:valueh
-----------------after dictReplace, dictFetchValue--------------------
key: keyh value:new_valueh
-----------------before dictResize, dict stat-----------------------
Hash table stats:
table size: 16
number of elements: 7
different slots: 7
max chain length: 1
avg chain length (counted): 1.00
avg chain length (computed): 1.00
Chain length distribution:
0: 9 (56.25%)
1: 7 (43.75%)
-----------------after dictResize, dict stat------------------------
Hash table stats:
table size: 16
number of elements: 7
different slots: 7
max chain length: 1
avg chain length (counted): 1.00
avg chain length (computed): 1.00
Chain length distribution:
0: 9 (56.25%)
1: 7 (43.75%)
-- Rehashing into ht[1]:
No stats available for empty dictionaries
-----------------after rehash 8 times, dict stat--------------------
Hash table stats:
table size: 8
number of elements: 7
different slots: 7
max chain length: 1
avg chain length (counted): 1.00
avg chain length (computed): 1.00
Chain length distribution:
0: 1 (12.50%)
1: 7 (87.50%)
-----------------after dictEmpty, dict stat-------------------------
No stats available for empty dictionaries
运行结果分析:
dict initial stat
插入10个元素,经历了两次expand,第一次expand发生在插入第5个元素时,hashtable初始大小为4,所以插入第5个元素时,经历了一次expand,此时的状态为,ht[0] size为4,ht[1] size为8;此后的新元素都插入ht[1],同时每次插入,都会进行一次rehash,将ht[0]中的一个bucket移入ht[1];第二次expand发生在插入第9个元素时,此时的状态为,ht[0] size为8,used为8,ht[1] size为16,used为1,插入第10个元素时,也进行了一次rehash,将ht[0]中的一个bucket移入ht[1],而这个bucket中只有一个元素;所以插入完成后的状态为运行结果所示,ht[0]的table size为8,number of elements为7,ht[1]的table size为16,number of elements为3
dictIterator
此时dict中所有的kv对,包括ht[0]和ht[1]中的所有元素
dictFind
对应于keyh的元素为valueh,同时执行了一次rehash,将ht[0]中的一个bucket移入ht[1],而这个bucket中有两个元素,此时的状态为,ht[0] size为8,used为5,ht[1]的size为16,used为5
后面的dictReplace中也经历了一次dictAdd和一次dictFind,个执行了一次rehash,ht[0] size为8,used为2,ht[1]的size为16,used为8
after dictReplace, dictFetchValue
dictFetchValue执行了一次rehash,此时的状态为,ht[0] size为8,used为1,ht[1]的size为16,used为9
dictDelete删除了一个元素,执行了一次rehash,此时的状态为,ht[0] size为8,used为0,ht[1]的size为16,used为9
后面的dictFetchValue执行了最后一次rehash,ht[0]=ht[1],释放ht[1],此时的状态为,ht[0] size为16,used为9,ht[1]的size为0,used为0,rehash状态结束
后面的两次dictDelete从ht[0]中删除了2个元素,此时的状态为,ht[0] size为16,used为7,ht[1]的size为0,used为0
before dictResize, dict stat
resize之前的状态为和之前的状态一样,ht[0] size为16,used为7,ht[1]的size为0,used为0
after dictResize, dict stat
dictResize,ht[1]初始化为size为_dictNextPower(7)=8,此时的状态为,ht[0] size为16,used为7,ht[1]的size为8,used为0
after rehash 8 times, dict stat
经历8次rehash后,将ht[0]中的7个元素移入ht[1],将ht[1]赋给ht[0],同时释放ht[1],此时的状态为,ht[0] size为8,used为7,ht[1]的size为0,used为0
after dictEmpty, dict stat
dictEmtpy之后,清空dict,此时的状态为,ht[0] size为0,used为0,ht[1]的size为0,used为0
参考
http://www.cppblog.com/mysileng/archive/2013/04/05/199132.html Redis 设计与实现--3--内部数据结构--字典
http://redisbook.com/ Redis 设计与实现