hash字典是一种kv存储结构,用于快速查询。redis中对应的源码为 dict.h 和 dict.c。redis 的 hash 字典采用渐进式 rehash 实现,关于渐进式 rehash 的详细原理,请搜索相关博文阅读。
1,redis hash字典的结构体声明
/* One key/value node of the hash table. Entries that land in the same bucket
 * are linked together through 'next' (separate chaining). */
typedef struct dictEntry {
    void *key; /* Key of this entry. */
    union {
        void *val;    /* Generic value, stored as a pointer. */
        uint64_t u64; /* Value stored inline as an unsigned 64-bit integer. */
        int64_t s64;  /* Value stored inline as a signed 64-bit integer. */
        double d;     /* Value stored inline as a double. */
    } v; /* Value storage; the members share the same memory. */
    struct dictEntry *next; /* Next entry in the same bucket chain. */
} dictEntry;
/* Table of callbacks that specializes a dictionary for a particular kind of
 * key and value. Most callbacks receive a 'privdata' pointer as their first
 * argument. */
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key); /* Computes the hash of a key. */
    void *(*keyDup)(void *privdata, const void *key); /* Duplicates a key. */
    void *(*valDup)(void *privdata, const void *obj); /* Duplicates a value. */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2); /* Compares two keys. */
    void (*keyDestructor)(void *privdata, void *key); /* Destroys a key. */
    void (*valDestructor)(void *privdata, void *obj); /* Destroys a value. */
    int (*expandAllowed)(size_t moreMem, double usedRatio); /* Decides whether the table may be expanded. */
} dictType;
/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table; /* Array of bucket heads; each slot is a chain of entries or NULL. */
    unsigned long size; /* Number of slots in 'table'. */
    unsigned long sizemask; /* Mask ANDed with a hash to obtain a bucket index (size-1 per the original note). */
    unsigned long used; /* Number of entries currently stored in this table. */
} dictht;
/* The dictionary itself: a callback table, caller-private data and two hash
 * tables used for incremental rehashing. */
typedef struct dict {
    dictType *type; /* Callback table that specializes the dictionary operations. */
    void *privdata; /* Caller-private data. */
    dictht ht[2]; /* ht[0] is the table being migrated from, ht[1] the rehash target. */
    long rehashidx; /* Index of the next ht[0] bucket to migrate; rehashing not in progress if rehashidx == -1 */
    int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
} dict;
/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d; /* Dictionary being iterated. */
    long index; /* Current bucket index inside the table being walked; -1 before the first step. */
    int table; /* Which of d->ht[] is being walked (0 or 1). */
    int safe; /* 1 for a safe iterator (see above), 0 for an unsafe one. */
    dictEntry *entry; /* Entry most recently returned, or NULL. */
    dictEntry *nextEntry; /* Saved successor of 'entry', so 'entry' may be deleted by the caller. */
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint; /* Value of dictFingerprint() taken when an unsafe iteration starts. */
} dictIterator;
2,hash字典的基本方法(创建/添加/修改/删除)
2.1,创建--创建hash字典实现,
主要初始化各个结构成员,赋初始值
/* Put a hash table into its empty state: no bucket array, zero size,
 * zero mask and zero stored entries. Nothing is freed here, the fields
 * are simply overwritten. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}
/* Create a new dictionary.
 *
 * The dict is allocated with zmalloc() and initialized through _dictInit()
 * using the given callback table and private data pointer. Because the
 * object lives on the heap, it must later be released with dictRelease(). */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}
/* Initialize an already allocated dictionary.
 *
 * Both hash tables are reset to the empty state, the callback table and the
 * private data pointer are stored, rehashing is marked as not in progress
 * (rehashidx == -1) and not paused. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type, void *privDataPtr)
{
    int t;

    for (t = 0; t < 2; t++)
        _dictReset(&d->ht[t]);

    d->pauserehash = 0;
    d->rehashidx = -1;
    d->privdata = privDataPtr;
    d->type = type;
    return DICT_OK;
}
2.2,添加--添加元素到hash字典
/* Add a key/value pair to the dictionary.
 *
 * The entry is created by dictAddRaw(); when that call yields no entry
 * DICT_ERR is returned and the value is not stored. Otherwise the value is
 * set on the new entry and DICT_OK is returned. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d, key, NULL);

    if (added != NULL) {
        dictSetVal(d, added, val);
        return DICT_OK;
    }
    return DICT_ERR;
}
2.3,修改--替换元素的值,如果不存在就添加元素到hash字典
/* Add or overwrite.
 *
 * Stores 'val' under 'key'. Returns 1 when the key was not present and a
 * brand new entry was created, 0 when an entry with that key already existed
 * and only its value was updated (the previous value is released). */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing;
    dictEntry *fresh = dictAddRaw(d, key, &existing);

    if (fresh == NULL) {
        /* The key is already there. Install the new value first and release
         * the old one afterwards, working on a copy of the old entry: the
         * new value may be the very same object as the old one, and with
         * reference counting the increment (set) has to happen before the
         * decrement (free). */
        dictEntry previous = *existing;

        dictSetVal(d, existing, val);
        dictFreeVal(d, &previous);
        return 0;
    }

    /* A new entry was created for the key: just attach the value. */
    dictSetVal(d, fresh, val);
    return 1;
}
2.4,删除--删除指定key对应的元素
/* Remove the entry stored under 'key', releasing it (nofree == 0).
 * Returns DICT_OK when an entry was found and removed, DICT_ERR when
 * dictGenericDelete() reports that nothing was found. */
int dictDelete(dict *ht, const void *key) {
    if (dictGenericDelete(ht, key, 0) == NULL)
        return DICT_ERR;
    return DICT_OK;
}
/* Search and remove an element. This is an helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions.
 *
 * Returns the unlinked entry, or NULL when the key is not found or the
 * dictionary holds no entries. When 'nofree' is 0 the key, the value and the
 * entry itself are released before returning, so in that case the returned
 * pointer is only meaningful as a "found" indicator and must not be
 * dereferenced. When 'nofree' is non-zero the entry is only unlinked and
 * ownership passes to the caller. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;
    /* Nothing stored in either table: nothing to delete. */
    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
    /* While a rehash is in progress, take the opportunity to run a rehash step. */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    /* Hash the key once; the same hash is masked per table below. */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask; /* Bucket index inside this table. */
        he = d->ht[table].table[idx]; /* Head of the bucket chain. */
        prevHe = NULL;
        while(he) {
            /* Same pointer, or equal according to the key comparison. */
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) { /* Caller asked for the entry to be released too. */
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                d->ht[table].used--;
                return he;
            }
            prevHe = he;
            he = he->next;
        }
        /* The second table only needs to be searched during a rehash. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}
3,hash字典 其他常用操作(查询/清空/释放)
3.1,查询
提供了两个函数:查询字典实体(dictEntry)的函数,以及直接查询具体值的函数
/* Look up the entry stored under 'key'.
 *
 * Returns the matching dictEntry, or NULL when the dictionary is empty or no
 * entry matches. If a rehash is in progress one rehash step is performed
 * first, and both tables are searched; otherwise only ht[0] is searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *node;
    uint64_t hash, bucket, t;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    hash = dictHashKey(d, key);
    for (t = 0; t <= 1; t++) {
        bucket = hash & d->ht[t].sizemask;
        /* Walk the chain hanging off this bucket. */
        for (node = d->ht[t].table[bucket]; node; node = node->next) {
            if (key==node->key || dictCompareKeys(d, key, node->key))
                return node;
        }
        /* Without a rehash in progress there is no second table to check. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}
/* Return the value stored under 'key', or NULL when dictFind() finds no
 * entry for it. */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *found = dictFind(d, key);

    if (found == NULL)
        return NULL;
    return dictGetVal(found);
}
3.2 清空字典
清空字典,只是清空内容,hash字典对象还存在
/* Empty the dictionary without destroying it.
 *
 * Both hash tables are cleared through _dictClear(), which receives
 * 'callback', and the rehash state is reset (not rehashing, not paused).
 * The dict object itself is not freed here. */
void dictEmpty(dict *d, void(callback)(void*)) {
    int t;

    for (t = 0; t < 2; t++)
        _dictClear(d, &d->ht[t], callback);

    d->pauserehash = 0;
    d->rehashidx = -1;
}
3.3 释放字典
释放之前会清空相关数据,也会释放字典malloc的内存
/* Clear and release the dictionary.
 *
 * Both hash tables are cleared through _dictClear() (with no callback) and
 * then the dict object itself is freed with zfree(); 'd' must not be used
 * afterwards. */
void dictRelease(dict *d)
{
    int t;

    for (t = 0; t < 2; t++)
        _dictClear(d, &d->ht[t], NULL);

    zfree(d);
}
4,渐进式rehash
当hash表中的数据量超过当前hash表大小时,就会扩容;反之,如果hash表很大,而存储的kv对又很少,就会缩容。扩容和缩容时都需要进行rehash。由于redis是一个高性能的kv库,不可能一次性把一个表中的全部数据迁移到另一个hash表,这样会很耗时,所以采用了渐进式rehash算法。
redis rehash的触发条件为:
负载因子Q=ht[0].used / ht[0].size。
缩容触发:负载因子Q<0.1(即填充率低于10%)时触发。
扩容触发:没有在进行持久化时,负载因子Q>=1触发;正在进行持久化时,负载因子Q>5才触发。持久化由fork出的子进程完成,父子进程通过写时复制(copy-on-write)共享内存页;此时扩容会改写大量内存页,引发额外的页复制和内存开销,因此持久化期间尽量避免扩容,只有负载因子超过5时才强制扩容。
dict 在进行添加、删除的时候,会被动触发rehash,也可以主动触发rehash;并且如果dict正处于rehash过程中,添加、删除、查询等操作都会顺带执行一步rehash迁移。
4.1,扩容缩容函数
/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1.
 *
 * Returns DICT_ERR when resizing is disabled (dict_can_resize is zero) or a
 * rehash is already in progress; otherwise returns whatever dictExpand()
 * returns for the target size, which is never below DICT_HT_INITIAL_SIZE. */
int dictResize(dict *d)
{
    unsigned long target;

    if (!dict_can_resize) return DICT_ERR;
    if (dictIsRehashing(d)) return DICT_ERR;

    target = d->ht[0].used;
    if (target < DICT_HT_INITIAL_SIZE) target = DICT_HT_INITIAL_SIZE;

    return dictExpand(d, target);
}
4.2 rehash函数
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    /* Nothing to do when no rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;
        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        /* Skip empty buckets, giving up once the empty-visit budget is spent. */
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;
            /* Save the successor: de->next is overwritten below. */
            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            /* Insert at the head of the destination bucket chain. */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        /* The source bucket is now empty; advance to the next one. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        /* Free the old bucket array, promote ht[1] to ht[0], reset ht[1]
         * and mark the rehash as finished. */
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }
    /* More to rehash... */
    return 1;
}
5,hash算法
hash算法的具体实现,在siphash.c文件
/* Generic hash functions offered by the dict module: the first hashes 'len'
 * bytes at 'key', the second is the case-insensitive variant for 'buf'. */
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
/* The default hashing function uses SipHash implementation
 * in siphash.c. */
/* Both take the input buffer 'in', its length 'inlen' and a key/seed 'k'. */
uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);
/* Hash the 'len' bytes starting at 'key' with siphash(), keyed by the
 * module-wide dict_hash_function_seed, and return the 64-bit result. */
uint64_t dictGenHashFunction(const void *key, int len) {
    uint64_t digest = siphash(key, len, dict_hash_function_seed);

    return digest;
}
/* Case-insensitive variant of the generic hash function: hash the 'len'
 * bytes starting at 'buf' with siphash_nocase(), keyed by the module-wide
 * dict_hash_function_seed, and return the 64-bit result.
 *
 * Note: 'len' is an int while siphash_nocase() takes a size_t length, so a
 * negative 'len' would be converted to a very large length; callers are
 * expected to pass a non-negative value. */
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);
}
6,迭代器
迭代器的设计思想可以参考C++ STL的迭代器;由于C语言没有STL这样的容器库,所以redis自己实现了迭代器
/* Allocate an unsafe iterator over 'd', positioned before the first bucket
 * of ht[0] (table 0, index -1, no current or next entry).
 *
 * The 'fingerprint' field is left unset here; dictNext() assigns it on the
 * first step of an unsafe iteration. The iterator is heap allocated and is
 * freed by dictReleaseIterator(). */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->nextEntry = NULL;
    it->entry = NULL;
    it->safe = 0;
    it->index = -1;
    it->table = 0;
    it->d = d;
    return it;
}
/* Allocate an iterator exactly like dictGetIterator() does, then flag it as
 * safe (safe == 1). */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *safe_iter = dictGetIterator(d);

    safe_iter->safe = 1;
    return safe_iter;
}
/* Advance the iterator and return the next entry of the dictionary, or NULL
 * when every bucket has been visited.
 *
 * On the very first step a safe iterator pauses rehashing on the dict, while
 * an unsafe iterator records the dict fingerprint (checked again in
 * dictReleaseIterator()). If a rehash is in progress, ht[1] is walked after
 * ht[0]. */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {
            /* Current chain exhausted (or not started): move to the next bucket. */
            dictht *ht = &iter->d->ht[iter->table];
            if (iter->index == -1 && iter->table == 0) {
                /* First call on this iterator. */
                if (iter->safe)
                    dictPauseRehashing(iter->d);
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            iter->index++;
            if (iter->index >= (long) ht->size) {
                /* Past the last bucket of this table: switch to ht[1] when
                 * rehashing, otherwise the iteration is over. */
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                    ht = &iter->d->ht[1];
                } else {
                    break;
                }
            }
            iter->entry = ht->table[iter->index];
        } else {
            /* Continue along the current chain using the saved successor. */
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}
/* Release an iterator.
 *
 * If the iterator was advanced at least once, undo what the first dictNext()
 * call set up: a safe iterator resumes rehashing on the dict, an unsafe one
 * asserts that the dict fingerprint is unchanged since the iteration began.
 * The iterator memory is then freed. */
void dictReleaseIterator(dictIterator *iter)
{
    int started = (iter->index != -1 || iter->table != 0);

    if (started) {
        if (iter->safe) {
            dictResumeRehashing(iter->d);
        } else {
            assert(iter->fingerprint == dictFingerprint(iter->d));
        }
    }
    zfree(iter);
}
7,扫描dict
dict扫描算法的详细原理可以参考其他博客。
/* Visit the entries of the bucket(s) selected by the cursor 'v' and return
 * the cursor to use for the following call.
 *
 * d        - dictionary to scan.
 * v        - cursor from the previous call.
 * fn       - called as fn(privdata, entry) for every entry visited.
 * bucketfn - if not NULL, called as bucketfn(privdata, &bucket) before the
 *            entries of each visited bucket.
 * privdata - opaque pointer handed to both callbacks.
 *
 * Returns 0 immediately when the dictionary is empty. Rehashing is paused
 * for the duration of the call. The cursor is advanced by incrementing its
 * bits in reversed order (via rev(), presumably a bit-reversal helper
 * defined elsewhere). */
unsigned long dictScan(dict *d,
                       unsigned long v,
                       dictScanFunction *fn,
                       dictScanBucketFunction* bucketfn,
                       void *privdata)
{
    dictht *t0, *t1;
    const dictEntry *de, *next;
    unsigned long m0, m1;
    if (dictSize(d) == 0) return 0;
    /* This is needed in case the scan callback tries to do dictFind or alike. */
    dictPauseRehashing(d);
    if (!dictIsRehashing(d)) {
        /* Single table case: only ht[0] holds entries. */
        t0 = &(d->ht[0]);
        m0 = t0->sizemask;
        /* Emit entries at cursor */
        if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
        de = t0->table[v & m0];
        while (de) {
            /* Read the successor before invoking the callback. */
            next = de->next;
            fn(privdata, de);
            de = next;
        }
        /* Set unmasked bits so incrementing the reversed cursor
         * operates on the masked bits */
        v |= ~m0;
        /* Increment the reverse cursor */
        v = rev(v);
        v++;
        v = rev(v);
    } else {
        /* Rehash in progress: both tables hold entries. */
        t0 = &d->ht[0];
        t1 = &d->ht[1];
        /* Make sure t0 is the smaller and t1 is the bigger table */
        if (t0->size > t1->size) {
            t0 = &d->ht[1];
            t1 = &d->ht[0];
        }
        m0 = t0->sizemask;
        m1 = t1->sizemask;
        /* Emit entries at cursor */
        if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
        de = t0->table[v & m0];
        while (de) {
            next = de->next;
            fn(privdata, de);
            de = next;
        }
        /* Iterate over indices in larger table that are the expansion
         * of the index pointed to by the cursor in the smaller table */
        do {
            /* Emit entries at cursor */
            if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
            de = t1->table[v & m1];
            while (de) {
                next = de->next;
                fn(privdata, de);
                de = next;
            }
            /* Increment the reverse cursor not covered by the smaller mask.*/
            v |= ~m1;
            v = rev(v);
            v++;
            v = rev(v);
            /* Continue while bits covered by mask difference is non-zero */
        } while (v & (m0 ^ m1));
    }
    dictResumeRehashing(d);
    return v;
}
8,其他
/* Return a randomly chosen entry of the dictionary. */
dictEntry *dictGetRandomKey(dict *d);
/* Return a randomly chosen entry, selected in a fairer way (per its name and
 * the original note; the implementation is not shown here). */
dictEntry *dictGetFairRandomKey(dict *d);
/* Collect entries into 'des' according to the requested 'count'; presumably
 * returns how many entries were stored -- confirm against the definition. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);