redis源码解析 hash字典- dict （6.2.3版本）

最新推荐文章于 2022-11-18 15:35:40 发布

换个昵称都难

最新推荐文章于 2022-11-18 15:35:40 发布

阅读量266

点赞数

分类专栏： redis 集群

本文链接：https://blog.csdn.net/qazwsxwtc/article/details/119386211

版权

redis 同时被 2 个专栏收录

7 篇文章 0 订阅

订阅专栏

集群

7 篇文章 0 订阅

订阅专栏

hash字典是一种kv存储结构，用于快速查询。redis中对应的源码为 dict.h 和 dict.c。redis 的hash字典采用渐进式rehash实现（见下文第4节），更多细节可搜索相关博文阅读。

1，redis hash字典的结构体声明

/* One key/value node of the dictionary. Nodes that fall into the same bucket
 * are chained together through 'next'. */
typedef struct dictEntry {
    void *key;  /* the entry's key */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v; /* the value: 'val' is a generic pointer to the real value; the other members store a 64-bit integer or a double inline */
    struct dictEntry *next; /* next entry in the same bucket chain, NULL at the end */
} dictEntry;

/* Table of callbacks that specializes a dictionary for a given kind of
 * key and value. */
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key); /* hash function for keys */
    void *(*keyDup)(void *privdata, const void *key); /* key copy function */
    void *(*valDup)(void *privdata, const void *obj); /* value copy function */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2); /* key comparison function */
    void (*keyDestructor)(void *privdata, void *key); /* destructor for a key pointer */
    void (*valDestructor)(void *privdata, void *obj); /* destructor for a value pointer */
    int (*expandAllowed)(size_t moreMem, double usedRatio); /* tells whether expanding the table is allowed */
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table; /* array of bucket heads, each pointing to a chain of dictEntry */
    unsigned long size; /* number of buckets in 'table' */
    unsigned long sizemask; /* mask ANDed with a key's hash to select a bucket; per the author's note sizemask = size-1 */
    unsigned long used; /* number of key/value entries currently stored in this table */
} dictht;

/* The dictionary object: a callback table, a private data pointer and the
 * two hash tables used for incremental rehashing. */
typedef struct dict {
    dictType *type; /* callbacks that specialize the dictionary's operations */
    void *privdata; /* private data pointer supplied at creation time */
    dictht ht[2]; /* two hash tables; while rehashing, entries are moved from ht[0] to ht[1] */
    long rehashidx; /* index of the next ht[0] bucket to migrate; rehashing not in progress if rehashidx == -1 */
    int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error); safe iteration needs rehashing paused */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d; /* dictionary being iterated */
    long index; /* current bucket index inside the dictht 'table' array; -1 before the first step */
    int table; /* which of d->ht[0] / d->ht[1] is being walked */
    int safe; /* 1 for a safe iterator (see above), 0 for an unsafe one that may only call dictNext() */
    dictEntry *entry; /* entry most recently returned, NULL between buckets */
    dictEntry *nextEntry; /* successor of 'entry', saved so the caller may delete 'entry' */
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint; /* value of dictFingerprint() taken at the first step and re-checked on release */
} dictIterator;

2，hash字典的基本方法（创建/添加/修改/删除）

2.1，创建--创建hash字典

主要初始化各个结构成员，赋初始值

/* Put a hash table back into its empty state: no bucket array, zero size,
 * zero mask and zero stored entries. Nothing is freed here. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}

/* Create a new, empty dictionary.
 *
 * type        - callback table stored in the new dict.
 * privDataPtr - private data pointer stored in the new dict.
 *
 * The object is allocated with zmalloc(), so the caller must eventually
 * pass it to dictRelease() to free it. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    (void)_dictInit(created,type,privDataPtr);
    return created;
}

/* Initialize an already allocated dictionary: store the callback table and
 * private data, reset both hash tables, and mark the dict as not rehashing
 * and not paused. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    int i;

    d->type = type;
    d->privdata = privDataPtr;
    for (i = 0; i < 2; i++) {
        _dictReset(&d->ht[i]);
    }
    d->pauserehash = 0;
    d->rehashidx = -1; /* -1 means no rehash in progress */
    return DICT_OK;
}

2.2，添加--添加元素到hash字典

/* Add a key/value pair to the dictionary.
 *
 * Returns DICT_OK when dictAddRaw() produced a new entry and the value was
 * stored in it, DICT_ERR when dictAddRaw() returned no entry. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d,key,NULL);

    if (added != NULL) {
        /* The entry exists now: attach the value to it. */
        dictSetVal(d, added, val);
        return DICT_OK;
    }
    return DICT_ERR;
}

2.3，修改--替换元素的值，如果不存在就添加元素到hash字典

/* Add or Overwrite:
 * insert the key with the given value, or, if the key is already present,
 * replace its value and discard the old one.
 *
 * Returns 1 if the key was added from scratch, 0 if an entry with that key
 * already existed and only its value was updated. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing;
    dictEntry *fresh = dictAddRaw(d,key,&existing);

    if (fresh == NULL) {
        /* The key is already there. Install the new value first and free
         * the old one afterwards: the two may be the very same object
         * (think reference counting), so we must increment (set) before we
         * decrement (free), never the reverse. A copy of the entry keeps
         * the old value reachable after it has been overwritten. */
        dictEntry previous = *existing;

        dictSetVal(d, existing, val);
        dictFreeVal(d, &previous);
        return 0;
    }

    /* Brand new entry: just store the value. */
    dictSetVal(d, fresh, val);
    return 1;
}

2.4，删除--删除指定key对应的元素

/* Remove the entry with the given key, freeing it.
 * Returns DICT_OK if an entry was found and removed, DICT_ERR otherwise. */
int dictDelete(dict *ht, const void *key) {
    if (dictGenericDelete(ht,key,0) == NULL) {
        return DICT_ERR;
    }
    return DICT_OK;
}

/* Search and remove an element. This is a helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions.
 *
 * d      - dictionary to search.
 * key    - key of the entry to remove.
 * nofree - when zero the entry's key, value and node are released here;
 *          when non-zero the entry is only unlinked.
 *
 * Returns the unlinked entry, or NULL when the key is not found. Note that
 * with nofree == 0 the returned pointer refers to memory that has already
 * been freed, so it is only usable as a found/not-found indication. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t hash;
    int t;

    /* Both tables empty: there is nothing to delete. */
    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    /* A rehash in progress gets one step of work before the lookup. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    hash = dictHashKey(d, key);

    for (t = 0; t <= 1; t++) {
        dictht *cur = &d->ht[t];
        /* 'link' always addresses the pointer that leads to 'node': first
         * the bucket slot itself, then the previous node's 'next'. */
        dictEntry **link = &cur->table[hash & cur->sizemask];
        dictEntry *node;

        for (node = *link; node != NULL; link = &node->next, node = *link) {
            if (key != node->key && !(dictCompareKeys(d, key, node->key)))
                continue;

            /* Unlink the element from the list */
            *link = node->next;
            if (!nofree) {
                dictFreeKey(d, node);
                dictFreeVal(d, node);
                zfree(node);
            }
            cur->used--;
            return node;
        }
        /* The second table is only worth searching while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}

3，hash字典其他常用操作（查询/清空/释放）

3.1，查询

提供了两个函数：dictFind 用于查询key对应的字典实体节点（dictEntry），dictFetchValue 用于查询key对应的具体值。

/* Look up the entry stored under 'key'.
 * Returns the matching dictEntry, or NULL if the dictionary is empty or
 * holds no such key. If a rehash is in progress one rehash step is run
 * first and both tables are searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    uint64_t hash, t;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    hash = dictHashKey(d, key);
    for (t = 0; t <= 1; t++) {
        dictEntry *node = d->ht[t].table[hash & d->ht[t].sizemask];

        for (; node != NULL; node = node->next) {
            if (key == node->key || dictCompareKeys(d, key, node->key))
                return node;
        }
        /* Without a rehash in progress the second table holds nothing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}

/* Return the value stored under 'key', or NULL when dictFind() finds no
 * entry for it. */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *found = dictFind(d,key);

    if (found == NULL) {
        return NULL;
    }
    return dictGetVal(found);
}

3.2 清空字典

清空字典，只是清空内容，hash字典对象还存在

/* Empty the dictionary: clear both hash tables (forwarding 'callback' to
 * _dictClear) and reset the rehash state. The dict object itself stays
 * allocated and can be used again. */
void dictEmpty(dict *d, void(callback)(void*)) {
    int i;

    for (i = 0; i < 2; i++) {
        _dictClear(d,&d->ht[i],callback);
    }
    d->pauserehash = 0;
    d->rehashidx = -1;
}

3.3 释放字典

释放时会先清空两张hash表中的数据，然后释放字典对象本身（由zmalloc分配）的内存

/* Clear & Release the hash table: both tables are cleared (no callback is
 * passed), then the dict object itself is freed. 'd' must not be used
 * afterwards. */
void dictRelease(dict *d)
{
    int i;

    for (i = 0; i < 2; i++) {
        _dictClear(d,&d->ht[i],NULL);
    }
    zfree(d);
}

4，渐进式rehash

hash表中的数据量相对于hash表大小过多时，就会扩容；反之，如果hash表很大，而存储的kv对又很少，就会缩容。扩容和缩容时都需要进行rehash。由于redis是一个高性能的kv数据库，不可能一次性把一个表的全部数据迁移到另一个hash表，这样会很耗时，所以采用了渐进式rehash算法。

redis rehash的触发条件为：

负载因子Q=ht[0].used / ht[0].size。

缩容触发：负载因子Q<=0.1时触发。

扩容触发：没有在进行持久化时，负载因子Q>=1触发；正在进行持久化时，负载因子Q>=5才触发。持久化期间子进程依赖写时复制（copy-on-write），此时扩容会改动大量内存页、带来额外的内存拷贝开销，因此把扩容阈值提高到5，尽量避免在持久化期间扩容。

dict 在进行添加、删除时，会被动触发rehash，也可以主动触发rehash；并且如果dict正处于rehash过程中，添加、删除、查找等操作（例如本文中的 dictGenericDelete、dictFind）都会先调用 _dictRehashStep 顺带执行一步迁移。

4.1，扩容缩容函数


/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1.
 *
 * Returns DICT_ERR when resizing is disabled (dict_can_resize is zero) or a
 * rehash is already in progress; otherwise returns whatever dictExpand()
 * returns for the chosen size. */
int dictResize(dict *d)
{
    unsigned long target;

    if (!dict_can_resize) return DICT_ERR;
    if (dictIsRehashing(d)) return DICT_ERR;

    /* Never go below the initial table size. */
    target = d->ht[0].used;
    if (target < DICT_HT_INITIAL_SIZE) {
        target = DICT_HT_INITIAL_SIZE;
    }
    return dictExpand(d, target);
}

4.2 rehash函数

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * One step moves one whole bucket (possibly several chained keys) from
 * ht[0] to ht[1]. Because the old table may contain long runs of empty
 * buckets, the function gives up after visiting N*10 empty buckets in total,
 * so it is not guaranteed to move even a single bucket; without that bound
 * the amount of work would be unbounded and the call could block for a long
 * time. */
int dictRehash(dict *d, int n) {
    int empty_budget = n*10; /* Max number of empty buckets to visit. */
    dictht *src, *dst;

    if (!dictIsRehashing(d)) return 0;

    src = &d->ht[0];
    dst = &d->ht[1];

    for (; n != 0 && src->used != 0; n--) {
        dictEntry *node;

        /* rehashidx can't run past the table: src->used != 0 guarantees
         * that a non-empty bucket still lies ahead. */
        assert(src->size > (unsigned long)d->rehashidx);
        while (src->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_budget == 0) return 1;
        }

        /* Move every key of this bucket to the head of its new bucket. */
        node = src->table[d->rehashidx];
        while (node != NULL) {
            dictEntry *following = node->next;
            /* Index of the key in the new hash table. */
            uint64_t slot = dictHashKey(d, node->key) & dst->sizemask;

            node->next = dst->table[slot];
            dst->table[slot] = node;
            src->used--;
            dst->used++;
            node = following;
        }
        src->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* More to rehash... */
    if (src->used != 0) return 1;

    /* The whole table has been rehashed: the new table becomes ht[0] and
     * the dict leaves the rehashing state. */
    zfree(src->table);
    *src = *dst;
    _dictReset(dst);
    d->rehashidx = -1;
    return 0;
}

5，hash算法

hash算法的具体实现，在siphash.c文件


/* Prototypes of the two generic hash helpers defined further below: a plain
 * one and a case-insensitive one. */
uint64_t dictGenHashFunction(const void *key, int len);

uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);

/* The default hashing function uses SipHash implementation
 * in siphash.c. */

/* SipHash entry points: 'in'/'inlen' describe the input buffer and 'k' is
 * the key (seed) buffer. */
uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);

/* Hash the 'len' bytes starting at 'key' with siphash(), keyed by the
 * global dict_hash_function_seed. */
uint64_t dictGenHashFunction(const void *key, int len) {
    uint64_t digest = siphash(key,len,dict_hash_function_seed);

    return digest;
}
/* Case-insensitive counterpart of dictGenHashFunction(): hash the 'len'
 * bytes starting at 'buf' with siphash_nocase(), keyed by the global
 * dict_hash_function_seed. */
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);
}

6，迭代器

迭代器的设计思想可以参考C++ STL的迭代器：因为C语言没有STL这样的容器库，所以redis自己实现了dict的迭代器。

/* Allocate an unsafe iterator over 'd', positioned before the first entry
 * (table 0, index -1, no current or next entry). The 'fingerprint' field is
 * left unset here. Release it with dictReleaseIterator(). */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->entry = NULL;
    it->nextEntry = NULL;
    it->safe = 0;
    it->index = -1;
    it->table = 0;
    it->d = d;
    return it;
}

/* Same as dictGetIterator(), but the returned iterator is flagged as safe. */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *safeIter = dictGetIterator(d);

    safeIter->safe = 1;
    return safeIter;
}

/* Advance the iterator and return the next entry, or NULL once every bucket
 * (of both tables while rehashing) has been visited.
 *
 * On the very first call a safe iterator pauses rehashing, while an unsafe
 * one records the dictionary fingerprint. */
dictEntry *dictNext(dictIterator *iter)
{
    for (;;) {
        if (iter->entry != NULL) {
            /* Still inside a chain: step to the saved successor. */
            iter->entry = iter->nextEntry;
        } else {
            dictht *bucketTable = &iter->d->ht[iter->table];

            if (iter->index == -1 && iter->table == 0) {
                /* First step of this iterator. */
                if (iter->safe) {
                    dictPauseRehashing(iter->d);
                } else {
                    iter->fingerprint = dictFingerprint(iter->d);
                }
            }

            iter->index++;
            if (iter->index >= (long) bucketTable->size) {
                /* Current table exhausted: stop, unless a rehash is in
                 * progress and the second table has not been walked yet. */
                if (!dictIsRehashing(iter->d) || iter->table != 0) {
                    return NULL;
                }
                iter->table++;
                iter->index = 0;
                bucketTable = &iter->d->ht[1];
            }
            iter->entry = bucketTable->table[iter->index];
        }

        if (iter->entry != NULL) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
}

/* Free an iterator. If it was advanced at least once, a safe iterator
 * resumes rehashing, while an unsafe one asserts that the dictionary
 * fingerprint is unchanged since iteration began. */
void dictReleaseIterator(dictIterator *iter)
{
    int started = (iter->index != -1 || iter->table != 0);

    if (started) {
        if (iter->safe) {
            dictResumeRehashing(iter->d);
        } else {
            assert(iter->fingerprint == dictFingerprint(iter->d));
        }
    }
    zfree(iter);
}

7，扫描dict

dict扫描算法的详细原理可以参考其他博客。

/* Visit the entries found at one cursor position of 'd' and return the
 * cursor to use for the next call.
 *
 * d        - dictionary to scan.
 * v        - current cursor; the buckets visited are selected by masking it
 *            with each table's sizemask.
 * fn       - called as fn(privdata, entry) for every entry visited.
 * bucketfn - optional; when non-NULL it is called with privdata and the
 *            address of each bucket slot before that bucket's entries are
 *            emitted.
 * privdata - opaque pointer forwarded to both callbacks.
 *
 * Returns 0 immediately when the dictionary is empty, otherwise the updated
 * cursor. dictPauseRehashing()/dictResumeRehashing() bracket the scan.
 * NOTE(review): rev() is defined elsewhere; from its use here it presumably
 * reverses the bits of the cursor - confirm. */
unsigned long dictScan(dict *d,
                       unsigned long v,
                       dictScanFunction *fn,
                       dictScanBucketFunction* bucketfn,
                       void *privdata)
{
    dictht *t0, *t1;
    const dictEntry *de, *next;
    unsigned long m0, m1;

    if (dictSize(d) == 0) return 0;

    /* This is needed in case the scan callback tries to do dictFind or alike. */
    dictPauseRehashing(d);

    if (!dictIsRehashing(d)) {
        /* Single-table case: only ht[0] is in use. */
        t0 = &(d->ht[0]);
        m0 = t0->sizemask;

        /* Emit entries at cursor */
        if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
        de = t0->table[v & m0];
        while (de) {
            /* 'next' is read before the callback runs, so the loop does not
             * touch 'de' again after handing it to fn. */
            next = de->next;
            fn(privdata, de);
            de = next;
        }

        /* Set unmasked bits so incrementing the reversed cursor
         * operates on the masked bits */
        v |= ~m0;

        /* Increment the reverse cursor */
        v = rev(v);
        v++;
        v = rev(v);

    } else {
        /* Rehashing case: both tables hold entries and both are visited. */
        t0 = &d->ht[0];
        t1 = &d->ht[1];

        /* Make sure t0 is the smaller and t1 is the bigger table */
        if (t0->size > t1->size) {
            t0 = &d->ht[1];
            t1 = &d->ht[0];
        }

        m0 = t0->sizemask;
        m1 = t1->sizemask;

        /* Emit entries at cursor */
        if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
        de = t0->table[v & m0];
        while (de) {
            next = de->next;
            fn(privdata, de);
            de = next;
        }

        /* Iterate over indices in larger table that are the expansion
         * of the index pointed to by the cursor in the smaller table */
        do {
            /* Emit entries at cursor */
            if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
            de = t1->table[v & m1];
            while (de) {
                next = de->next;
                fn(privdata, de);
                de = next;
            }

            /* Increment the reverse cursor not covered by the smaller mask.*/
            v |= ~m1;
            v = rev(v);
            v++;
            v = rev(v);

            /* Continue while bits covered by mask difference is non-zero */
        } while (v & (m0 ^ m1));
    }

    /* Undo the pause taken at the top of the function. */
    dictResumeRehashing(d);

    return v;
}

8，其他

/* Return a randomly chosen entry of the dictionary. */
dictEntry *dictGetRandomKey(dict *d);

/* Return a randomly chosen entry, selected in a fairer way than
 * dictGetRandomKey(). */
dictEntry *dictGetFairRandomKey(dict *d);

/* Fetch entries into 'des' according to the requested 'count'.
 * NOTE(review): presumably returns how many entries were actually stored -
 * confirm against dict.c. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);