redis中字典的add操作（hash算法、rehash）

最新推荐文章于 2024-04-26 10:40:58 发布

bettyF

最新推荐文章于 2024-04-26 10:40:58 发布

阅读量1.7k

点赞数

分类专栏： redis源码学习文章标签： redis

本文链接：https://blog.csdn.net/bettyF/article/details/82318120

版权

redis源码学习专栏收录该内容

17 篇文章 0 订阅

订阅专栏

下面先来看看dict中的dictAdd方法：

/*
 * Add a key/value pair to the dictionary.
 *
 * Takes three arguments: the dictionary pointer, the key and the value.
 * Returns DICT_OK when the pair was stored, or DICT_ERR when dictAddRaw()
 * returned NULL and therefore no entry was created.
 */
int dictAdd(dict *d, void *key, void *val)
{
    /* Create the entry for the key only; the value is attached below. */
    dictEntry *node = dictAddRaw(d, key, NULL);

    if (node != NULL) {
        /* The entry exists now, so store the value in it. */
        dictSetVal(d, node, val);
        return DICT_OK;
    }

    /* dictAddRaw() produced no entry: report the failure. */
    return DICT_ERR;
}

主要看看dictAddRaw：

/*
 * Low-level add: create an entry for `key`, link it into the dictionary and
 * return it without setting a value, so the caller can fill the value in.
 *
 * Returns NULL when _dictKeyIndex() reports -1, i.e. the key is already
 * present or the dict could not be expanded. `existing` is passed straight
 * to _dictKeyIndex(), which (when it is non-NULL) stores the entry that
 * already holds the key there.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht; /* The hash table of the dict that receives the new entry. */

    /* If a rehash is in progress, let _dictRehashStep() advance it first. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* index is the bucket for the new element, or -1 when it cannot be
     * added (for example because the key already exists). */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the new entry and link it at the head of its bucket's chain.
     * NOTE(review): the zmalloc() result is used unchecked; presumably
     * zmalloc() never returns NULL. Confirm against its implementation.
     */
    /* While rehashing, the entry is added to ht[1]; otherwise to ht[0]. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;/* Keep the table's element count in sync. */

    /* Store the key in the entry. */
    dictSetKey(d, entry, key);
    return entry;
}

字典是否正在rehash，是根据dict中的rehashidx属性来判断的：如果rehashidx=-1，说明dict没有在rehash；否则说明dict正在rehash。

/* Non-zero while the dict is being rehashed, i.e. while rehashidx is not -1. */
#define dictIsRehashing(dp) ((dp)->rehashidx != -1)

rehash

如果此时rehashidx!=-1，并且迭代器数量=0（也就是此时并没有对dict进行迭代），那么就执行rehash。以下就是dict的rehash方法：

/*
 * Perform up to n steps of incremental rehashing. Each step moves all the
 * entries of one non-empty ht[0] bucket into ht[1].
 *
 * Returns 0 when the dict is not rehashing, or when ht[0] has been emptied
 * and the rehash is complete. Returns 1 when entries remain to be moved,
 * including the case where the call stopped early because it had already
 * skipped n*10 empty buckets.
 */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Maximum number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0; /* Not rehashing: nothing to do. */

    /* Work on ht[0] while step budget remains and it still holds entries. */
    while(n-- && d->ht[0].used != 0) {
        /* The entry being moved and the one that follows it in the chain. */
        dictEntry *de, *nextde;

        /* Guard against rehashidx running past the end of ht[0]. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);

        /* Skip empty buckets. Once the empty-bucket budget is used up, stop
         * for this call and return 1 (the rehash is not finished). */
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        
        /* Head of the chain stored in the current bucket. */
        de = d->ht[0].table[d->rehashidx];
        /* Move every entry of this bucket from ht[0] to ht[1]. */
        while(de) {
            uint64_t h;

            /* Remember the successor: de->next is overwritten below. */
            nextde = de->next;
            /* Bucket index of this entry in ht[1]. */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Once every entry of ht[0] has been moved, free its bucket array,
     * copy ht[1] into ht[0] and reset ht[1], so that ht[0] is always the
     * table in use and ht[1] is the spare.
     */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        /* The rehash is finished. */
        d->rehashidx = -1;
        return 0;
    }

    return 1;
}

在rehash中也涉及到对key的hash算法：

/* Compute the hash of a key by calling the hashFunction of the dict's type. */
#define dictHashKey(dp, k) ((dp)->type->hashFunction(k))

最后会重置ht[1]：

/*
 * Put a hash table back into its empty state: all counters are zeroed and
 * the bucket array pointer is cleared. The bucket array itself is not freed
 * here; only the pointer is overwritten.
 */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}

回到dictAddRaw方法主体，rehash之后，通过_dictKeyIndex方法获取key的下标值：

/*
 * Return the bucket index at which `key` (whose hash is `hash`) can be
 * inserted, or -1 when the key is already present or the dict could not be
 * expanded.
 *
 * When `existing` is non-NULL it is first set to NULL, and then set to the
 * matching entry if the key is found. While a rehash is in progress both
 * tables are searched and the returned index is the one computed for ht[1];
 * otherwise only ht[0] is searched and the index refers to ht[0].
 */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long idx, table;
    dictEntry *he;
    
    /* Start from "no existing entry found". */
    if (existing) *existing = NULL;

    /* Expand the dict if needed; give up with -1 when that fails. */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;

    /* Search ht[0], and ht[1] as well when a rehash is in progress. */
    for (table = 0; table <= 1; table++) {
        idx = hash & d->ht[table].sizemask;
        /* Walk the chain stored in this bucket, looking for an equal key. */
        he = d->ht[table].table[idx];
        while(he) {
            /* The key already exists: report the entry and return -1. */
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                if (existing) *existing = he;
                return -1;
            }
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    /* Key not found: idx (hash & sizemask) is its bucket in the last table
     * examined. */
    return idx;
}

总结：通过阅读add方法，了解了hash算法和rehash操作。在rehash的时候，需要将ht[0]中的所有K-V移动到ht[1]中去，这个过程并不是一次性完成，而是“渐进的”。为什么这样做呢？这是因为如果ht[0]中的K-V很多，一次性的移动会对服务器的性能有影响。在rehash过程中，如果执行查询操作，那么会先去ht[0]中查找，如果没有找到，再去ht[1]中查找。rehash过程中，如果有新增操作，那么会直接保存到ht[1]中，确保ht[0]中的K-V数量只减不增。

另外，在add过程中，如果两个不同的key被分配到hash表的同一个索引上，就会产生hash冲突。解决这个冲突的方法是链地址法，与Java中HashMap的解决方法相似。

bettyF

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
redis中字典的add操作（hash算法、rehash）

下面先来看看dict中的dictAdd方法：/* * 三个参数：字典指针、键、值 */int dictAdd(dict *d, void *key, void *val){ /* 新建节点,entry=null */ dictEntry *entry = dictAddRaw(d,key,NULL); /* 如果entry不为null，返回1 */ i...
复制链接

扫一扫

专栏目录