redis hash的数据结构和实现过程

最新推荐文章于 2023-08-27 08:00:00 发布

一定迟到逗

最新推荐文章于 2023-08-27 08:00:00 发布

阅读量283

点赞数

分类专栏： c++ cdn redis

c++ 同时被 3 个专栏收录

16 篇文章 1 订阅

订阅专栏

cdn

3 篇文章 0 订阅

订阅专栏

redis

2 篇文章 0 订阅

订阅专栏

1、数据结构

/* One hash-table node: holds a single key/value pair. */
typedef struct dictEntry {
    void *key;
    /* The value: either a pointer or a number stored inline in the node. */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next; /* Next node in the same bucket; hash collisions are resolved by chaining. */
} dictEntry;
 
/* Table of callbacks: each kind of stored data supplies its own operations
 * (hashing, key/value duplication, key comparison, key/value destruction). */
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;
 
/* A single hash table. */
typedef struct dictht {
    dictEntry **table; /* Array of dictEntry* bucket heads: the hash table itself. */
    unsigned long size; /* Total number of buckets in the table. */
    unsigned long sizemask; /* Mask used to compute an index into table; its value is size-1. */
    unsigned long used; /* Number of entries currently stored in the table. */
} dictht;
 
/* The dictionary: callbacks, two hash tables and incremental-rehash state. */
typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2]; /* Two hash tables; the second one is used while rehashing. */
    long rehashidx; /* Rehash progress index; -1 means no rehash is in progress. */
    int iterators; /* Counter incremented by dictNext and decremented by dictReleaseIterator for safe iterators. */
} dict;

2、数据结构图

转自：https://blog.csdn.net/yanshuanche3765/article/details/82121037
在这里插入图片描述

3、实现过程的主要函数

1、dictCreate dict创建

/* Allocate a new dict and initialise it through _dictInit.
 *
 * type        - callback table stored in the new dict.
 * privDataPtr - opaque pointer stored in the new dict.
 *
 * Returns the newly allocated dict.
 * NOTE(review): the zmalloc result is used unchecked; presumably zmalloc
 * never returns NULL -- confirm. */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *created;

    /* sizeof(*created) is the size of the dict structure, not of a pointer. */
    created = zmalloc(sizeof(*created));
    _dictInit(created, type, privDataPtr);
    return created;
}

2、_dictInit 接上面的初始化

/* Initialise an already allocated dict: both hash tables are reset to the
 * empty state, the callbacks and private data are stored, and the dict is
 * marked as not rehashing with no iterators counted.
 *
 * Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type, void *privDataPtr)
{
    int i;

    for (i = 0; i < 2; i++)
        _dictReset(&d->ht[i]);

    d->iterators = 0;
    d->rehashidx = -1; /* -1: no rehash in progress */
    d->privdata = privDataPtr;
    d->type = type;
    return DICT_OK;
}

/* Put a hash table into the empty state: no bucket array, zero size, zero
 * mask and zero stored entries. Any previous bucket array is NOT freed here. */
static void _dictReset(dictht *ht)
{
    *ht = (dictht){
        .table = NULL,
        .size = 0,
        .sizemask = 0,
        .used = 0,
    };
}

3、dictAdd hash表中添加元素,首先判断空间是否足够, 然后计算key对应的hash值, 然后将需要添加的key和value放入表中.

/* Add a key/value pair to the dict.
 *
 * The entry is created by dictAddRaw (which stores the key); the value is
 * then stored into the new entry with dictSetVal.
 *
 * Returns DICT_OK on success, or DICT_ERR when dictAddRaw returns NULL. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d, key);

    if (added != NULL) {
        dictSetVal(d, added, val);
        return DICT_OK;
    }
    return DICT_ERR;
}
/*
 * Low-level insertion: create an entry for `key` and link it into the table,
 * without setting its value.
 *
 * If a rehash is in progress, one rehash step is performed first and the
 * new entry is placed in ht[1]; otherwise it is placed in ht[0]. The entry
 * is inserted at the head of its bucket chain.
 *
 * Returns the new entry, or NULL when _dictKeyIndex returns -1.
 */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictEntry *node;
    dictht *target;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Bucket index for the new entry; -1 means it cannot be inserted. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    /* While rehashing, new entries always go to the new table. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    /* Head insertion into the bucket chain. */
    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Store the key in the new entry. */
    dictSetKey(d, node, key);
    return node;
}
/*
 * Compute the bucket index at which `key` should be inserted.
 *
 * The table is expanded first if _dictExpandIfNeeded decides so. ht[0] is
 * then searched for the key, and ht[1] as well when a rehash is in progress,
 * so during a rehash the returned index refers to ht[1].
 *
 * Returns -1 when the expansion fails or when the key is already present,
 * otherwise the bucket index.
 */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int hash, slot, t;
    dictEntry *cur;

    /* Make sure there is room; grow the table when required. */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;

    hash = dictHashKey(d, key);
    for (t = 0; t <= 1; t++) {
        slot = hash & d->ht[t].sizemask;
        /* Walk the collision chain looking for an equal key. */
        for (cur = d->ht[t].table[slot]; cur != NULL; cur = cur->next) {
            if (dictCompareKeys(d, key, cur->key))
                return -1;
        }
        /* The second table only matters while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return slot;
}

4、_dictExpandIfNeeded 与 dictExpand：前者判断是否需要扩容，后者执行实际的扩容操作。扩容时只分配新表，并不立即移动元素；元素的迁移被分摊到后续的渐进式rehash中完成。

/*
 * Decide whether the hash table has to grow, and grow it if so.
 *
 * - While a rehash is in progress nothing is done.
 * - An empty table (size 0) is created with DICT_HT_INITIAL_SIZE.
 * - Otherwise the table is expanded to used*2 once used >= size, provided
 *   resizing is allowed (dict_can_resize) or the used/size ratio exceeds
 *   dict_force_resize_ratio.
 *
 * Returns DICT_OK when no expansion is needed, otherwise the result of
 * dictExpand.
 */
static int _dictExpandIfNeeded(dict *d)
{
    dictht *main_ht = &d->ht[0];

    /* An incremental rehash is already under way: nothing to do. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* First use: allocate the initial table. */
    if (main_ht->size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Still fewer entries than buckets: no growth required. */
    if (main_ht->used < main_ht->size) return DICT_OK;

    /* Resizing is disabled and the load has not passed the forced ratio. */
    if (!dict_can_resize &&
        main_ht->used/main_ht->size <= dict_force_resize_ratio)
        return DICT_OK;

    /* Grow to twice the number of stored entries. */
    return dictExpand(d, main_ht->used*2);
}
 
/*
 * Expand the hash table, or create the first table when the dict is empty.
 *
 * `size` is rounded by _dictNextPower() before allocation (presumably up to
 * a power of two, since realsize-1 is stored as the index mask -- confirm
 * against _dictNextPower).
 *
 * Returns DICT_ERR, leaving the dict untouched, when:
 *   - a rehash is already in progress;
 *   - `size` is smaller than the number of entries already stored in ht[0];
 *   - the rounded size equals the current size of ht[0], because rehashing
 *     into a table of the same size would only waste an allocation and a
 *     full pass over the entries.
 * Otherwise returns DICT_OK.
 *
 * On the first allocation the new table becomes ht[0]. Otherwise it is
 * installed as ht[1] and rehashidx is set to 0; no entry is moved here.
 */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* The request is invalid while rehashing, or when it cannot hold the
     * entries that are already stored. */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size)
        return DICT_ERR;

    /* Allocate the new table with every bucket set to zero. */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* First initialisation: this is not a rehash, just install the table. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Install the new table as ht[1]; entries are migrated step by step by
     * later rehash steps. */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}

5、dictRehash：dict的渐进式rehash操作，把迁移工作分散到多次调用中完成，每次调用最多迁移n个桶（每个桶连同其整条冲突链一起迁移）。

/* Perform up to n steps of incremental rehashing. Each step moves one
 * non-empty bucket of ht[0], with its whole collision chain, into ht[1].
 *
 * Returns 1 when entries remain in ht[0] (including the case where the
 * empty-bucket budget ran out), and 0 when no rehash is in progress or the
 * rehash has just been completed. */
int dictRehash(dict *d, int n) { /* n is the maximum number of buckets to migrate */
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    /* This budget bounds how many empty buckets one call may skip over, so
     * that a single call always finishes in bounded time. */
    if (!dictIsRehashing(d)) return 0; /* Not rehashing: there is nothing to move. */

    while(n-- && d->ht[0].used != 0) { /* Steps left and the old table still holds entries. */
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx); /* The cursor must stay inside the old table. */
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        /* The loop above skips empty buckets; once 10*n of them (the
         * empty_visits budget) have been visited the function returns. */
        de = d->ht[0].table[d->rehashidx]; /* Head of the first non-empty bucket. */
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) { /* Re-insert every entry of the chain into the new table. */
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            /* The bucket index is the hash value ANDed with sizemask
             * (size-1). Assuming the size is a power of two, sizemask is
             * all ones in binary, so the AND keeps the low bits of the hash
             * and drops the high bits. */
            de->next = d->ht[1].table[h]; /* Head insertion into the new bucket. */
            d->ht[1].table[h] = de;
            d->ht[0].used--; /* Keep both entry counters up to date. */
            d->ht[1].used++;
            de = nextde; /* Continue with the next entry of the old chain. */
        }
        d->ht[0].table[d->rehashidx] = NULL;
        /* The whole chain has been moved: clear the old bucket. */
        d->rehashidx++;
        /* Advance the rehash cursor; one step is complete. */
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) { /* Every entry has left the old table. */
    /* Free the old bucket array, make the new table ht[0], and reset ht[1]
     * to the empty state. */
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

6、查找元素过程,首先计算hash值, 然后计算在ht[0]和ht[1]中索引位置, 进行查找.

/* Look up `key` and return its entry, or NULL when it is not present.
 *
 * If a rehash is in progress, one rehash step is performed first, and the
 * key is then searched in ht[0] and in ht[1]; otherwise only ht[0] is
 * searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *cur;
    unsigned int hash, slot, t;

    /* No table has been allocated yet, so nothing can be found. */
    if (d->ht[0].size == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    hash = dictHashKey(d, key);
    for (t = 0; t <= 1; t++) {
        slot = hash & d->ht[t].sizemask;
        /* Walk the collision chain of this bucket. */
        for (cur = d->ht[t].table[slot]; cur != NULL; cur = cur->next) {
            if (dictCompareKeys(d, key, cur->key))
                return cur;
        }
        /* The second table is only relevant while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}

7、dictGenericDelete 该方法主要供 dictUnlink 与 dictDelete 调用：前者把元素从表中摘除并返回该元素（不释放），后者直接删除并释放该元素。dictUnlink 返回的是被摘除的元素，而 dictDelete 返回的是成功或失败。两者调用 dictGenericDelete 时的区别在于 nofree 参数是否为1。

/* Find the first entry whose key matches `key`, unlink it from its bucket
 * chain and return it. When nofree is 0 the key, the value and the entry
 * itself are also released. Returns NULL when no matching entry exists. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    /* Only one entry with a matching key is removed per call. */
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
    /* Both tables are empty: nothing to delete. */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    /* While rehashing, perform a single rehash step first. */
    h = dictHashKey(d, key);
    /* Hash value of the key. */

    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask; /* Bucket index in this table. */
        he = d->ht[table].table[idx]; /* First entry of the bucket chain. */
        prevHe = NULL; /* Predecessor of he in the chain; NULL while he is the head. */
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                /* Same pointer or equal key: this is the entry to remove. */
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                /* The entry is now detached from the chain. */
                if (!nofree) {
                    /* The caller did not ask to keep the entry: release it. */
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                d->ht[table].used--;
                /* One entry fewer in this table. */
                return he; /* NOTE(review): when nofree is 0, he was just passed to zfree, so the returned pointer is dangling; callers may only test it against NULL and must not dereference it. */
            }
            prevHe = he; /* Keep searching along the chain. */
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
        /* When not rehashing only ht[0] is searched, so stop here. */
        /* While rehashing, both tables have to be searched. */
    }
    return NULL; /* not found */
}

/* Destroy an entire hash table.
 *
 * Every entry of `ht` is released (key, value and the entry itself), then
 * the bucket array is freed and the table is reset to the empty state.
 * When `callback` is non-NULL it is invoked with d->privdata on every
 * bucket index that is a multiple of 65536, starting with index 0.
 *
 * Always returns DICT_OK. */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
    unsigned long bucket;

    /* Stop early once no entries are left in the table. */
    for (bucket = 0; bucket < ht->size && ht->used > 0; bucket++) {
        dictEntry *cur, *following;

        if (callback && (bucket & 65535) == 0) callback(d->privdata);

        /* Release the whole chain of this bucket; an empty bucket simply
         * skips the loop. */
        cur = ht->table[bucket];
        while (cur != NULL) {
            following = cur->next;
            dictFreeKey(d, cur);
            dictFreeVal(d, cur);
            zfree(cur);
            ht->used--;
            cur = following;
        }
    }

    /* Release the bucket array and put the table back into the empty state. */
    zfree(ht->table);
    _dictReset(ht);
    return DICT_OK; /* never fails */
}

8、下面是关于iterator的一些实现，这里多了一个fingerprint（指纹）字段。从源码来看，该值由dictFingerprint根据当前表的状态计算得到：取两个表的地址、容量与已使用数量共6个值，用Thomas Wang的64位整数哈希（Thomas Wang's 64 bit integer hash）依次混合得到一个hash值。它用于检测非安全迭代器的误用：如果迭代期间表有了改动，释放迭代器时指纹就会不一致，断言失败。

/* Allocate a non-safe iterator over `d`, positioned before the first
 * bucket of table 0 (index -1, no current or next entry). */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->d = d;            /* dict being iterated */
    it->safe = 0;         /* not a safe iterator */
    it->table = 0;        /* start with ht[0] */
    it->index = -1;       /* no bucket visited yet */
    it->entry = NULL;     /* no current entry */
    it->nextEntry = NULL; /* no saved successor */
    return it;
}
 
/* Create an iterator exactly like dictGetIterator does, then flag it as
 * safe. */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *safe_iter;

    safe_iter = dictGetIterator(d);
    safe_iter->safe = 1;
    return safe_iter;
}
 
/* Advance the iterator and return the next entry, or NULL once every
 * bucket has been visited (including those of ht[1] while rehashing). */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) { /* No current entry: move on to the next bucket. */
            dictht *ht = &iter->d->ht[iter->table];
            if (iter->index == -1 && iter->table == 0) {
                /* First call on this iterator. */
                /* A safe iterator increments the dict's iterator counter; */
                /* a non-safe one records the dict's current fingerprint. */
                if (iter->safe)
                    iter->d->iterators++;
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            /* Step to the next bucket index. */
            iter->index++;
            if (iter->index >= (long) ht->size) {
                /* Past the end of the current table. Two cases: */
                /* every entry has been visited, or a rehash is in progress and ht[1] still has to be walked after ht[0]. */
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    /* Switch to the first bucket of the new table. */
                    iter->table++;
                    iter->index = 0;
                    ht = &iter->d->ht[1];
                } else {
                    /* All entries have been visited: stop. */
                    break;
                }
            }
            iter->entry = ht->table[iter->index];
            /* Head of the bucket just reached (may be NULL). */
        } else {
            iter->entry = iter->nextEntry;
            /* Continue along the current chain using the saved successor. */
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            /* An entry was found: remember its successor and return it. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL; /* No more entries. */
}
 
/* Release an iterator.
 *
 * If the iterator was advanced at least once (it is no longer at index -1
 * of table 0), a safe iterator decrements the dict's iterator counter and
 * a non-safe one asserts that the dict's fingerprint is unchanged since
 * iteration started. The iterator memory is then freed. */
void dictReleaseIterator(dictIterator *iter)
{
    if (iter->index != -1 || iter->table != 0) {
        if (!iter->safe)
            assert(iter->fingerprint == dictFingerprint(iter->d));
        else
            iter->d->iterators--;
    }

    /* Free the iterator itself. */
    zfree(iter);
}

Redis哈希表的实现比较难理解。为了避免扩容时一次性迁移全部数据造成的长时间阻塞，它采用渐进式rehash的思想，把迁移工作分摊到多次操作中：当哈希表需要扩容的时候，并不是直接全部更新，而是同时维护两个哈希表，即旧表ht[0]和新表ht[1]。当处于扩容（rehash）状态时，每进行一次插入、查找或删除操作，就顺带执行一步rehash，把旧表中的一部分数据迁移到新表中。当所有数据迁移完成后，释放旧表，新表成为ht[0]，ht[1]被重置为空表。这样扩容的过程就变成了分步进行，减少了因扩容而产生的长时间等待。

原文：https://blog.csdn.net/qq_30085733/article/details/79843175

一定迟到逗

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
redis hash的数据结构和实现过程

1、数据结构/*Hash表一个节点包含Key,Value数据对 */typedef struct dictEntry { void *key; union { void *val; uint64_t u64; int64_t s64; double d; } v; struct dictEntry ...
复制链接

扫一扫