redis源码分析——5、dict实现

最新推荐文章于 2022-08-02 23:58:27 发布

繁华落尽梦一场

最新推荐文章于 2022-08-02 23:58:27 发布

阅读量205

点赞数

分类专栏：源码 redis C/C++ 文章标签： redis 数据库 database

本文链接：https://blog.csdn.net/zhangqi_gsts/article/details/121005206

版权

C/C++ 同时被 3 个专栏收录

30 篇文章 0 订阅

订阅专栏

源码

6 篇文章 0 订阅

订阅专栏

redis

6 篇文章 0 订阅

订阅专栏

dics是一种常用的数据结构，而C语言没有提供内置类型，本文一起看看redis中dict的实现

1. 怎么样自己实现一个dict

不像C++、java等高级语言内置了map，C语言并没有提供dict库，所以如果想使用dict就需要自己实现。那么实现一个dict有方法呢？

数组法
- 这也是最容易实现的一种方法，简单说就是开辟一个长度为N的大数组（通常N是一个质数），然后通过一个哈希函数计算key的哈希值d，然后用d%N得到key对于的数组下边。设想一下，如果数组开的足够大，而且哈希函数足够散列，我们可以保证不同的key的对应的下标绝不会冲突，那么这样就可以实现一个dict了。
- 上述介绍实现dict的方法在实际中有很大的限制，比如我们不知道数组开大多大合适，即使开了一百万，那么随着业务的增长，这一百万也不够。其次没有一个哈希函数能够保证不同key的哈希值不重复。
- 鉴于此，我们在用数组实现dict的时候数组长度一般是动态的，根据元素个数/数组长度这一指标来判断扩缩容，其次我们找一个尽量能打散的哈希函数。最后，对于冲突的key，我们可以简单的使用拉链法解决冲突。
红黑树法
- C++中的map底层实现就是红黑树。红黑树的核心就是尽量保证二叉树是左右平衡的，所以红黑树的实现较为复杂，需要区分各种情况旋转。不需要哈希函数，只需要实现key的比较函数就可以了，所以也就不存在key冲突。
跳表法
- 如果想实现红黑树的效果，但是又不想实现红黑树，那么跳表是个不错的选择，它的效率几乎和红黑树等价。redis中zset底层就使用的了调表，具体实现后续再说。

2. redis中dict的实现

redis中dict的实现用了上述第一种方法也就是数组拉链法，对于哈希函数，redis选用了Murmurhash2。下面看看结构定义。

typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry; /* entry可以认为是一个节点，除了存放value以外还存放了key和next，这是因为如果发生了冲突，我们需要进一步比较key */

typedef struct dictType {
    uint64_t (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType; /* 这里定义一些接口，可以实现不同的处理函数达，这样就达到了接口的效果 */

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table; /* 这儿为什么是指针的指针？ 其实是指针数组，每个元素存放的entry链表的头指针 */
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht; /* 定义了数组，table其实是个指针数组，根据下标可以访问到key*/

typedef struct dict {
    dictType *type;	/* 自定义各种字典的实现接口 */
    void *privdata; /* 自定义数据字段 */
    dictht ht[2]; /* 2个table，主要是在rehash时候用*/
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

3. 基本操作

公共操作

根据key查找数组中的index

/* Returns the index of a free slot that can be populated with
 * a hash entry for the given 'key'.
 * If the key already exists, -1 is returned
 * and the optional output parameter may be filled.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table. */
/* 根据key计算其所应该在的槽位，如果key已经存在，则返回-1 
 * 函数同时传入key和哈希值两个参数，这是因为可能出现冲突，仅仅靠哈希值还不能判断是否存在，必须还要对比key是否相同
 * 如果传入的existing不为空，则existing返回已经存在的结点地址
 */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long idx, table;
    dictEntry *he;
    if (existing) *existing = NULL;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    for (table = 0; table <= 1; table++) {
        idx = hash & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                if (existing) *existing = he;
                return -1;
            }
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}

扩容

/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    /* 如果数组还是空，则直接扩容到DICT_HT_INITIAL_SIZE，默认是4 */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    /* 触发扩容有几个条件
     * 如果“填充率”>=1，也就是used>=size时应该扩容，当然也要看当前容不容许扩容
     * 如果“填充率”>dict_force_resize_ratio（默认是5），也就是说平均冲突率大于5的时候就强制扩容
     */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used*2);
    }
    return DICT_OK;
}

rehash

/* 几乎所有的dict操作函数中都有_dictRehashStep调用，这其实是将扩容过程分摊了，否组扩容可能会消耗很长时间 */
static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d,1);
}

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        while(d->ht[0].table[d->rehashidx] == NULL) { /* 找到非空结点 */
            d->rehashidx++;
            if (--empty_visits == 0) return 1; /* 这一行在3.0是没有的，这么写是防止while卡在空结点上 */
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask; /* 将该槽下的所有结点插入到新table的对应槽位 */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    /* 如果h[0]的元素为空，这说明rehash完成，则将h[0]释放，且h[0]接管h[1]*/
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

创建

/* Create a new hash table */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *d = zmalloc(sizeof(*d));

    _dictInit(d,type,privDataPtr);
    return d;
}

/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    _dictReset(&d->ht[0]); /* 所有成员置为0或者null*/
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;
    d->iterators = 0;
    return DICT_OK;
}

插入

/* 根据key新建一个entry，如果key已经存在，则返回null */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    /* 对于新加结点，如果key已经存在，则直接返回NULL*/
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    /* 根据就近访问原则，新节点插入到头 */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    /* 设置结点的key */
    dictSetKey(d, entry, key);
    return entry;
}

/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
    /* 首先根据key新建了一个entry */
    dictEntry *entry = dictAddRaw(d,key,NULL);

    if (!entry) return DICT_ERR;
    /* 给该entry设置value */
    dictSetVal(d, entry, val);
    return DICT_OK;
}

删除

/* Remove an element, returning DICT_OK on success or DICT_ERR if the
 * element was not found. */
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}

/* Search and remove an element. This is an helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions. */
/* 根据key删除元素 */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL; /* dict为空，短路返回*/

    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key); /* 计算hash值 */

    for (table = 0; table <= 1; table++) { /* 分别变量h[0]和h[1] */
        idx = h & d->ht[table].sizemask; /* 根据key的hash值计算出key在hash tale中的槽位 */
        he = d->ht[table].table[idx]; /* 得到同hash 值的entry列表 */
        prevHe = NULL;
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) { /* 找到了key */
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next; /* 链表的删除逻辑 */
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                d->ht[table].used--;
                return he;
            }
            prevHe = he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}

原文出处：http://ditanshow.com/articles/p/317.html

繁华落尽梦一场

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
redis源码分析——5、dict实现

dics是一种常用的数据结构，而C语言没有提供内置类型，本文一起看看redis中dict的实现1. 怎么样自己实现一个dict 不像C++、java等高级语言内置了map，C语言并没有提供dict库，所以如果想使用dict就需要自己实现。那么实现一个dict有方法呢？数组法这也是最容易实现的一种方法，简单说就是开辟一个长度为N的大数组（通常N是一个质数），然后通过一个哈希函数计算key的哈希值d，然后用d%N得到key对于的数组下边。设想一下，如果数组开的足够大，而且哈希函数足够散列，我们.
复制链接

扫一扫