Redis 2.8源码 - 字典dict的实现

俊康0813

已于 2023-11-20 18:54:46 修改

阅读量31

点赞数

分类专栏： redis 文章标签： redis 哈希算法数据库

于 2023-11-20 18:53:49 首次发布

本文链接：https://blog.csdn.net/junkangaike/article/details/134516205

版权

redis 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

redis-dict字典

redis-dict字典结构

如果大家对 C 源码不太了解，可以先把 dict 简单理解为 Java 里的 HashMap：底层同样是“数组 + 链表”的结构。

下面以 redisDb 为入口，来介绍 dict 的结构。

/* Per-database state. Apart from id and avg_ttl, every field is a pointer
 * to a dict (the hash table type described further below). */
typedef struct redisDb {
    /* Per the original note, Redis has databases numbered 0-15 which are set
     * up during server initialization in redis.c; each redisDb has its own
     * keyspace dict. */
    dict *dict;                 /* The keyspace for this DB */
    /* Holds the keys that have an expiration time set. */
    dict *expires;              /* Timeout of keys with a timeout set */
    dict *blocking_keys;        /* Keys with clients waiting for data (BLPOP) */
    dict *ready_keys;           /* Blocked keys that received a PUSH */
    dict *watched_keys;         /* WATCHED keys for MULTI/EXEC CAS */
    int id;
    long long avg_ttl;          /* Average TTL, just for stats */
} redisDb;
/* dict is the concrete hash table implementation; it is the type behind the
 * dict pointers stored in redisDb. */
typedef struct dict {
    /* Table of function pointers supplying the type-specific behavior,
     * mainly the hash function and the key comparison. */
    dictType *type;
    /* Opaque pointer; presumably forwarded as the privdata argument of the
     * dictType callbacks (the macros doing so are not shown here). */
    void *privdata;
    /* Two hash tables: ht[0] stores the data in normal operation, ht[1] is
     * only used while a rehash is in progress. */
    dictht ht[2];
    /* -1 when no rehash is running. dictExpand sets it to 0 to start a
     * rehash, and dictRehash then advances it as the index of the next
     * ht[0] bucket to migrate. */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;
/* Table of function pointers that customizes a dict for one kind of
 * key/value. */
typedef struct dictType {
    /* Hash function: computes the hash value of a key. */
    unsigned int (*hashFunction)(const void *key);
    /* Key / value duplication hooks (per the article's notes they are used
     * when a key or value is stored, if set). */
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    /* Compares two keys. */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* Key / value destructor hooks. */
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
/* A dict owns two dictht: ht[0] is the one used normally, ht[1] is used
 * while rehashing. */
typedef struct dictht {
    /* The bucket array: each slot is the head of a linked list of entries
     * (comparable to the array + linked list layout of a Java HashMap). */
    dictEntry **table;
    /* Number of buckets; dictExpand sets it to the value returned by
     * _dictNextPower (a power of two, per the original note). */
    unsigned long size;
    /* size - 1; ANDed with a hash value to obtain the bucket index. */
    unsigned long sizemask;
    /* Number of entries stored; incremented each time an entry is added. */
    unsigned long used;
} dictht;

/* A single hash table entry, comparable to the Node of a Java HashMap. */
typedef struct dictEntry {
    /* The key. */
    void *key;
    /* The value: a pointer, a 64-bit unsigned/signed integer or a double
     * sharing the same storage. */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    /* Next entry in the same bucket. Entries whose hashes map to the same
     * bucket are chained; new entries are inserted at the head (O(1)). */
    struct dictEntry *next;
} dictEntry;

dict的具体方法解析

创建字典dictCreate

这里参考 redis.c 中的调用路径：main 函数 → initServer → “Create the Redis databases, and initialize other internal state” 这一段代码。

/* Create the Redis databases, and initialize other internal state. */
    /* Walk over all server.dbnum databases and create the dicts of each. */
    for (j = 0; j < server.dbnum; j++) {
        /* Only this part matters for the walkthrough: every dictCreate call
         * receives the dictType (function pointer table) for that dict and
         * NULL as the private data pointer used by callbacks. */
        server.db[j].dict = dictCreate(&dbDictType,NULL);
        server.db[j].expires = dictCreate(&keyptrDictType,NULL);
        server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
        server.db[j].ready_keys = dictCreate(&setDictType,NULL);
        server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
        server.db[j].id = j;
        server.db[j].avg_ttl = 0;
    }

/* Create a new dictionary.
 *
 * Allocates the dict structure with zmalloc and passes it to _dictInit
 * together with the type table and the private data pointer.
 * Returns the newly allocated dict. */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *fresh;

    fresh = zmalloc(sizeof(*fresh));
    _dictInit(fresh, type, privDataPtr);
    return fresh;
}

/* Put a freshly allocated dict into its initial state.
 *
 * Stores the type table and private data pointer, marks the dict as not
 * rehashing (rehashidx == -1) with no running iterators, and resets both
 * hash tables. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type, void *privDataPtr)
{
    int t;

    d->iterators = 0;
    d->rehashidx = -1;      /* -1 means "no rehash in progress" */
    d->privdata = privDataPtr;
    d->type = type;

    /* Reset ht[0] and ht[1]. */
    for (t = 0; t < 2; t++)
        _dictReset(&d->ht[t]);

    return DICT_OK;
}
/* Reset a hash table to the empty state: all counters are zeroed and the
 * bucket array pointer is set to NULL. The old bucket array, if any, is
 * not freed here. */
static void _dictReset(dictht *ht)
{
    ht->used = ht->sizemask = ht->size = 0;
    ht->table = NULL;
}

/* Expand the hash table if needed.
 *
 * Returns DICT_OK when nothing has to be done, otherwise the result of
 * dictExpand. */
static int _dictExpandIfNeeded(dict *d)
{
    dictht *main_ht;

    /* An incremental rehash is already running: nothing to decide now. */
    if (dictIsRehashing(d))
        return DICT_OK;

    main_ht = &d->ht[0];

    /* Empty table: create it with the initial size. */
    if (main_ht->size == 0)
        return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Below the 1:1 elements/buckets ratio there is nothing to do. */
    if (main_ht->used < main_ht->size)
        return DICT_OK;

    /* At or above 1:1 we grow only if resizing is allowed globally, or if
     * the ratio went over the "safe" forced-resize threshold. */
    if (!dict_can_resize &&
        main_ht->used / main_ht->size <= dict_force_resize_ratio)
        return DICT_OK;

    /* Ask for twice the number of stored elements. */
    return dictExpand(d, main_ht->used * 2);
}

/* Expand or create the hash table.
 *
 * Used both for the very first allocation of ht[0] and for resizing: the
 * requested size is turned into a bucket count by _dictNextPower.
 *
 * Returns DICT_ERR if a rehash is already in progress or if size is
 * smaller than the number of elements stored in ht[0]; DICT_OK otherwise. */
int dictExpand(dict *d, unsigned long size)
{
    dictht fresh; /* the new hash table */
    unsigned long buckets = _dictNextPower(size);

    /* The size is invalid if it cannot hold the elements already stored. */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Build the new table; zcalloc leaves every bucket pointer zeroed. */
    fresh.table = zcalloc(buckets * sizeof(dictEntry*));
    fresh.size = buckets;
    fresh.sizemask = buckets - 1;
    fresh.used = 0;

    if (d->ht[0].table != NULL) {
        /* A table already exists: install the new one as ht[1] and start
         * the incremental rehash from bucket 0. */
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        /* First initialization: not really a rehash, ht[0] is simply set
         * so that it can accept keys. */
        d->ht[0] = fresh;
    }
    return DICT_OK;
}


/* Add an element, or overwrite the value if the key already exists.
 *
 * Returns 1 if the key was added from scratch, 0 if an element with that
 * key was already present and only its value was updated. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing;
    dictEntry previous;

    /* dictAdd fails with something other than DICT_OK when it could not
     * insert the key; in that case update the entry found by dictFind. */
    if (dictAdd(d, key, val) != DICT_OK) {
        existing = dictFind(d, key);

        /* Keep a copy of the entry so the old value can be released after
         * the new one is in place. The order matters: the new value may be
         * exactly the same as the old one, so (thinking of reference
         * counting) we must set first and free afterwards. */
        previous = *existing;
        dictSetVal(d, existing, val);
        dictFreeVal(d, &previous);
        return 0;
    }
    return 1;
}

/* Look up a key.
 *
 * Returns the matching entry, or NULL if the key is not present (also when
 * ht[0] has no table at all). If a rehash is in progress, one rehash step
 * is attempted first and both tables are searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    uint64_t hash, slot, t;
    dictEntry *node;

    if (d->ht[0].size == 0)
        return NULL; /* We don't have a table at all */
    if (dictIsRehashing(d))
        _dictRehashStep(d);

    hash = dictHashKey(d, key);
    for (t = 0; t <= 1; t++) {
        slot = hash & d->ht[t].sizemask;
        /* Walk the chain hanging off this bucket. */
        for (node = d->ht[t].table[slot]; node != NULL; node = node->next) {
            if (dictCompareKeys(d, key, node->key))
                return node;
        }
        /* ht[1] only needs to be searched while rehashing. */
        if (!dictIsRehashing(d))
            break;
    }
    return NULL;
}

/* Incremental rehash, overall plan:
 *  - a new ht[1].table is created (done by dictExpand);
 *  - every entry of ht[0].table is migrated to ht[1].table;
 *  - the emptied ht[0].table is freed and ht[1] becomes the new ht[0].
 *
 * dictRehash performs up to n steps of that migration; each step moves one
 * non-empty bucket (its whole chain) from ht[0] to ht[1].
 *
 * Returns 0 when no rehash is in progress or when the migration was found
 * complete and the tables were swapped; returns 1 after n buckets have been
 * moved (the swap, if due, then happens on a later call). */
int dictRehash(dict *d, int n) {
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        /* Skip empty buckets until one holding a chain is found. */
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            /* Save the successor first: de->next is overwritten below. */
            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            /* Insert at the head of the destination chain. */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        /* The old bucket is now empty; continue from the next index. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}

新增元素dictAdd

/* Add an element to the target hash table.
 *
 * d is the dict to insert into (when serving a request it is reached
 * through the redisDb). Unlike dictReplace, which overwrites the value of
 * an existing key, dictAdd fails when the key is already present.
 *
 * Returns DICT_OK on success, DICT_ERR if dictAddRaw could not create the
 * entry. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *slot = dictAddRaw(d, key);

    if (slot != NULL) {
        /* Store the value in the new entry. */
        dictSetVal(d, slot, val);
        return DICT_OK;
    }
    /* NULL from dictAddRaw: no entry was created. */
    return DICT_ERR;
}

/* Low level add: create the entry for key and link it into the table,
 * leaving the value unset for the caller to fill in.
 *
 * Returns the new entry, or NULL when _dictKeyIndex reports -1 (the key
 * already exists, or the table could not be expanded). */
dictEntry *dictAddRaw(dict *d, void *key)
{
    long slot;
    dictht *target;
    dictEntry *node;

    /* A rehash is in progress when rehashidx != -1; if so, do one step. */
    if (dictIsRehashing(d))
        _dictRehashStep(d);

    /* Get the index of the new element, or -1 if the element already
     * exists. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    /* While rehashing, new entries always go to ht[1]; otherwise ht[0] is
     * the only table in use. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    /* Allocate the entry and insert it at the head of the bucket chain. */
    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, node, key);
    return node;
}



/* Return the bucket index where key can be inserted, or -1 if the key is
 * already present (or if the table could not be expanded).
 *
 * While a rehash is in progress both tables are checked and the returned
 * index refers to ht[1]; otherwise it refers to ht[0]. */
static long _dictKeyIndex(dict *d, const void *key)
{
    unsigned long hash, slot, t;
    dictEntry *node;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;

    /* Compute the key hash value through the dict's hashFunction. */
    hash = dictHashKey(d, key);

    for (t = 0; t <= 1; t++) {
        /* sizemask is size - 1, so the AND keeps the low bits of the hash
         * (equivalent to hash % size when size is a power of two). */
        slot = hash & d->ht[t].sizemask;

        /* Search if this slot does not already contain the given key */
        for (node = d->ht[t].table[slot]; node; node = node->next) {
            if (dictCompareKeys(d, key, node->key))
                return -1;
        }

        /* ht[1] is only relevant while rehashing. */
        if (!dictIsRehashing(d))
            break;
    }
    return slot;
}

/* Decide whether the hash table must be created or grown, and do it.
 *
 * Returns DICT_OK when no expansion is required, otherwise whatever
 * dictExpand returns. */
static int _dictExpandIfNeeded(dict *d)
{
    unsigned long used, size;

    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d))
        return DICT_OK;

    /* A size of 0 means the table was never allocated: create it with the
     * initial size. */
    size = d->ht[0].size;
    if (size == 0)
        return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Once the elements/buckets ratio reaches 1:1, grow if resizing is
     * allowed (global setting), or - when it should be avoided - only if
     * the ratio is over the "safe" threshold. The new size requested is
     * twice the number of elements. */
    used = d->ht[0].used;
    if (used >= size) {
        if (dict_can_resize || used/size > dict_force_resize_ratio)
            return dictExpand(d, used*2);
    }
    return DICT_OK;
}

/* Expand or create the hash table.
 *
 * Returns DICT_ERR when a rehash is already running or when size is
 * smaller than the number of elements already stored in ht[0]; DICT_OK
 * otherwise. */
int dictExpand(dict *d, unsigned long size)
{
    dictht created; /* the new hash table */
    /* Bucket count derived from the requested size by _dictNextPower. */
    unsigned long nbuckets = _dictNextPower(size);
    int first_init;

    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    created.used = 0;
    created.size = nbuckets;
    created.sizemask = nbuckets-1;
    created.table = zcalloc(nbuckets*sizeof(dictEntry*));

    /* Is this the first initialization? If so it's not really a rehashing,
     * we just set the first hash table so that it can accept keys. */
    first_init = (d->ht[0].table == NULL);
    if (first_init) {
        d->ht[0] = created;
        return DICT_OK;
    }

    /* Otherwise the new table becomes ht[1] and rehashidx is set to 0,
     * which is what dictIsRehashing-style checks detect as "rehashing". */
    d->ht[1] = created;
    d->rehashidx = 0;
    return DICT_OK;
}

/* Compute the hash of key by calling the hashFunction of the dict's type
 * table. */
#define dictHashKey(d, key) (d)->type->hashFunction(key)

dictReplace(key存在的时候，替换value)

/* Insert key/val, replacing the value when the key is already present.
 *
 * Returns 1 when dictAdd inserted a brand new entry, 0 when an existing
 * entry had its value replaced. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *found;
    dictEntry old_copy;
    int added;

    /* Try a plain add first: DICT_OK means the key was new. */
    added = (dictAdd(d, key, val) == DICT_OK);
    if (added)
        return 1;

    /* The add failed: fetch the existing entry. */
    found = dictFind(d, key);

    /* Snapshot the entry, install the new value, then release the old
     * value through the snapshot (set before free). */
    old_copy = *found;
    dictSetVal(d, found, val);
    dictFreeVal(d, &old_copy);
    return 0;
}

/* Find the entry holding key.
 *
 * Returns the entry, or NULL when the key is absent. _dictKeyIndex only
 * yields a bucket index; this function walks the bucket chain and returns
 * the dictEntry itself. */
dictEntry *dictFind(dict *d, const void *key)
{
    uint64_t keyhash, bucket, which;
    dictEntry *cur;

    /* size == 0 means there is no table, hence no data. */
    if (d->ht[0].size == 0)
        return NULL; /* We don't have a table at all */

    /* Contribute one step to an ongoing rehash. */
    if (dictIsRehashing(d))
        _dictRehashStep(d);

    keyhash = dictHashKey(d, key);

    for (which = 0; which <= 1; which++) {
        bucket = keyhash & d->ht[which].sizemask;
        cur = d->ht[which].table[bucket];
        while (cur != NULL) {
            /* Same key: this is the entry we are looking for. */
            if (dictCompareKeys(d, key, cur->key))
                return cur;
            cur = cur->next;
        }
        /* The second table is only searched during a rehash. */
        if (!dictIsRehashing(d))
            break;
    }
    return NULL;
}

扩容dictRehash(rehash)

dictRehash 的主要工作是数据迁移：把 ht[0] 中的节点搬到 ht[1]，全部搬完后让 ht[0] 指向新表，并把 rehashidx 重新设置成 -1。迁移并不是由触发扩容的那一次请求一次性完成的，而是渐进式的：之后每次对该 dict 的操作（新增、查找、删除）进来时都会顺带执行一步 rehash（前提是当前没有正在运行的迭代器），每一步迁移下一个非空下标上的整条链表。

/* Perform a single step of incremental rehashing. Every operation that
 * calls this contributes one step; the step is skipped while any iterator
 * is running on the dict. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0)
        return;
    dictRehash(d, 1);
}

/* Perform up to n steps of incremental rehashing; each step migrates one
 * non-empty bucket (its whole chain) from ht[0] to ht[1].
 *
 * Returns 0 when no rehash is in progress or when the migration was found
 * complete and the tables were swapped; returns 1 after n buckets have been
 * moved (the swap, if due, then happens on a later call). */
int dictRehash(dict *d, int n) {
    /* Nothing to do if no rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        /* used == 0 means every entry has left ht[0]: free the old bucket
         * array, promote ht[1] to ht[0], reset ht[1] and mark the rehash
         * as finished. */
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        /* Advance to the next bucket that holds a non-empty chain. */
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        /* Head of that chain. */
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            /* As seen in dictExpand, a non-initial expansion installs the new
             * table as ht[1], so its sizemask is the one used here. */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            /* Migrate by head insertion. When the new table is twice the old
             * size, the entry lands either at its old index or at old index
             * + old size. ht[0].used is decremented per entry; reaching 0
             * means everything has been migrated. */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        /* This bucket is done; the next step resumes from the next index. */
        d->rehashidx++;
    }
    return 1;
}

删除节点dictDelete

/* Remove key from the dict, releasing its key and value.
 * Thin wrapper over dictGenericDelete with nofree set to 0; returns that
 * function's result. */
int dictDelete(dict *ht, const void *key) {
    const int nofree = 0;

    return dictGenericDelete(ht, key, nofree);
}

/* Search for key and remove its entry.
 *
 * When nofree is zero the key and value are released through dictFreeKey /
 * dictFreeVal before the entry is freed; when it is non-zero only the entry
 * itself is freed.
 *
 * Returns DICT_OK if an entry was removed, DICT_ERR if the table is empty
 * or the key was not found. */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
    uint64_t hash, slot;
    dictEntry *node, *before;
    int t;

    if (d->ht[0].size == 0)
        return DICT_ERR; /* d->ht[0].table is NULL */
    if (dictIsRehashing(d))
        _dictRehashStep(d);

    hash = dictHashKey(d, key);

    for (t = 0; t <= 1; t++) {
        slot = hash & d->ht[t].sizemask;

        /* Walk the chain, remembering the predecessor for unlinking. */
        for (before = NULL, node = d->ht[t].table[slot];
             node;
             before = node, node = node->next)
        {
            if (dictCompareKeys(d, key, node->key)) {
                /* Unlink the element from the list */
                if (before == NULL)
                    d->ht[t].table[slot] = node->next;
                else
                    before->next = node->next;

                if (!nofree) {
                    dictFreeKey(d, node);
                    dictFreeVal(d, node);
                }
                zfree(node);
                d->ht[t].used--;
                return DICT_OK;
            }
        }

        /* ht[1] only needs to be searched while rehashing. */
        if (!dictIsRehashing(d))
            break;
    }
    return DICT_ERR; /* not found */
}