Redis dict.c源码阅读

最新推荐文章于 2024-07-15 17:23:34 发布

波伏鲸

最新推荐文章于 2024-07-15 17:23:34 发布

阅读量850

点赞数 20

分类专栏：数据库文章标签： redis c语言数据库

本文链接：https://blog.csdn.net/2201_75390740/article/details/134820751

版权

数据库专栏收录该内容

1 篇文章 0 订阅

订阅专栏

dict.c解读

文章目录

dict.c解读
一、dict 概念
二、dict 的结构
三、dict 的 API
四、dict 的 private functions
五、debugging
总结

一、dict 概念

1.背景

Redis是一个使用ANSI C编写的开源、支持网络、基于内存、分布式、可选持久性的键值对存储数据库。

从2015年6月开始，Redis的开发由Redis Labs赞助，而2013年5月至2015年6月期间，其开发由Pivotal赞助。

在2013年5月之前，其开发由VMware赞助。

根据月度排行网站DB-Engines.com的数据，Redis是最流行的键值对存储数据库。

2. 一些定义

字典，是一种用于实现键值对（key-value pair）保存的抽象数据结构，通过字典，可以在单个键（key）与单个值（value）之间进行关联（或者说是将键映射成值），而这些关联的键与值即为键值对。
对于键值对，字典中的每个键都是独一无二的，程序可以在字典中根据键查找与之关联的值，或者通过键来更新值，又或者根据键来删除整个键值对等。
映射过程，通常使用hash算法实现，因此也称映射过程为哈希化，存放记录的数组叫做散列，或哈希表。
冲突：对不同的关键字，可能得到同一个散列地址，即不同的key散列到同一个数组下标（即哈希桶）。处理方法，最常用的是拉链法，即在冲突的下标处，维护一个链表，所有映射到该下标的记录，都添加到该链表上。也就是说，一个哈希表可以有多个哈希结点，每一个哈希节点保存了一组键值对。
哈希桶：dict由数组+链表构成，数组的每个元素占用的槽位就叫做哈希桶（bucket），也就是数组下标，当出现散列冲突，就会在这个桶下挂一个链表，解决冲突。

哈希桶解决冲突举例，即拉链法：

哈希桶解决冲突举例，即拉链法

3. 如何实现

Redis字典dict 的底层实现，本质就是用数组+链表，实现了分布式哈希表。

当不同的关键字、散列到数组相同的位置，用链表维护冲突的记录。当冲突记录越来越多、链表越来越长，遍历列表的效率就会降低，此时需要考虑将链表的长度变短。

将链表的长度变短，一个最直接有效的方式就是扩容数组。将数组+链表结构中的数组扩容，数组变长、对应数组下标就增多了；将原数组中所有非空的索引下标、搬运到扩容后的新数组，经过重新散列，自然就把冲突的链表变短了。即rehash过程。

二、dict 的结构

1.哈希表结点

哈希结点，也称为桶，在dict.c/dictEntry中进行了定义：

//实际存放数据的地方
struct dictEntry {
    //键
    void *key;
    //值
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    // 指向下一个哈希节点，构成单向链表，用以解决键冲突（链地址法）
    struct dictEntry *next;    
};

key：记录哈希值对应的数组位置。
v：即值（value），当它的值是uint64_t、int64_t或double类型时，就不再需要额外的存储，这有利于减少内存碎片。v也可以是void指针，以便能存储任何类型的数据。
next：指向另一个dictEntry结构

2.字典

字典，在dict.h/dict中定义：

//字典结构体
struct dict {
	//type中包含一系列哈希表需要用到的函数
    dictType *type;
	
	//指向实际的哈希表记录
    dictEntry **ht_table[2];
    //哈希表中记录数量
    unsigned long ht_used[2];
	
	//当rehashidx值为1时，不进行rehash
    long rehashidx; 

    //在末尾保留小变量以获得最佳（最小）结构填充
    //当pauserehash大于0，rehashing暂停，小于0则表示编码错误
    int16_t pauserehash; 
    //大小指数，size = 2的exp次方
    signed char ht_size_exp[2]; 
};

type：指向dictType结构的指针，每个 dictType 结构保存了一组用于操纵特定类型键值对的函数，而 Redis 会为不同用途的字典设置不同的类型特定函数。
ht_table：每一个字典（dict）都有两个哈希表结点（dictEntry），用来实现渐进式 rehash ，记录保存在 ht_table 数组当中。并且，在字典当中，一般只会使用 ht_table[0] 哈希表，ht_table[1]中没有任何数据，只有在对 ht_table[0] 哈希表进行 rehash 时才会用到 ht_table[1]。
ht_used：用于记录两个哈希表中维护冲突的记录，分别储存在ht_used[0]和ht_used[1]中。
rehashidx：用来记录rehash进度。当rehash没有在进行的时候，rehashidx = -1。

其中，dictType函数在dict.h/dictType中定义：

//dictType结构包含若干函数指针，用于对key和value的各种操作进行自定义
typedef struct dictType {
	//计算哈希值
    uint64_t (*hashFunction)(const void *key);
    //复制键
    void *(*keyDup)(dict *d, const void *key);
    //复制值
    void *(*valDup)(dict *d, const void *obj);
    //对比键
    int (*keyCompare)(dict *d, const void *key1, const void *key2);
    //销毁键
    void (*keyDestructor)(dict *d, void *key);
    //销毁值
    void (*valDestructor)(dict *d, void *obj);
    //在字典初始化/rehashing开始时调用（已创建旧 ht 和新 ht）
    int (*expandAllowed)(size_t moreMem, double usedRatio);
    //在字典初始化/rehashing从旧 ht 到新 ht 的所有条目结束时调用。
    //这两个 ht 仍然存在，并在此回调后被清理。
    void (*rehashingStarted)(dict *d);
    //如果设置了“no_value”标志，则表示未使用值，即字典是集合。
    //设置此标志后，无法访问 dictEntry 的值，也无法使用 dictSetKey（）。条目元数据也不能使用。
    void (*rehashingCompleted)(dict *d);
    //如果 no_value = 1，并且所有键都是奇数 （LSB=1），则设置 keys_are_odd = 1 
    //将启用另一个优化：在没有分配 dictEntry 的情况下存储键。
    unsigned int no_value:1;
    //TODO：添加一个“keys_are_even”标志，如果设置了该标志，则使用类似的优化。
    unsigned int keys_are_odd:1;
} dictType;

hashFunction，对key进行哈希值计算的哈希算法。
keyDup和valDup，分别定义key和value的拷贝函数，用于在需要的时候对key和value进行深拷贝，而不仅仅是传递对象指针。
keyCompare，定义两个key的比较操作，在根据key进行查找时会用到。
keyDestructor和valDestructor，分别定义对key和value的销毁函数。

3.字典迭代器

字典迭代器（dictIterator），在dict.h/dictIterator中定义：

//字典迭代器
typedef struct dictIterator {
	//被迭代的字典
    dict *d;
	//迭代器当前所指的哈希表的索引位置
    long index;
    //table : 正在被迭代的哈希表，值可以是 0 或 1 。
    //safe : 标识这个迭代器是否安全
    int table, safe;

	//entry : 当前迭代到的哈希结点
	//nextentry : 当前迭代到的哈希节点的下一个哈希结点
    dictEntry *entry, *nextEntry;
   
    //用于误用检测的不安全迭代器指纹。
    unsigned long long fingerprint;
} dictIterator;

迭代器的作用就是遍历字典，遍历对象为dictEntry结点。安全的迭代器会阻止迭代过程中的rehash。

d：指向dict结构的指针，保存了当前迭代器正在处理的字典。
index：用来表示迭代器当前所指向的哈希表的索引位置。
table：用来表示当前正在被迭代的哈希表，即dict结构中的dictEntry数组ht_table的下标，取值有 0 或 1 。
safe：用来标识这个迭代器是否安全，safe=1时，该迭代器安全，可以在迭代时对字典进行增删改等操作，即调用 dictDelete、dictFind等函数，否则，不安全的迭代器只能使用 dictNext 遍历函数，而不能对字典进行修改。
entry：指向dictEntry结构的指针，指向当前迭代到的哈希节点。
nextEntry：指向dictEntry结构的指针，指向当前迭代到的哈希节点的下一个哈希节点。这个属性存在的原因是，在安全迭代器运行时，entry 指针所指向的节点可能会被修改，所以需要一个指针来保存下一节点的位置，来防止指针丢失。
fingerprint：字典的指纹，用于误用检测，即在不安全的迭代器对字典进行操作后，与操作前的指纹值进行对比，若不同，则说明在迭代过程中有对字典进行增删操作，而这些操作在不安全的迭代器中是不被允许的。

4.宏定义常量

//字典的操作状态
//操作成功
#define DICT_OK 0
//操作失败
#define DICT_ERR 1
//每个哈希表的初始大小
#define DICT_HT_INITIAL_EXP      2
#define DICT_HT_INITIAL_SIZE     (1<<(DICT_HT_INITIAL_EXP))

DICT_OK：字典操作状态，表示操作成功。
DICT_ERR：字典操作状态，表示操作失败或出错。
DICT_HT_INITIAL_EXP：哈希表的初始大小的指数级别为2 。
DICT_HT_INITIAL_SIZE：哈希表的初始大小为以2为底数，DICT_HT_INITIAL_EXP值为指数的数字的值，即2²=4。

三、dict 的 API

（一）哈希函数

//哈希函数
//设置哈希种子
void dictSetHashFunctionSeed(uint8_t *seed) {
    memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}
//获取哈希种子
uint8_t *dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}
//对字符串进行哈希
uint64_t dictGenHashFunction(const void *key, size_t len) {
    return siphash(key,len,dict_hash_function_seed);
}
//对大小写不敏感的字符串进行哈希
uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);

（二）常用 API

1. dict 的创建

//这四个函数都为 dict 的创建做准备，彼此相互套用
//重设Hash表，设置好相应值
/* Reset hash table parameters already initialized with _dictInit()*/
static void _dictReset(dict *d, int htidx)
{
    d->ht_table[htidx] = NULL;
    d->ht_size_exp[htidx] = -1;
    d->ht_used[htidx] = 0;
}

//创建一个新的 Hash 表，为表分配存储空间，初始化表的内部结构
/* Create a new hash table */
dict *dictCreate(dictType *type)
{
    dict *d = zmalloc(sizeof(*d));
    _dictInit(d,type);
    return d;
}

//创建字典数组
/* Create an array of dictionaries */
dict **dictCreateMultiple(dictType *type, int count)
{
    dict **d = zmalloc(sizeof(dict*) * count);
    for (int i = 0; i < count; i++) {
        d[i] = dictCreate(type);
    }
    return d;
}


//初始化 Hash 表
/* Initialize the hash table */
int _dictInit(dict *d, dictType *type)
{
	//_dictReset函数初始化dict的两个表格
    _dictReset(d, 0);
    _dictReset(d, 1);
    //给字典的内部结构赋值，即初始化
    d->type = type;
    d->rehashidx = -1;
    d->pauserehash = 0;
    //返回初始化成功
    return DICT_OK;
}

d：操作对象。
type：字典类型。
htidx：标志字典是否正在rehash，如果是值为1，不是为0 。用来表明操作字典中的第几个哈希表。

2. 对 dict 进行操作的函数

刚创建好的dict 不能存储任何数据，其两个哈希表的size都为0，所以要对字典进行一些操作。

（1）dictResize

缩小给定的字典的负载因子，即 ratio = ht_table[0].used / ht_table[0].size，让其比例接近于 1 : 1。

//将表的大小调整为包含所有元素的最小大小，但 USED/SIZE 比率不变，接近 <= 1
int dictResize(dict *d)
{
    unsigned long minimal;
	
	//dict_can_resize是一个标志，在dict.c文件开头用静态常量定义。
	//如果dict_can_resize不等于enable，或者字典满足rehash条件，返回err
    if (dict_can_resize != DICT_RESIZE_ENABLE || dictIsRehashing(d)) return DICT_ERR;
    //最小大小设置为哈希表的冲突记录数，如果该值小于哈希表初始值4
    //则将minimal设置为4，并返回dictExpand函数对字典进行扩容处理。
    minimal = d->ht_used[0];
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(d, minimal);
}

注：负载因子：就是used与size的比值，也称装载因子（load factor）。dictEntry中用used记录了冲突的记录数，size记录了数组大小，这个比值越大，哈希值冲突概率越高。当比值超过某个阈值，会强制进行rehash。这个阈值为5，在dict.c文件开头用静态常量定义。

//对一些变量进行解释
//定义dict_can_resize 的值
static dictResizeEnable dict_can_resize = DICT_RESIZE_ENABLE;
//负载因子的最大值
static unsigned int dict_force_resize_ratio = 5;

//其中，DICT_RESIZE_ENABLE是一个enum常量，在dict.h中定义
typedef enum {
    DICT_RESIZE_ENABLE,
    DICT_RESIZE_AVOID,
    DICT_RESIZE_FORBID,
} dictResizeEnable;

（2）_dictExpand

创建或者扩容hash表。该函数不能与rehashing操作同时进行，也不能强制缩容。在使用_dictNextExp函数得到需要的size之后，它先是使用一个临时变量n去分配空间，然后进行判断，若ht[0].table的值为NULL，则认为是刚create出来的dict，直接把n赋值给ht[0]，否则给ht[1]，并开始rehashing操作。

int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    //若d正在rehash或者已有元素数量大于扩容后的容量，则返回err
    if (dictIsRehashing(d) || d->ht_used[0] > size)
        return DICT_ERR;

    //创建新的哈希表，声明新表used变量，设置新表大小的指数级别（_dictNextExp函数下面会讲）
    dictEntry **new_ht_table;
    unsigned long new_ht_used;
    signed char new_ht_size_exp = _dictNextExp(size);

    //计算新表的容量大小，若小于给定size值或者内存空间不够，返回err
    size_t newsize = 1ul<<new_ht_size_exp;
    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
        return DICT_ERR;

    //如果新表size的指数级别等于原表，则扩容无效，返回err
    if (new_ht_size_exp == d->ht_size_exp[0]) return DICT_ERR;

    //判断是否分配内存失败，是则重新分配内存
    if (malloc_failed) {
        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));
        *malloc_failed = new_ht_table == NULL;
        if (*malloc_failed)
            return DICT_ERR;
    } else
        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));

    new_ht_used = 0;
	//判断dict是否是第一次初始化，即是否创建ht_table[0]
	//若是，则该d为新创建的dict，进而设置d的各个结构。返回操作成功
    if (d->ht_table[0] == NULL) {
        if (d->type->rehashingStarted) d->type->rehashingStarted(d);
        if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
        d->ht_size_exp[0] = new_ht_size_exp;
        d->ht_used[0] = new_ht_used;
        d->ht_table[0] = new_ht_table;
        return DICT_OK;
    }

    //如果不是新表，即ht_table[1]，给各个结构赋值，并开始rehashing。
    d->ht_size_exp[1] = new_ht_size_exp;
    d->ht_used[1] = new_ht_used;
    d->ht_table[1] = new_ht_table;
    d->rehashidx = 0;
    if (d->type->rehashingStarted) d->type->rehashingStarted(d);
    return DICT_OK;
}

d：操作对象。
size：该函数建立的hash表容量大小。
malloc_failed：当 malloc_failed 为非 NULL 时，如果 malloc 失败，它将避免出现恐慌（在这种情况下，它将设置为 1）。

（3）dictRehash

调用dictExpand函数进入rehashing状态后，rehashing 操作将会把ht[0]里，rehashidx的值对应的bucket下的所有dictEntry，移至ht[1]，之后对rehashidx进行自增处理。当ht[0]->used为0时，认为ht[0]的所有dictEntry已经移至ht[1]，此时return 0，否则 return 1，告诉调用者，还需要继续进行rehashing操作。同时，rehashing时允许最多跳过10n的空bucket，就要退出流程。假设传入的n=1，即只进行一次rehashing操作

int dictRehash(dict *d, int n) {
    //rehash n*10个桶数量的元素
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    //判断d是否符合rehashing条件
    unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
    unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
    if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
    if (dict_can_resize == DICT_RESIZE_AVOID && 
        ((s1 > s0 && s1 / s0 < dict_force_resize_ratio) ||
         (s1 < s0 && s0 / s1 < dict_force_resize_ratio)))
    {
        return 0;
    }
	
	//d符合rehash条件，开始rehash
    while(n-- && d->ht_used[0] != 0) {
        dictEntry *de, *nextde;
        
        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
        //当前桶已空，将rehashidx++，对下一个桶进行迁移
        while(d->ht_table[0][d->rehashidx] == NULL) {
            d->rehashidx++;
            //该次rehash结束，判断表中是否还有元素
            if (--empty_visits == 0) return 1;
        }
        //将当前桶迁移给de
        de = d->ht_table[0][d->rehashidx];
        //将这个桶中所有的key都迁移到新表
        while(de) {
            uint64_t h;
			//用nextde记录当前桶的下一个桶，key指向de的key
            nextde = dictGetNext(de);
            void *key = dictGetKey(de);
            //得到新表的索引
            h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
            
            d->ht_table[1][h] = de;
            d->ht_used[0]--;
            d->ht_used[1]++;
            de = nextde;
        }
        d->ht_table[0][d->rehashidx] = NULL;
        d->rehashidx++;
    }

    //检查是否rehash完毕
    if (d->ht_used[0] == 0) {
        if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
        zfree(d->ht_table[0]);
        //将新表复制到旧表
        d->ht_table[0] = d->ht_table[1];
        d->ht_used[0] = d->ht_used[1];
        d->ht_size_exp[0] = d->ht_size_exp[1];
        _dictReset(d, 1);
        //重置rehashidx为-1
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

d：操作对象
n：rehash元素数量为n*10 。

rehashing操作的触发有两种方式：
1.定时操作：

long long timeInMilliseconds(void) {
    struct timeval tv;

    gettimeofday(&tv,NULL);
    return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);
}

//以 ms+“delta” 毫秒为单位进行重新哈希。“delta”的值大于 0
//并且在大多数情况下小于 1。确切的上限取决于 dictRehash（d，100） 的运行时间。
int dictRehashMilliseconds(dict *d, unsigned int ms) {
    if (d->pauserehash > 0) return 0;

    monotime timer;
    elapsedStart(&timer);
    int rehashes = 0;

    while(dictRehash(d,100)) {
        rehashes += 100;
        if (elapsedMs(timer) >= ms) break;
    }
    return rehashes;
}

外部传入一个毫秒时间，在这时间内循环执行rehashing，每次执行100次。

2.操作时触发：

static void _dictRehashStep(dict *d) {
    if (d->pauserehash == 0) dictRehash(d,1);
}

此函数由字典中的常见查找或更新操作调用，以便哈希表在主动使用时自动从ht_table[0]迁移到 ht_table[1]。
注：此函数仅执行重新散列的步骤，并且仅当哈希表的哈希尚未暂停时。当我们在重新散列过程中有迭代器时，我们不能弄乱两个散列表，否则可能会遗漏或重复一些元素。

（4）增加元素

向哈希表中增加元素。这里说明三个函数。dictAdd函数向目标哈希表中添加元素，其内部调用dictAddRaw函数，创建新键，添加空值的entry。dictAddRaw函数内部又调用dictInsertAtPosition函数，创建key。

dictAdd 函数

//向目标哈希表添加元素
int dictAdd(dict *d, void *key, void *val)
{
    //调用dictAddRaw函数，添加无值的entry
    dictEntry *entry = dictAddRaw(d,key,NULL);

    if (!entry) return DICT_ERR;
    if (!d->type->no_value) dictSetVal(d, entry, val);
    return DICT_OK;
}

dictAddRaw 函数

//此函数添加entry，但不是设置值，而是将entry返回给用户
//这将确保按照用户的意愿填充值字段。
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    //用私人函数dictFindPositionForInsert得到新键的位置
    //再用dictInsertAtPosition函数在该位置添加一个键
    void *position = dictFindPositionForInsert(d, key, existing);
    if (!position) return NULL;

    /* Dup the key if necessary. */
    if (d->type->keyDup) key = d->type->keyDup(d, key);

    return dictInsertAtPosition(d, key, position);
}

dictInsertAtPosition 函数

//在字典的哈希表中，在前面调用 dictFindPositionForInsert 返回的位置添加一个键。
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
	//这是一个桶，但被该API函数隐藏了
    dictEntry **bucket = position; 
    dictEntry *entry;
    
    //如果正在进行rehashing，则在表 1 中插入，否则在表 0 中插入。
    int htidx = dictIsRehashing(d) ? 1 : 0;
    assert(bucket >= &d->ht_table[htidx][0] &&
           bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]);
    if (d->type->no_value) {
        if (d->type->keys_are_odd && !*bucket) {
            //将key直接存储在目标桶中，无需分配entry
            entry = key;
            assert(entryIsKey(entry));
        } else {
            entry = createEntryNoValue(key, *bucket);
        }
    } else {
        //头插法，在顶部插入元素，节约插入时的时间消耗
        //分配存储空间
        entry = zmalloc(sizeof(*entry));
        assert(entryIsNormal(entry)); /* Check alignment of allocation */
        entry->key = key;
        entry->next = *bucket;
    }
    *bucket = entry;
    d->ht_used[htidx]++;

    return entry;
}

d：操作对象。
key：键对应变量。
val：值对应变量。
position：键对应位置。

（5）替换元素

向Hash表中增加一个元素，如果Hash表中已经有该元素的话，则将该元素进行替换掉。

int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *entry, *existing;

    //增加元素，如果键不存在，则在添加键并设置键对应的值
    entry = dictAddRaw(d,key,&existing);
    if (entry) {
        dictSetVal(d, entry, val);
        return 1;
    }
    
    //设置新值并释放旧值
    void *oldval = dictGetVal(existing);
    dictSetVal(d, existing, val);
    if (d->type->valDestructor)
        d->type->valDestructor(d, oldval);
    return 0;
}

d：操作对象。
key：键对应变量。
val：值对应变量。

（6）删除操作

删除指定的key

static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    /* dict is empty */
    if (dictSize(d) == 0) return NULL;

	//返回key对应的dictEntry
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        if (table == 0 && (long)idx < d->rehashidx) continue;
        he = d->ht_table[table][idx];
        prevHe = NULL;
        while(he) {
            void *he_key = dictGetKey(he);
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    dictSetNext(prevHe, dictGetNext(he));
                else
                    d->ht_table[table][idx] = dictGetNext(he);
                if (!nofree) {
                	//如果需要释放，则进行相应的释放操作
                    dictFreeUnlinkedEntry(d, he);
                }
                //记录数相应减小
                d->ht_used[table]--;
                return he;
            }
            //进行相应的赋值操作
            prevHe = he;
            he = dictGetNext(he);
        }
        if (!dictIsRehashing(d)) break;
    }
    //返回错误
    return NULL; /* not found */
}

d：操作对象。
key：指定的键。

删除指定的value

int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}

d：操作对象。
key：指定的键。

删除指定的元素

dictUnlink函数从表中删除一个元素，但不实际释放键、值和字典条目。
此函数适用于：当我们想从哈希表中删除某些内容，但想在实际删除条目之前使用其值时，此函数很有用。

dictEntry *dictUnlink(dict *d, const void *key) {
    return dictGenericDelete(d,key,1);
}

d：操作对象。
key：指定的键。

使用完成后，再调用dictFreeUnlinkedEntry函数以释放该entry。

void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
    if (he == NULL) return;
    dictFreeKey(d, he);
    dictFreeVal(d, he);
    if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
}

d：操作对象。
he：要删除的桶。

删除整个字典

删除整个字典数据，保留字典结构

int _dictClear(dict *d, int htidx, void(callback)(dict*)) {
    unsigned long i;
	//释放所有元素
    /* Free all the elements */
    for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {
        dictEntry *he, *nextHe;

        if (callback && (i & 65535) == 0) callback(d);
		//如果哈希表中没有链表元素
        if ((he = d->ht_table[htidx][i]) == NULL) continue;
        while(he) {
            nextHe = dictGetNext(he);
            //释放key和value，以及结构体
            dictFreeKey(d, he);
            dictFreeVal(d, he);
            if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
            d->ht_used[htidx]--;
            he = nextHe;
        }
    }
    /* Free the table and the allocated cache structure */
    zfree(d->ht_table[htidx]);
    //重新初始化整个哈希表
    _dictReset(d, htidx);
    return DICT_OK; /* never fails */
}

d：操作对象。
htidx：标志当前操作哈希表是表[0]还是表[1]。

删除哈希表

void dictRelease(dict *d)
{
	//清除两个哈希表中的数据，并释放其空间，即彻底删除
    _dictClear(d,0,NULL);
    _dictClear(d,1,NULL);
    zfree(d);
}

d：操作对象。

（7）查找

查找指定key对应的entry。

dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    uint64_t h, idx, table;
	
	//哈希表为空
    if (dictSize(d) == 0) return NULL; /* dict is empty */
    //渐进式rehash
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
    	//具体索引
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        if (table == 0 && (long)idx < d->rehashidx) continue; 
        he = d->ht_table[table][idx];
        while(he) {
            void *he_key = dictGetKey(he);
            //存在需要查找的dictEntry
            if (key == he_key || dictCompareKeys(d, key, he_key))
            	//返回entry
                return he;
            he = dictGetNext(he);
        }
        //在rehash需要再查
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

d：操作对象。
key：指定的键，查找对应的entry。

查找指定key对应的value值

void *dictFetchValue(dict *d, const void *key) {
    dictEntry *he;

    he = dictFind(d,key);
    return he ? dictGetVal(he) : NULL;
}

d：操作对象。
key：指定的键，查找对应的entry。

（8）一些简单函数

设置key和value

void dictSetKey(dict *d, dictEntry* de, void *key) {
    assert(!d->type->no_value);
    if (d->type->keyDup)
        de->key = d->type->keyDup(d, key);
    else
        de->key = key;
}

void dictSetVal(dict *d, dictEntry *de, void *val) {
    assert(entryHasValue(de));
    de->v.val = d->type->valDup ? d->type->valDup(d, val) : val;
}

d：操作对象。
de：相应entry。
key：要设置的key。
val：要设置的value。

获得key和value

void *dictGetKey(const dictEntry *de) {
    if (entryIsKey(de)) return (void*)de;
    if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key;
    return de->key;
}

void *dictGetVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.val;
}

de：需要获得key或value信息的对应entry。

static dictEntry *dictGetNext(const dictEntry *de) {
    if (entryIsKey(de)) return NULL; /* there's no next */
    if (entryIsNoValue(de)) return decodeEntryNoValue(de)->next;
    return de->next;
}

//返回下一结点的指针，如果下一结点不存在，返回NULL
static dictEntry **dictGetNextRef(dictEntry *de) {
    if (entryIsKey(de)) return NULL;
    if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next;
    return &de->next;
}
//设置下一个结点
static void dictSetNext(dictEntry *de, dictEntry *next) {
    assert(!entryIsKey(de));
    if (entryIsNoValue(de)) {
        dictEntryNoValue *entry = decodeEntryNoValue(de);
        entry->next = next;
    } else {
        de->next = next;
    }
}

de：操作对象字典。
next：下一结点指针。

3.对迭代器进行操作的函数

字典迭代器的创建

首先是用dictInitIterator函数初始化字典迭代器，后面在创建迭代器函数中先分配迭代器内存，再调用上述函数对其进行初始化。dictResetIterator(）函数根据字典对应迭代器状态，重设字典内部变量值。

//对分配好内存的迭代器进行初始化，对内部进行赋值
//注意此时safe=0，迭代器非安全
//程序只会调用dictNext对字典进行迭代，而不能对字典进行修改
void dictInitIterator(dictIterator *iter, dict *d)
{
    iter->d = d;
    iter->table = 0;
    iter->index = -1;
    iter->safe = 0;
    iter->entry = NULL;
    iter->nextEntry = NULL;
}

//创建并返回给定结点的安全迭代器，修改safe为1
//意味着这个迭代器可以对字典进行修改操作，如dictAdd，dictFind等函数
void dictInitSafeIterator(dictIterator *iter, dict *d)
{
    dictInitIterator(iter, d);
    iter->safe = 1;
}

//根据字典迭代器参数重新设置d中pauserehash变量值，控制rehash进程
void dictResetIterator(dictIterator *iter)
{
    if (!(iter->index == -1 && iter->table == 0)) {
        if (iter->safe)
        	//#define dictResumeRehashing(d) ((d)->pauserehash--) （from dict.h）
        	//dictResumeRehashing被宏定义为控制dict内部pauserehash变量值减1的函数
        	//当pauserehash大于0，rehashing暂停，小于0则表示编码错误
            dictResumeRehashing(iter->d);
        else
            assert(iter->fingerprint == dictFingerprint(iter->d));
    }
}

//创建迭代器结构，分配空间并初始化
//返回迭代器指针
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *iter = zmalloc(sizeof(*iter));
    dictInitIterator(iter, d);
    return iter;
}

//在dictGetIterator函数基础上，将创建好的迭代器设置为安全迭代器
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *i = dictGetIterator(d);

    i->safe = 1;
    return i;
}

d：操作对象字典。
iter：操作目标迭代器。

对哈希表进行迭代遍历操作

dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {
        	//若遍历的index为-1，并且此时为ht_table[0]
            if (iter->index == -1 && iter->table == 0) {
            	//若此迭代器安全，则停止rehash
                if (iter->safe)
                    dictPauseRehashing(iter->d);
                else
                    iter->fingerprint = dictFingerprint(iter->d);

                //跳过表[0]的rehash
                if (dictIsRehashing(iter->d)) {
                    iter->index = iter->d->rehashidx - 1;
                }
            }
            iter->index++;
            //若遍历的index大于整个哈希表数组的大小，说明遍历完成
            //若该哈希表正在遍历，则跳到表1，index归0，否则结束跳出
            if (iter->index >= (long) DICTHT_SIZE(iter->d->ht_size_exp[iter->table])) {
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                } else {
                    break;
                }
            }
            iter->entry = iter->d->ht_table[iter->table][iter->index];
        } else {
        	//遍历下一个元素
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            //返回遍历过程中的下一个元素
            iter->nextEntry = dictGetNext(iter->entry);
            //返回当前遍历的元素
            return iter->entry;
        }
    }
    return NULL;
}

iter：操作目标迭代器。

释放迭代器

void dictReleaseIterator(dictIterator *iter)
{
    dictResetIterator(iter);
    zfree(iter);
}

从哈希表中获取随机的key

dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    unsigned long h;
    int listlen, listele;
	//若字典大小为0，即没有冲突记录，没有元素，返回NULL
    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    if (dictIsRehashing(d)) {
        unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
        do {
            //确保indexes中从0到rehashidx-1没有元素
            h = d->rehashidx + (randomULong() % (dictBuckets(d) - d->rehashidx));
            he = (h >= s0) ? d->ht_table[1][h - s0] : d->ht_table[0][h];
        } while(he == NULL);
    } else {
    //下面的做法是先随机选取散列数组中的一个槽，这样就得到一个链表
    //（如果该槽中没有元素则重新选取）然后在该列表中随机选取一个键值对返回
        unsigned long m = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
        do {
            h = randomULong() & m;
            he = d->ht_table[0][h];
        } while(he == NULL);
    }

    //计算处于这个slot里面的元素数目
    listlen = 0;
    orighe = he;
    while(he) {
        he = dictGetNext(he);
        listlen++;
    }
    //从整个slot链表中选择元素的位置
    listele = random() % listlen;
    he = orighe;
    //找到对应位置
    while(listele--) he = dictGetNext(he);
    return he;
}

d：操作对象字典。

四、dict 的 private functions

1. 判断是否允许扩充

因为当 dict 扩展时，可能需要一次分配巨大的内存块，所以如果 dict->type具有 expandAllowed 成员函数，将检查是否允许此分配。

static int dictTypeExpandAllowed(dict *d) {
    if (d->type->expandAllowed == NULL) return 1;
    return d->type->expandAllowed(
                    DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*),
                    (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
}

d：操作对象字典。

2. 需要时扩充哈希表

判断字典的哈希表是否需要扩充，如果需要则对哈希表进行dictExpand。

static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    //如果正在进行rehash，直接返回
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    //如果哈希表为空，扩充哈希表到初始大小，4
    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    //如果当前哈希表数据记录数used大于小于容量大小size，则将大小扩充为2倍
    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
         d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0])) ||
        (dict_can_resize != DICT_RESIZE_FORBID &&
         d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio))
    {
        if (!dictTypeExpandAllowed(d))
            return DICT_OK;
        return dictExpand(d, d->ht_used[0] + 1);
    }
    return DICT_OK;
}

d：操作对象字典。

3.哈希表大小的指数级别

哈希表size是2的整数次幂，本函数求该指数exp大小。

static signed char _dictNextExp(unsigned long size)
{
    if (size <= DICT_HT_INITIAL_SIZE) return DICT_HT_INITIAL_EXP;
    //不能超过长整型的最大值
    if (size >= LONG_MAX) return (8*sizeof(long)-1);

    return 8*sizeof(long) - __builtin_clzl(size-1);
}

size：哈希表容量大小。

4.查找指定键的位置

void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
    unsigned long idx, table;
    dictEntry *he;
    //调用对应哈希函数获得给定key的哈希值
    uint64_t hash = dictHashKey(d, key);
    if (existing) *existing = NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return NULL;
    for (table = 0; table <= 1; table++) {
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        if (table == 0 && (long)idx < d->rehashidx) continue; 
        
        he = d->ht_table[table][idx];
        //依次比较该slot上的所有键值对的keu是否与给定key相等
        while(he) {
            void *he_key = dictGetKey(he);
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                if (existing) *existing = he;
                return NULL;
            }
            he = dictGetNext(he);
        }
        //若不在执行rehash操作，第二个表为空表，无需继续查找
        if (!dictIsRehashing(d)) break;
    }

    //如果正在rehash，则桶已经在第二个哈希表返回
    dictEntry **bucket = &d->ht_table[dictIsRehashing(d) ? 1 : 0][idx];
    return bucket;
}

d：操作对象字典。
key：指定键。
existing：一个entry指针。

5.清空字典数据

只清空数据，但不释放空间。（而_dictClear函数释放字典空间）

void dictEmpty(dict *d, void(callback)(dict*)) {
    _dictClear(d,0,callback);
    _dictClear(d,1,callback);
    d->rehashidx = -1;
    d->pauserehash = 0;
}

d：操作对象字典。

6.使用指针和预先计算的哈希值寻找结点

哈希值用dictGetHsh函数提供。如果找到了符合要求的结点，返回值是一个指向该结点的oldptr指针，否则返回NULL。

dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
    dictEntry *he;
    unsigned long idx, table;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    for (table = 0; table <= 1; table++) {
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        if (table == 0 && (long)idx < d->rehashidx) continue;
        he = d->ht_table[table][idx];
        while(he) {
            if (oldptr == dictGetKey(he))
                return he;
            he = dictGetNext(he);
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

d：操作对象字典。
oldkey：一个死指针，不应被访问。
hash：给定的预先计算的哈希值，用来寻找符合要求的指针。

7.返回哈希表大小

提供给定字典的两个哈希表的size，这个函数仅在初始化或者rehash过程中调用。

void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) {
    /* Expansion during initialization. */
    if (d->ht_size_exp[0] == -1) {
        *from_size = DICTHT_SIZE(d->ht_size_exp[0]);
        *to_size = DICTHT_SIZE(DICT_HT_INITIAL_EXP);
        return;
    }
    /* Invalid method usage if rehashing isn't ongoing. */
    assert(dictIsRehashing(d));
    *from_size = DICTHT_SIZE(d->ht_size_exp[0]);
    *to_size = DICTHT_SIZE(d->ht_size_exp[1]);
}

d：操作对象字典。
from_size：指代哈希表的size。
to_size：指代哈希表size的以2为底形式的指数。

五、debugging

debugging模块中是一系列检查哈希表数据的函数，用dictStats结构体保存哈希表数据，所以这些函数都是在stats上操作，用以排查哈希表错误之处。

1.dictStats结构声明

typedef struct dictStats {
    int htidx;
    unsigned long buckets;
    unsigned long maxChainLen;
    unsigned long totalChainLen;
    unsigned long htSize;
    unsigned long htUsed;
    unsigned long *clvector;
} dictStats;

htidx：哈希表索引。
buckets：dict由数组+链表构成，数组的每个元素占用的槽位就叫做哈希桶，当出现散列冲突，就会在这个桶下挂一个链表，解决冲突。
maxChainLen：链表最大长度，即桶的深度。
totalChainLen：当前链表总长度。
htSize：哈希表容量大小，即数组长度。
htSize：哈希表冲突记录数。
clvector：一个指针，指代Stats。

2.dictFreeStats

释放Stats内存空间。

void dictFreeStats(dictStats *stats) {
    zfree(stats->clvector);
    zfree(stats);
}

stats：操作对象哈希表数据统计结构。

3.dictCombineStats

将from数据加到into中，融合两个哈希表的数据。

void dictCombineStats(dictStats *from, dictStats *into) {
    into->buckets += from->buckets;
    into->maxChainLen = (from->maxChainLen > into->maxChainLen) ? from->maxChainLen : into->maxChainLen;
    into->totalChainLen += from->totalChainLen;
    into->htSize += from->htSize;
    into->htUsed += from->htUsed;
    for (int i = 0; i < DICT_STATS_VECTLEN; i++) {
        into->clvector[i] += from->clvector[i];
    }
}

from：哈希表数据统计结构，包含要添加的数据。
into：哈希表数据统计结构，被添加。

4.dictGetStatsHt

创建一个stats，并用指定字典的某哈希表的数据赋值，返回stats。获得需要的哈希表的数据。

dictStats *dictGetStatsHt(dict *d, int htidx, int full) {
    unsigned long *clvector = zcalloc(sizeof(unsigned long) * DICT_STATS_VECTLEN);
    dictStats *stats = zcalloc(sizeof(dictStats));
    stats->htidx = htidx;
    stats->clvector = clvector;
    stats->htSize = DICTHT_SIZE(d->ht_size_exp[htidx]);
    stats->htUsed = d->ht_used[htidx];
    if (!full) return stats;
    //full不为0，计算stats
    /* Compute stats. */
    for (unsigned long i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) {
        dictEntry *he;

        if (d->ht_table[htidx][i] == NULL) {
            clvector[0]++;
            continue;
        }
        stats->buckets++;
        /* For each hash entry on this slot... */
        unsigned long chainlen = 0;
        he = d->ht_table[htidx][i];
        while(he) {
            chainlen++;
            he = dictGetNext(he);
        }
        clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
        if (chainlen > stats->maxChainLen) stats->maxChainLen = chainlen;
        stats->totalChainLen += chainlen;
    }

    return stats;
}

d：操作对象字典。
htidx：哈希表索引。
full：是否计算统计信息的标志。若为0，则直接返回初始化赋值的stats，若不为0，则要计算stats。

5.dictGetStatsMsg

将stats中的数据转换成用户可以直接理解的直观数据并输出。

size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full) {
    if (stats->htUsed == 0) {
        return snprintf(buf,bufsize,
            "Hash table %d stats (%s):\n"
            "No stats available for empty dictionaries\n",
            stats->htidx, (stats->htidx == 0) ? "main hash table" : "rehashing target");
    }
    size_t l = 0;
    l += snprintf(buf + l, bufsize - l,
                  "Hash table %d stats (%s):\n"
                  " table size: %lu\n"
                  " number of elements: %lu\n",
                  stats->htidx, (stats->htidx == 0) ? "main hash table" : "rehashing target",
                  stats->htSize, stats->htUsed);
    if (full) {
        l += snprintf(buf + l, bufsize - l,
                      " different slots: %lu\n"
                      " max chain length: %lu\n"
                      " avg chain length (counted): %.02f\n"
                      " avg chain length (computed): %.02f\n"
                      " Chain length distribution:\n",
                      stats->buckets, stats->maxChainLen,
                      (float) stats->totalChainLen / stats->buckets, (float) stats->htUsed / stats->buckets);

        for (unsigned long i = 0; i < DICT_STATS_VECTLEN - 1; i++) {
            if (stats->clvector[i] == 0) continue;
            if (l >= bufsize) break;
            l += snprintf(buf + l, bufsize - l,
                          "   %ld: %ld (%.02f%%)\n",
                          i, stats->clvector[i], ((float) stats->clvector[i] / stats->htSize) * 100);
        }
    }

    /* Make sure there is a NULL term at the end. */
    buf[bufsize-1] = '\0';
    /* Unlike snprintf(), return the number of characters actually written. */
    return strlen(buf);
}

buf：字符指针。
bufsize：buf指代字符串大小。
stats：操作对象哈希表数据统计结构。
full：判断标志。

6.dictGetStats

调用dictGetStatsHt函数，创建一个stats，记录字典数据。如果字典rehashing，则再创建一个stats，输出两个哈希表数据，否则只输出表[0]数据。输出后释放stats空间。

void dictGetStats(char *buf, size_t bufsize, dict *d, int full) {
    size_t l;
    char *orig_buf = buf;
    size_t orig_bufsize = bufsize;

    dictStats *mainHtStats = dictGetStatsHt(d, 0, full);
    l = dictGetStatsMsg(buf, bufsize, mainHtStats, full);
    dictFreeStats(mainHtStats);
    buf += l;
    bufsize -= l;
    if (dictIsRehashing(d) && bufsize > 0) {
        dictStats *rehashHtStats = dictGetStatsHt(d, 1, full);
        dictGetStatsMsg(buf, bufsize, rehashHtStats, full);
        dictFreeStats(rehashHtStats);
    }
    /* Make sure there is a NULL term at the end. */
    orig_buf[orig_bufsize-1] = '\0';
}