【redis源码分析】字典---dict

最新推荐文章于 2024-09-15 04:17:06 发布

遇_见

最新推荐文章于 2024-09-15 04:17:06 发布

阅读量805

点赞数

分类专栏：分布式存储文章标签：大数据存储 c语言数据库源码

本文链接：https://blog.csdn.net/hustraiet/article/details/26399279

版权

分布式存储专栏收录该内容

14 篇文章 0 订阅

订阅专栏

本文深入探讨Redis核心结构——字典（dict）。重点介绍rehash策略，包括单步rehash和时间限制rehash，以及字典的扩展条件。此外，还提到了安全与非安全迭代器在rehash过程中的作用，确保数据遍历的准确性。

摘要由CSDN通过智能技术生成

字典结构是redis的一个核心结构，设计及如下图所示：

其中主要需要注意的有以下几点：

1，rehash操作

为了实现rehash操作，dict内部维护了两个hashtable，ht[0]作为master：当没有处于rehash状态时，所有的数据都保存在ht[0]中；如果处于rehash状态，则新的数据会被添加到ht[1]中（已有的数据再逐步从ht[0]迁移到ht[1]），从而保证数据保存的一致性。

为了实现渐进式rehash，保证rehash过程不会使整个系统停滞，redis提供了两种rehash的方式：第一种是单步rehash，也就是说每次调用只rehash一个桶，这个操作在dict的查找、添加、删除等操作中基本都会执行；第二种是基于执行时间的rehash，它只会使系统阻塞指定的时间。

2，字典的扩展操作

为了保证hash表的效率，在一定条件下（(d->ht[0].used >= d->ht[0].size &&
(dict_can_resize || d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))），会对hash桶进行扩展。扩展操作实际上就是分配一个更大的hash表，把它指定为ht[1]，并打开rehash标志，使得上面的rehash操作得以真正执行。这里有一点需要强调：rehash操作只是一个过程（一个执行体），它是否真正执行迁移，取决于扩展操作是否为它打开了标志，也就是说，真正控制是否执行rehash操作的开关是扩展操作。

3，安全迭代器和非安全迭代器

非安全迭代器只能进行get操作，而安全迭代器则可以进行迭代器支持的所有操作。由于dict结构中保存了安全迭代器的数目，只要该数目不为0，就不会进行单步rehash操作，因此安全迭代器的存在保证了数据遍历的准确性。

下面是dict.h的注释源码：

#include <stdint.h>

#ifndef __DICT_H
#define __DICT_H

/*
 * Status codes returned by the dict operations:
 * DICT_OK on success, DICT_ERR on failure.
 */
#define DICT_OK 0
#define DICT_ERR 1

/* Unused arguments generate annoying warnings... */
/* Evaluates V and discards the result to mark it as intentionally unused. */
#define DICT_NOTUSED(V) ((void) V)

/*
 * Hash table entry: one node of a bucket's singly linked chain.
 */
typedef struct dictEntry {
    // Key of this entry
    void *key;
    // Value: either a pointer or an inline 64-bit unsigned/signed integer
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;

    // Next entry in the same bucket (NULL terminates the chain)
    struct dictEntry *next; 

} dictEntry;

/*
 * Set of type-specific callbacks attached to a dictionary.
 * Every callback except hashFunction is checked for NULL by the macros
 * below before being called; hashFunction is called unconditionally.
 */
typedef struct dictType {
    // Computes the hash of a key; the result is masked with the table's
    // sizemask to pick a bucket. Different dicts may use different functions.
    unsigned int (*hashFunction)(const void *key);
    // Duplicates a key (optional)
    void *(*keyDup)(void *privdata, const void *key);
    // Duplicates a value (optional)
    void *(*valDup)(void *privdata, const void *obj);
    // Compares two keys (optional); a non-zero result is treated as "equal"
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    // Key destructor (optional)
    void (*keyDestructor)(void *privdata, void *key);
    // Value destructor (optional)
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/*
 * A single hash table.
 */
typedef struct dictht {
    // Array of bucket heads (each element is the head of an entry chain)
    dictEntry **table;      
    // Number of buckets in the array
    unsigned long size;     
    // Bucket mask (size-1), ANDed with a hash value to get the bucket index
    unsigned long sizemask; 
    // Number of entries currently stored in this table
    unsigned long used;     
} dictht;

/*
 * Dictionary.
 *
 * Each dictionary owns two hash tables so that rehashing can be done
 * incrementally: entries migrate from ht[0] to ht[1] a bucket at a time.
 */
typedef struct dict {

    // Type-specific callbacks
    dictType *type;
    // Opaque pointer handed to the type callbacks
    void *privdata;
    // The two hash tables
    dictht ht[2];       
    // Rehash progress: index of the next ht[0] bucket to migrate,
    // or -1 when no rehash is in progress
    int rehashidx;
    // Number of safe iterators currently running on this dict
    int iterators;      

} dict;

/*
 * Dictionary iterator.
 *
 * If 'safe' is 1 the iterator is a safe iterator: while it is running the
 * dictionary may still be used with dictAdd, dictFind and other functions
 * (the single-step rehash is suppressed while safe iterators exist).
 *
 * If 'safe' is 0 the iterator is not safe: while it is running only
 * dictNext should be called on the dictionary.
 */
typedef struct dictIterator {

    // Dictionary being iterated
    dict *d;                
    
    int table,              // Hash table being iterated (0 or 1)
        index,              // Bucket index inside that table (-1 before the first step)
        safe;               // Non-zero for a safe iterator

    dictEntry *entry,       // Entry most recently returned
              *nextEntry;   // Saved successor of 'entry'
} dictIterator;

// Initial (and minimum) number of buckets of a hash table
#define DICT_HT_INITIAL_SIZE     4

/* ------------------------------- Macros ------------------------------------*/

// Release the value of 'entry' through the type's valDestructor callback,
// when one is defined; does nothing otherwise.
// The body is wrapped in do { } while(0) so the macro expands to a single
// statement and cannot capture an 'else' that follows the call site.
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val); \
} while(0)

// Store a value in 'entry': the value is duplicated through the type's
// valDup callback when one is defined, otherwise the pointer is stored as is.
// Every macro argument is parenthesized so that expressions such as
// '&local_entry' can be passed safely.
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, (_val_)); \
    else \
        (entry)->v.val = (_val_); \
} while(0)

// Store a signed 64-bit integer directly in the entry's value union.
// Arguments are parenthesized so arbitrary expressions can be passed.
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)

// Store an unsigned 64-bit integer directly in the entry's value union.
#define dictSetUnsignedIntegerVal(entry, _val_) \
    do { (entry)->v.u64 = (_val_); } while(0)
// Release the key of 'entry' through the type's keyDestructor callback,
// when one is defined; does nothing otherwise.
// The body is wrapped in do { } while(0) so the macro expands to a single
// statement and cannot capture an 'else' that follows the call site.
#define dictFreeKey(d, entry) do { \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d)->privdata, (entry)->key); \
} while(0)

// Store a key in 'entry': the key is duplicated through the type's keyDup
// callback when one is defined, otherwise the pointer is stored as is.
// Every macro argument is parenthesized so that expressions such as
// '&local_entry' can be passed safely.
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, (_key_)); \
    else \
        (entry)->key = (_key_); \
} while(0)

// Compare two keys. Uses the type's keyCompare callback when one is defined,
// otherwise falls back to pointer equality. Callers in dict.c treat a
// non-zero result as "the keys match".
#define dictCompareKeys(d, key1, key2) \
    (((d)->type->keyCompare) ? \
        (d)->type->keyCompare((d)->privdata, key1, key2) : \
        (key1) == (key2))
// Basic accessors.
// dictHashKey: hash of 'key' as computed by the type's hashFunction.
#define dictHashKey(d, key) (d)->type->hashFunction(key)
// Field accessors for a hash entry 'he'.
#define dictGetKey(he) ((he)->key)
#define dictGetVal(he) ((he)->v.val)
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)
// Total number of buckets / stored entries across both hash tables.
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
// True while an incremental rehash is in progress. Despite the parameter
// name, the argument is a dict pointer (rehashidx is a field of dict).
#define dictIsRehashing(ht) ((ht)->rehashidx != -1)

/* API */
/* Public functions of the dictionary. Functions returning int return
 * DICT_OK / DICT_ERR unless their definition documents otherwise
 * (dictReplace and dictRehash return 1/0, dictRehashMilliseconds a count). */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
dictEntry *dictReplaceRaw(dict *d, void *key);
int dictDelete(dict *d, const void *key);
int dictDeleteNoFree(dict *d, const void *key);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
/* NOTE(review): no definition of dictPrintStats is visible in the dict.c
 * listing that accompanies this header - confirm it exists elsewhere. */
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d);
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
/* NOTE(review): dict.c defines the two seed accessors with uint32_t; this
 * matches only where uint32_t is unsigned int. */
void dictSetHashFunctionSeed(unsigned int initval);
unsigned int dictGetHashFunctionSeed(void);

/* Hash table types */
/* NOTE(review): these objects are declared here but not defined in the
 * dict.c listing that accompanies this header. */
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

#endif /* __DICT_H */

dict.c的源码注释：

#include "fmacros.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <limits.h>
#include <sys/time.h>
#include <ctype.h>

#include "dict.h"
#include "zmalloc.h"

/* Using dictEnableResize() / dictDisableResize() we make possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations.
 *
 * Note that even when dict_can_resize is set to 0, not all resizes are
 * prevented: an hash table is still allowed to grow if the ratio between
 * the number of elements and the buckets > dict_force_resize_ratio. */
/* Global resize switch: 1 = resizing allowed, 0 = resizing disabled
 * (except for the forced growth described above). */
static int dict_can_resize = 1;
/* used/size ratio above which a table grows even when resizing is disabled. */
static unsigned int dict_force_resize_ratio = 5;

/* -------------------------- private prototypes ---------------------------- */

/* Forward declarations of file-local helpers defined further down. */
static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

/* -------------------------- hash functions -------------------------------- */

/* Thomas Wang's 32 bit Mix Function */
/* Hash function for unsigned integer keys: scrambles the bits of 'key'
 * through a fixed sequence of shift/add/xor steps and returns the result. */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);

    return mixed;
}

/* Identity hash function for integer keys */
/* The hash of an unsigned integer key is the key itself. */
unsigned int dictIdentityHashFunction(unsigned int key)
{
    const unsigned int hash = key;

    return hash;
}

/* Seed shared by dictGenHashFunction() and dictGenCaseHashFunction(). */
static uint32_t dict_hash_function_seed = 5381;

/* Set the seed used by the generic hash functions. */
void dictSetHashFunctionSeed(uint32_t seed) {
    dict_hash_function_seed = seed;
}

/* Return the seed currently used by the generic hash functions. */
uint32_t dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}

/* MurmurHash2, by Austin Appleby
 * Note - This code makes a few assumptions about how your machine behaves -
 * 1. sizeof(int) == 4
 *
 * And it has a few limitations -
 *
 * 1. It will not work incrementally.
 * 2. It will not produce the same results on little-endian and big-endian
 *    machines.
 *
 * Details about the algorithm: http://code.google.com/p/smhasher/
 *
 * Generic hash function: hashes the 'len' bytes starting at 'key', mixed
 * with the current seed, and returns an unsigned integer.
 *
 * The 4-byte words are loaded with memcpy() instead of dereferencing a
 * casted pointer, so the function is well defined for buffers that are not
 * 4-byte aligned and does not cast away the const qualifier. The value that
 * is read (native byte order) is the same as before.
 */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
     They're not really 'magic', they just happen to work well.  */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4) {
        uint32_t k;

        memcpy(&k, data, sizeof(k));

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array  */
    switch(len) {
    case 3: h ^= data[2] << 16; /* fall through */
    case 2: h ^= data[1] << 8;  /* fall through */
    case 1: h ^= data[0]; h *= m;
            break;
    default: break;
    }

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}

/* And a case insensitive hash function (based on djb hash) */
/* Every byte is lower-cased before being mixed in, so keys that differ only
 * in letter case hash to the same value. Starts from the current seed. */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = (unsigned int)dict_hash_function_seed;

    while (len--)
        hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
    return hash;
}

/* ----------------------------- API implementation ------------------------- */

/*
 * Put a hash table into the empty state: no bucket array, zero size,
 * zero mask and zero stored entries. Nothing is freed here.
 *
 * T = O(1)
 */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}

/*
 * Allocate a new dictionary with zmalloc() and initialize it with the
 * given type callbacks and private data pointer.
 *
 * T = O(1)
 */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *fresh = zmalloc(sizeof(*fresh));

    _dictInit(fresh,type,privDataPtr);
    return fresh;
}

/*
 * Initialize a dictionary: both hash tables are reset to empty, the type
 * and private data are stored, no rehash is in progress (rehashidx = -1)
 * and no safe iterator is running. Always returns DICT_OK.
 *
 * T = O(1)
 */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    int t;

    /* Reset ht[0] first, then ht[1]. */
    for (t = 0; t < 2; t++)
        _dictReset(&d->ht[t]);

    d->iterators = 0;
    d->rehashidx = -1;
    d->privdata = privDataPtr;
    d->type = type;

    return DICT_OK;
}

/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
/*
 * Returns DICT_ERR when resizing is disabled (dict_can_resize == 0) or when
 * the dictionary is currently being rehashed; otherwise returns whatever
 * dictExpand() returns.
 *
 * The target size is the number of stored entries, but never less than
 * DICT_HT_INITIAL_SIZE. It is kept in an unsigned long, the type of
 * ht[0].used, so that large entry counts are not truncated.
 */
int dictResize(dict *d)
{
    unsigned long minimal;

    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

    minimal = d->ht[0].used;

    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;

    return dictExpand(d, minimal);
}

/* Expand or create the hash table */
/*
 * Allocate a new zero-filled bucket array whose length is 'size' rounded
 * by _dictNextPower(), then:
 *
 *   1) if ht[0] has no bucket array yet, install the new table as ht[0];
 *   2) otherwise install it as ht[1] and start an incremental rehash
 *      (rehashidx = 0).
 *
 * Returns DICT_ERR, without allocating, when a rehash is already in
 * progress or when 'size' is smaller than the number of entries stored in
 * ht[0]. Returns DICT_OK otherwise.
 */
int dictExpand(dict *d, unsigned long size)
{
    unsigned long buckets = _dictNextPower(size);
    dictht fresh; /* the new hash table */

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d)) return DICT_ERR;
    if (d->ht[0].used > size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    fresh.table = zcalloc(buckets*sizeof(dictEntry*));
    fresh.size = buckets;
    fresh.sizemask = buckets-1;
    fresh.used = 0;

    if (d->ht[0].table != NULL) {
        /* Prepare a second hash table for incremental rehashing */
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        /* First initialization: not really a rehashing, we just set the
         * first hash table so that it can accept keys. */
        d->ht[0] = fresh;
    }

    return DICT_OK;
}

/*
 * Perform up to N steps of incremental rehashing.
 *
 * Returns 0 when no rehash is in progress or when the rehash has been
 * completed (ht[1] has replaced ht[0]); returns 1 when all N steps were
 * executed. Note that 1 is returned even if the last step moved the final
 * entry: the table swap happens at the start of the next step.
 *
 * Each step moves one whole non-empty bucket chain from ht[0] to ht[1],
 * so more than one key may be migrated per step.
 */
int dictRehash(dict *d, int n) {
    // Nothing to do unless a rehash is in progress. This function only
    // executes rehash steps; the rehash itself is started elsewhere
    // (dictExpand sets rehashidx to 0).
    if (!dictIsRehashing(d)) return 0;

    while(n--) {
        dictEntry *de, *nextde;

        // ht[0] is empty: the migration is complete,
        // ht[1] takes the place of ht[0]
        if (d->ht[0].used == 0) {

            // Free the old bucket array
            zfree(d->ht[0].table);

            // ht[1] becomes the main table
            d->ht[0] = d->ht[1];

            // Leave ht[1] empty
            _dictReset(&d->ht[1]);

            // Rehash is no longer in progress
            d->rehashidx = -1;

            // Tell the caller the rehash is complete
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        // Buckets below rehashidx have already been emptied by previous
        // steps, so with used != 0 a non-empty bucket is expected at an
        // index >= rehashidx and below size.
        assert(d->ht[0].size > (unsigned)d->rehashidx);
        // Skip empty buckets up to the next non-empty one
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        // Head of the chain to migrate
        de = d->ht[0].table[d->rehashidx];
        // Move every entry of this chain from ht[0] to ht[1]
        while(de) {
            unsigned int h;

            // Save the successor: de->next is overwritten below
            nextde = de->next;

            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;

            // Insert at the head of the destination bucket
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;

            // Update the entry counters of both tables
            d->ht[0].used--;
            d->ht[1].used++;

            de = nextde;
        }

        // The source bucket is now empty
        d->ht[0].table[d->rehashidx] = NULL;

        // Advance to the next bucket
        d->rehashidx++;
    }

    // All n steps were executed
    return 1;
}

/*
 * Return the time reported by gettimeofday(), converted to milliseconds.
 *
 * T = O(1)
 */
long long timeInMilliseconds(void) {
    struct timeval now;
    long long ms;

    gettimeofday(&now,NULL);

    ms = ((long long)now.tv_sec)*1000;
    ms += now.tv_usec/1000;
    return ms;
}

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
/*
 * Runs dictRehash() in batches of 100 steps until the rehash completes or
 * more than 'ms' milliseconds have elapsed since the call started.
 *
 * The return value is only an approximation of the work done: it grows by
 * 100 for every batch that reported more work to do, and the final batch
 * (the one for which dictRehash() returned 0) is not counted.
 */
int dictRehashMilliseconds(dict *d, int ms) {
    const long long began = timeInMilliseconds();
    int counted = 0;

    for (;;) {
        /* dictRehash() returns 0 once there is nothing left to migrate */
        if (!dictRehash(d,100)) break;
        counted += 100;
        if (timeInMilliseconds()-began > ms) break;
    }

    return counted;
}

/*
 * Perform a single rehash step (one bucket), unless safe iterators are
 * currently running on the dictionary: moving entries while they iterate
 * could make them miss elements or see some elements twice.
 *
 * Called by the lookup and update functions to spread the rehash work.
 */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;

    dictRehash(d,1);
}

/*
 * Add a key-value pair to the dictionary.
 *
 * Returns DICT_OK when the pair was added, DICT_ERR when dictAddRaw()
 * could not create an entry for the key (for example because the key is
 * already present).
 */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *node = dictAddRaw(d,key);

    if (node == NULL) return DICT_ERR;

    /* The entry exists with its key set; attach the value. */
    dictSetVal(d, node, val);
    return DICT_OK;
}

/* Low level add. This function adds the entry but instead of setting
 * a value returns the dictEntry structure to the user, that will make
 * sure to fill the value field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned.
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key)
{
    dictht *target;
    dictEntry *node;
    int slot;

    /* Take the opportunity to migrate one bucket if a rehash is running. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Bucket index for the new entry; -1 when no slot is available
     * (the key already exists). */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    /* While rehashing, new entries always go to the new table ht[1]. */
    target = &d->ht[dictIsRehashing(d) ? 1 : 0];

    /* Allocate the entry and push it at the head of its bucket. */
    node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Set the hash entry fields: only the key, the value is left to the caller. */
    dictSetKey(d, node, key);

    return node;
}

/* Add an element, discarding the old if the key already exists.
 * Return 1 if the key was added from scratch, 0 if there was already an
 * element with such key and dictReplace() just performed a value update
 * operation. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing;
    dictEntry previous;

    /* Try to add the element. If the key
     * does not exists dictAdd will suceed. */
    if (dictAdd(d, key, val) == DICT_OK)
        return 1;

    /* The add failed: look up the entry that already holds this key. */
    existing = dictFind(d, key);

    /* Set the new value and free the old one. Note that it is important
     * to do that in this order, as the value may just be exactly the same
     * as the previous one. In this context, think to reference counting,
     * you want to increment (set), and then decrement (free), and not the
     * reverse. */
    previous = *existing;            /* copy that still holds the old value */
    dictSetVal(d, existing, val);    /* install the new value */
    dictFreeVal(d, &previous);       /* release the old value */

    return 0;
}

/* dictReplaceRaw() is simply a version of dictAddRaw() that always
 * returns the hash entry of the specified key, even if the key already
 * exists and can't be added (in that case the entry of the already
 * existing key is returned.)
 *
 * See dictAddRaw() for more information. */
dictEntry *dictReplaceRaw(dict *d, void *key) {
    dictEntry *found = dictFind(d,key);

    /* Existing entry: hand it back as is. */
    if (found != NULL) return found;

    /* Not found: create a new entry for the key. */
    return dictAddRaw(d,key);
}

/*
 * Look up 'key' and remove its entry from the dictionary.
 *
 * nofree: when zero, the key and value destructors are invoked on the
 *         entry before it is released; when non-zero they are skipped.
 *         The entry structure itself is released with zfree() in both cases.
 *
 * Returns DICT_OK when an entry was removed, DICT_ERR when the dictionary
 * has no table or the key was not found.
 */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
    unsigned int hash;
    int t;

    /* No table at all: d->ht[0].table is NULL */
    if (d->ht[0].size == 0) return DICT_ERR;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    hash = dictHashKey(d, key);

    /* Search ht[0], and ht[1] as well while a rehash is in progress. */
    for (t = 0; t <= 1; t++) {
        dictht *tbl = &d->ht[t];
        /* 'link' always points at the pointer that references 'cur':
         * first the bucket head, then the previous entry's next field. */
        dictEntry **link = &tbl->table[hash & tbl->sizemask];
        dictEntry *cur;

        for (cur = *link; cur != NULL; link = &cur->next, cur = *link) {
            if (!dictCompareKeys(d, key, cur->key)) continue;

            /* Unlink the element from the list */
            *link = cur->next;

            if (!nofree) {
                dictFreeKey(d, cur);
                dictFreeVal(d, cur);
            }
            zfree(cur);

            tbl->used--;

            return DICT_OK;
        }

        /* Without a rehash in progress everything lives in ht[0]. */
        if (!dictIsRehashing(d)) break;
    }

    return DICT_ERR; /* not found */
}

/*
 * Remove 'key' from the dictionary, invoking the key and value destructors
 * on the removed entry (dictGenericDelete with nofree = 0).
 *
 * Returns the result of dictGenericDelete(): DICT_OK or DICT_ERR.
 */
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0);
}

/*
 * Remove 'key' from the dictionary WITHOUT invoking the key and value
 * destructors (dictGenericDelete with nofree = 1). The entry structure
 * itself is still released by dictGenericDelete().
 *
 * Returns the result of dictGenericDelete(): DICT_OK or DICT_ERR.
 */
int dictDeleteNoFree(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,1);
}

/* Destroy an entire dictionary */
/*
 * Release every entry of the hash table 'ht' (key destructor, value
 * destructor, then the entry itself), free the bucket array and reset the
 * table to the empty state. Always returns DICT_OK.
 *
 * T = O(N)
 */
int _dictClear(dict *d, dictht *ht)
{
    unsigned long bucket = 0;

    /* Free all the elements. The scan stops early once 'used' drops to 0,
     * since every remaining bucket is then empty. */
    while (bucket < ht->size && ht->used > 0) {
        dictEntry *cur = ht->table[bucket++];

        while (cur != NULL) {
            dictEntry *following = cur->next;

            dictFreeKey(d, cur);
            dictFreeVal(d, cur);
            zfree(cur);

            ht->used--;
            cur = following;
        }
    }

    /* Free the table and the allocated cache structure */
    zfree(ht->table);

    /* Re-initialize the table */
    _dictReset(ht);

    return DICT_OK; /* never fails */
}

/*
 * Clear both hash tables of the dictionary and free the dictionary itself.
 * The pointer 'd' must not be used after this call.
 *
 * T = O(N)
 */
void dictRelease(dict *d)
{
    _dictClear(d,&d->ht[0]);
    _dictClear(d,&d->ht[1]);

    zfree(d);
}

/*
 * Look up the entry that holds 'key'.
 *
 * Returns the entry, or NULL when the dictionary has no table or the key
 * is not present. Performs one incremental rehash step when a rehash is
 * in progress.
 */
dictEntry *dictFind(dict *d, const void *key)
{
    unsigned int hash, t;

    if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */

    if (dictIsRehashing(d)) _dictRehashStep(d);

    hash = dictHashKey(d, key);

    /* Search ht[0], and ht[1] as well while a rehash is in progress. */
    for (t = 0; t <= 1; t++) {
        dictht *tbl = &d->ht[t];
        dictEntry *cur;

        for (cur = tbl->table[hash & tbl->sizemask]; cur != NULL; cur = cur->next) {
            if (dictCompareKeys(d, key, cur->key))
                return cur;
        }

        /* Without a rehash in progress there is nothing in ht[1]. */
        if (!dictIsRehashing(d)) break;
    }

    return NULL;
}

/*
 * Return the value pointer stored under 'key', or NULL when dictFind()
 * does not find the key.
 */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *found = dictFind(d,key);

    if (found == NULL) return NULL;

    return dictGetVal(found);
}

/*
 * Allocate a non-safe iterator for the dictionary, positioned before the
 * first bucket of ht[0] (table 0, index -1, no current entry).
 *
 * T = O(1)
 */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->d = d;
    it->safe = 0;
    it->table = 0;
    it->index = -1;
    it->entry = it->nextEntry = NULL;

    return it;
}

/*
 * Allocate a safe iterator for the dictionary: same as dictGetIterator()
 * but with the 'safe' flag set.
 *
 * T = O(1)
 */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *it = dictGetIterator(d);

    it->safe = 1;

    return it;
}

/*
 * Advance the iterator and return the entry it now points to.
 *
 * Returns NULL once every bucket has been visited (ht[0], and ht[1] as
 * well when a rehash is in progress at the time ht[0] is exhausted).
 */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {
            // No current entry: either the first call, or the end of the
            // current chain was reached - move on to the next bucket.

            dictht *ht = &iter->d->ht[iter->table];

            // First step of a safe iterator: register it in the dict's
            // iterator counter (non-safe iterators are not counted)
            if (iter->safe &&
                iter->index == -1 &&
                iter->table == 0)
                iter->d->iterators++;

            // Next bucket
            iter->index++;

            // Past the last bucket of the current table
            if (iter->index >= (signed) ht->size) {
                // Continue with ht[1] if a rehash is in progress
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                    ht = &iter->d->ht[1];
                } else {
                    // No ht[1] to visit, or ht[1] is exhausted too: done
                    break;
                }
            }

            // Head of the chain stored in the new bucket (may be NULL)
            iter->entry = ht->table[iter->index];

        } else {
            // Continue along the current chain using the saved successor
            iter->entry = iter->nextEntry;
        }

        // Found an entry: remember its successor before returning it
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}

/*
 * Free an iterator. A safe iterator that has been advanced at least once
 * also gives back its slot in the dictionary's safe-iterator counter.
 *
 * T = O(1)
 */
void dictReleaseIterator(dictIterator *iter)
{
    /* The iterator has started once it left its initial position
     * (table 0, index -1). */
    int started = (iter->index != -1 || iter->table != 0);

    if (iter->safe && started)
        iter->d->iterators--;

    zfree(iter);
}

/*
 * Return a random entry from the dictionary, or NULL when the dictionary
 * holds no entries. Useful to implement randomized algorithms.
 *
 * A non-empty bucket is picked by repeatedly drawing random bucket indexes
 * (across both tables while a rehash is in progress), then a random entry
 * of that bucket's chain is returned.
 */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *head, *pick;
    unsigned int slot;
    int chainlen, skip;

    if (dictSize(d) == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    if (dictIsRehashing(d)) {
        /* Indexes below ht[0].size address ht[0], the others ht[1]. */
        do {
            slot = random() % (d->ht[0].size+d->ht[1].size);
            if (slot >= d->ht[0].size)
                head = d->ht[1].table[slot - d->ht[0].size];
            else
                head = d->ht[0].table[slot];
        } while(head == NULL);
    } else {
        do {
            slot = random() & d->ht[0].sizemask;
            head = d->ht[0].table[slot];
        } while(head == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    chainlen = 0;
    for (pick = head; pick != NULL; pick = pick->next)
        chainlen++;

    /* The loops above guarantee the chain holds at least one entry. */
    skip = random() % chainlen;

    for (pick = head; skip > 0; skip--)
        pick = pick->next;

    return pick;
}

/* ------------------------- private functions ------------------------------ */

/* Expand the hash table if needed */
/*
 * Returns DICT_OK when nothing has to be done, otherwise the result of
 * dictExpand().
 */
static int _dictExpandIfNeeded(dict *d)
{
    dictht *main_ht = &d->ht[0];

    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (main_ht->size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Below the 1:1 elements/buckets ratio: nothing to do. */
    if (main_ht->used < main_ht->size) return DICT_OK;

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (!dict_can_resize &&
        main_ht->used/main_ht->size <= dict_force_resize_ratio)
        return DICT_OK;

    return dictExpand(d, main_ht->used*2);
}

/*
 * Compute the number of buckets to use for a table that must hold 'size'
 * entries: the smallest power of two that is >= size, and never less than
 * DICT_HT_INITIAL_SIZE.
 *
 * For size >= LONG_MAX the result is LONG_MAX + 1 (still a power of two),
 * because callers derive the bucket mask as 'result - 1', which is only a
 * valid mask when the result is a power of two.
 *
 * T = O(log size)
 */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX + 1UL;

    while (i < size)
        i *= 2;

    return i;
}

/* Returns the index of a free slot that can be populated with
 * an hash entry for the given 'key'.
 * If the key already exists, -1 is returned.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table. */
/*
 * -1 is also returned when _dictExpandIfNeeded() reports DICT_ERR.
 */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int hash, slot, t;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;

    hash = dictHashKey(d, key);

    /* Check ht[0], and ht[1] as well while a rehash is in progress.
     * 'slot' is left holding the index computed for the last table that
     * was checked: ht[0] normally, ht[1] during a rehash, which is the
     * table the caller inserts into. */
    t = 0;
    do {
        dictEntry *cur;

        slot = hash & d->ht[t].sizemask;

        for (cur = d->ht[t].table[slot]; cur != NULL; cur = cur->next) {
            /* The key is already present */
            if (dictCompareKeys(d, key, cur->key))
                return -1;
        }
    } while (dictIsRehashing(d) && ++t <= 1);

    return slot;
}

/*
 * Remove every entry from the dictionary: both hash tables are cleared,
 * any rehash in progress is cancelled and the safe-iterator counter is
 * reset. The dictionary itself stays allocated.
 *
 * T = O(N)
 */
void dictEmpty(dict *d) {
    int t;

    /* Clear ht[0] first, then ht[1]. */
    for (t = 0; t < 2; t++)
        _dictClear(d,&d->ht[t]);

    d->iterators = 0;
    d->rehashidx = -1;
}

/*
 * Allow hash table resizing: sets the file-level dict_can_resize flag to 1.
 * (This is the resize switch, not the per-dict rehash indicator.)
 *
 * T = O(1)
 */
void dictEnableResize(void) {
    dict_can_resize = 1;
}

/*
 * Disable hash table resizing: sets the file-level dict_can_resize flag
 * to 0. (This is the resize switch, not the per-dict rehash indicator.)
 *
 * T = O(1)
 */
void dictDisableResize(void) {
    dict_can_resize = 0;
}