redis基础结构之字典

yu96848

于 2019-06-20 23:25:05 发布

阅读量106

点赞数 1

分类专栏： redis统一学习

本文链接：https://blog.csdn.net/m0_37232228/article/details/93138476

版权

redis统一学习专栏收录该内容

4 篇文章 0 订阅

订阅专栏

字典

作用: 数据库和hash键

字典的结构

/* Dictionary: the top-level handle that owns the two hash tables. */
typedef struct dict {

    // Type-specific functions.
    // type and privdata exist so that polymorphic dictionaries can be created.
    dictType *type;

    // Private data
    void *privdata;

    // Hash tables; ht[1] is mainly used during rehash
    dictht ht[2];

    // Rehash index.
    // Holds -1 while no rehash is in progress.
    int rehashidx; /* rehashing not in progress if rehashidx == -1 */
     // Number of safe iterators currently running
    int iterators; /* number of iterators currently running */

} dict;

typedef struct dictIterator {

如果 safe 属性的值为 1 ，那么在迭代进行的过程中，
程序仍然可以执行 dictAdd 、 dictFind 和其他函数，对字典进行修改。
如果 safe 不为 1 ，那么程序只会调用 dictNext 对字典进行迭代，
// 被迭代的字典
dict *d;

// table ：正在被迭代的哈希表号码，值可以是 0 或 1 。
// index ：迭代器当前所指向的哈希表索引位置。
// safe ：标识这个迭代器是否安全

int table, index, safe;

// entry ：当前迭代到的节点的指针
// nextEntry ：当前迭代节点的下一个节点
// 因为在安全迭代器运作时， entry 所指向的节点可能会被修改，
// 所以需要一个额外的指针来保存下一节点的位置，
// 从而防止指针丢失
dictEntry *entry, *nextEntry;

long long fingerprint; /* unsafe iterator fingerprint for misuse detection */
} dictIterator;

dictType结构:

/* Table of callbacks that customise a dictionary for one kind of key/value. */
typedef struct dictType {

// Computes the hash value of a key
unsigned int (*hashFunction)(const void *key);

// Duplicates a key
void *(*keyDup)(void *privdata, const void *key);

// Duplicates a value
void *(*valDup)(void *privdata, const void *obj);

// Compares two keys
int (*keyCompare)(void *privdata, const void *key1, const void *key2);

// Destroys a key
void (*keyDestructor)(void *privdata, void *key);

// Destroys a value
void (*valDestructor)(void *privdata, void *obj);

} dictType;

字典的底层实现是hash表

/* Hash table: the underlying storage of a dictionary. */
typedef struct dictht {

    // Hash table array (one chain head pointer per bucket)
    dictEntry **table;

    // Size of the hash table
    unsigned long size;

    // Size mask used to compute a bucket index from a hash value;
    // always equal to size - 1
    unsigned long sizemask;

    // Number of nodes currently stored in this hash table
    unsigned long used;
} dictht;

示意图:
在这里插入图片描述
哈希表节点结构:

/* Hash table node: one key/value pair stored in a bucket chain. */
typedef struct dictEntry {

    // Key
    void *key;

    // Value: a pointer, an unsigned 64-bit integer or a signed 64-bit integer
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;

    // Next node in the same bucket, forming a linked list;
    // collisions are resolved by separate chaining
    struct dictEntry *next;

} dictEntry;

字典示例图:

在这里插入图片描述

字典索引值计算以及冲突解决方法

索引值计算：先计算哈希值，再计算索引值；例如计算得到的 index 为 0，则该键值对存放在哈希表数组的 0 号位置 ht[x].table[0] 下
1).计算key的hash : hash = dict->type->hashFunction(key) 此处使用的hash算法为(MurmurHash)
2).以key的hash计算索引 : index = hash & dict->ht[x].sizemask

冲突解决以及插入效率:
1).冲突解决：链地址法解决冲突
2).插入效率: 对于index相同的key,value按头插法插入链

rehash

1.什么时候会进行rehash?
负载因子 = ht[0].used / ht[0].size
1).当没有进行持久化操作(bgsave和bgrewriteaof)时，负载因子大于等于1时会进行
2).当有在进行持久化操作(bgsave和bgrewriteaof)时，负载因子大于等于5才会进行（目的：子进程存在期间操作系统采用写时复制(copy-on-write)，提高扩展所需的负载因子可以尽量避免在此期间进行rehash，减少不必要的内存写入）
3).当负载因子小于0.1时，也会进行rehash操作（此时是对哈希表进行收缩）

2.rehash操作:
1).ht[1]空间分配:
扩展: ht[1].size 为第一个大于等于 ht[0].used*2 的 2^n（2 的 n 次幂）；例如 ht[0].used = 4，则 4*2 = 8 恰好是 2^3，所以 ht[1].size = 8
收缩：ht[1].size 为第一个大于等于 ht[0].used 的 2^n
2).渐进式rehash
1)).为ht[1]分配空间
2)).将rehashidx值设为0，表示rehash开始
3)).每次对字典执行操作时，除了执行指定操作外，还会将ht[0]在rehashidx索引上的所有键值对rehash到ht[1]上，每完成一个索引，rehashidx加1
4)).ht[0]的所有键值对都rehash到ht[1]后，释放ht[0]，将ht[1]设置为新的ht[0]并重置ht[1]，再将rehashidx重设为-1
在渐进式rehash阶段，删除、更新、查找等操作会在ht[0]和ht[1]两个表上进行（先ht[0]后ht[1]）；新添加到字典的键值对一律保存到ht[1]，保证ht[0]中的键值对数量只减不增；

代码

//查找

/*
 * Look up key in dictionary d.
 *
 * Returns the node holding the key, or NULL when the key is not present
 * (including the case where the dictionary has no table yet).
 */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    unsigned int h, idx, table;

    // The dictionary's hash table is empty
    if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */

    // If a rehash is in progress, take one incremental rehash step
    if (dictIsRehashing(d)) _dictRehashStep(d);

    // Compute the hash value of the key
    h = dictHashKey(d, key);

    // Search table 0 first, then table 1
    for (table = 0; table <= 1; table++) {

        // Compute the bucket index in this table
        idx = h & d->ht[table].sizemask;

        // Walk every node of the chain at that bucket, looking for key
        he = d->ht[table].table[idx];
        while(he) {

            if (dictCompareKeys(d, key, he->key))
                return he;

            he = he->next;
        }

        // Table 0 has been searched without finding the key. Table 1 only
        // holds entries while a rehash is in progress, so stop here unless
        // the dictionary is rehashing.
        if (!dictIsRehashing(d)) return NULL;
    }

    // Reaching this point means neither table contains the key
    return NULL;
}

//rehash

/*
 * Perform up to n steps of incremental rehashing. Each step moves every
 * node of one non-empty ht[0] bucket into ht[1].
 *
 * Returns 0 when no rehash is in progress, or when ht[0] turned out to be
 * empty and the rehash was finalised. Returns 1 after n buckets have been
 * migrated, telling the caller to call again.
 */
int dictRehash(dict *d, int n) {

    // May only run while a rehash is in progress
    if (!dictIsRehashing(d)) return 0;

    // Migrate n buckets
    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        // ht[0] holding no nodes means the rehash is complete
        if (d->ht[0].used == 0) {
            // Release the bucket array of table 0
            zfree(d->ht[0].table);
            // The former table 1 becomes the new table 0
            d->ht[0] = d->ht[1];
            // Reset the old table 1
            _dictReset(&d->ht[1]);
            // Clear the rehash-in-progress marker
            d->rehashidx = -1;
            // Return 0 to tell the caller the rehash has finished
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        // Make sure rehashidx is still inside the table
        assert(d->ht[0].size > (unsigned)d->rehashidx);

        // Skip empty buckets until the next non-empty one is found
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;

        // Head node of the chain stored at this bucket
        de = d->ht[0].table[d->rehashidx];

        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            unsigned int h;

            // Remember the next node before this one is relinked
            nextde = de->next;

            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;

            // Insert the node at the head of the target chain in ht[1]
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;

            // Update the node counters of both tables
            d->ht[0].used--;
            d->ht[1].used++;

            // Continue with the next node of the old chain
            de = nextde;
        }

        // The bucket has been fully migrated; clear its slot in ht[0]
        d->ht[0].table[d->rehashidx] = NULL;
        // Advance the rehash index
        d->rehashidx++;
    }

    return 1;
}
   /*
    * Rehash in batches of 100 steps until either dictRehash reports that
    * nothing is left to do or more than ms milliseconds have elapsed; the
    * time limit is therefore a second way for the rehash loop to exit.
    *
    * Returns 100 for every batch after which dictRehash returned non-zero.
    */
int dictRehashMilliseconds(dict *d, int ms) {
    // Starting timestamp (timeInMilliseconds is presumably the current
    // time in milliseconds; its definition is not shown here)
    long long began = timeInMilliseconds();
    int moved = 0;

    for (;;) {
        // Stop as soon as a batch reports there is no more work
        if (!dictRehash(d,100)) {
            break;
        }
        moved += 100;

        // Stop once the time budget has been exceeded
        if (timeInMilliseconds()-began > ms) {
            break;
        }
    }

    return moved;
}