【redis】3.2.8rehash存在的隐患，最新5.0+版本的改进-源码分析

最新推荐文章于 2022-11-18 16:12:15 发布
自驱
最新推荐文章于 2022-11-18 16:12:15 发布
阅读量322
点赞数
分类专栏：架构 redis
本文链接：https://blog.csdn.net/qfzhangwei/article/details/113363457
版权
架构同时被 2 个专栏收录
161 篇文章 4 订阅
订阅专栏
redis
33 篇文章 0 订阅
订阅专栏
1 隐患介绍

至美团博客
2 老版本 VS 新版本

// 旧版本
int dictExpand(dict *d, unsigned long size) 
{ 
    dictht n; // 新哈希表
    unsigned long realsize = _dictNextPower(size);  // 计算扩展或缩放新哈希表的大小(调用下面函数_dictNextPower())
 
    /* 如果正在rehash或者新哈希表的大小小于现已使用，则返回error */ 
    if (dictIsRehashing(d) || d->ht[0].used > size) 
        return DICT_ERR; 
 
    /* 如果计算出哈希表size与现哈希表大小一样，也返回error */ 
    if (realsize == d->ht[0].size) return DICT_ERR; 
 
    /* 初始化新哈希表 */ 
    n.size = realsize; 
    n.sizemask = realsize-1; 
    n.table = zcalloc(realsize*sizeof(dictEntry*));  // 为table指向dictEntry 分配内存
    n.used = 0; 
 
    /* 如果ht[0] 为空，则初始化ht[0]为当前键值对的哈希表 */ 
    if (d->ht[0].table == NULL) { 
        d->ht[0] = n; 
        return DICT_OK; 
    } 
 
    /* 如果ht[0]不为空，则初始化ht[1]为当前键值对的哈希表，并开启渐进式rehash模式 */ 
    d->ht[1] = n; 
    d->rehashidx = 0; 
    return DICT_OK; 
}

// 新版本 增加malloc_failed参数，如果为1表示分配失败
// when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{

    // 如果失败，修改状态再次尝试
    if (malloc_failed) *malloc_failed = 0;

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    /* 如果正在rehash或者新哈希表的大小小于现已使用（只能说元素增加的太快了，比如翻倍增加），则返回error */  
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    dictht n; /* the new hash table */
    // 计算扩展或缩放新哈希表的大小(调用下面函数_dictNextPower())  
    // 比原来版本这个计算发生了后移，如果提前返回情况，这不时不必要计算的
    unsigned long realsize = _dictNextPower(size);

    /* Rehashing to the same table size is not useful. */
    /* 如果计算出哈希表size与现哈希表大小一样(这么巧，有点疑问)，也返回error */ 
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    // 掩码
    n.sizemask = realsize-1;
    // 需要尝试 ztrycalloc 分配内存
    if (malloc_failed) {
        n.table = ztrycalloc(realsize*sizeof(dictEntry*));
        *malloc_failed = n.table == NULL;
        // 如果为不为0，表示分配失败了，返回error
        if (*malloc_failed)
            return DICT_ERR;
    } else
        // 普通分配情况
        n.table = zcalloc(realsize*sizeof(dictEntry*));

    // 下面逻辑保持
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}


// 以下是所有的源码解析

// 普通分配内存函数
/* Allocate memory and zero it or panic */
void *zcalloc(size_t size) {
    void *ptr = ztrycalloc_usable(size, NULL);
    if (!ptr) zmalloc_oom_handler(size);  //  *ptr == NULL oom 处理
    return ptr;
}



// 新增重内存分配函数
n.table = ztrycalloc(realsize*sizeof(dictEntry*));

源码：
/* Try allocating memory, and return NULL if failed. */
void *ztrycalloc(size_t size) {
    void *ptr = ztrycalloc_usable(size, NULL);
    return ptr;
}


可见都调用了：
void *ptr = ztrycalloc_usable(size, NULL);


// 该函数#if、#elif、#else 和 #endif 都是预处理命令
// 这些操作都是在预处理阶段完成的，多余的代码以及所有的宏都不会参与编译，不仅保证了代码的正确性，还减小了编译后文件的体积
// 这种能够根据不同情况编译不同代码、产生不同目标文件的机制，称为条件编译。条件编译是预处理程序的功能，不是编译器的功能。
/* Try allocating memory and zero it, and return NULL if failed.
 * '*usable' is set to the usable size if non NULL. */
void *ztrycalloc_usable(size_t size, size_t *usable) {
    void *ptr = calloc(1, size+PREFIX_SIZE);  // 分配内存
    if (ptr == NULL) return NULL;

#ifdef HAVE_MALLOC_SIZE
    size = zmalloc_size(ptr);          // 计算内存大小，头信息的区别
    update_zmalloc_stat_alloc(size);   // 更新内存使用计数器
    if (usable) *usable = size;
    return ptr;
#else
    *((size_t*)ptr) = size;
    update_zmalloc_stat_alloc(size+PREFIX_SIZE);
    if (usable) *usable = size;
    return (char*)ptr+PREFIX_SIZE;
#endif
}

// 计算内存大小，头信息的区别
/* Provide zmalloc_size() for systems where this function is not provided by
 * malloc itself, given that in that case we store a header with this
 * information as the first bytes of every allocation. */
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr) {
    void *realptr = (char*)ptr-PREFIX_SIZE;
    size_t size = *((size_t*)realptr);
    return size+PREFIX_SIZE;
}
size_t zmalloc_usable_size(void *ptr) {
    return zmalloc_size(ptr)-PREFIX_SIZE;
}
#endif





/* Our hash table capability is a power of two */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX + 1LU;
    while(1) {
        if (i >= size)
            return i;
        i *= 2;
    }
}


// OOM预判
/* Return 1 if used memory is more than maxmemory after allocating more memory,
 * return 0 if not. Redis may reject user's requests or evict some keys if used
 * memory exceeds maxmemory, especially, when we allocate huge memory at once. */
int overMaxmemoryAfterAlloc(size_t moremem) {
    if (!server.maxmemory) return  0; /* No limit. */

    // 如果扩用后内存小于最大内存 则返回0
    /* Check quickly. */
    size_t mem_used = zmalloc_used_memory();
    if (mem_used + moremem <= server.maxmemory) return 0;

    // 没有被计数的空闲内存
    size_t overhead = freeMemoryGetNotCountedMemory();
    mem_used = (mem_used > overhead) ? mem_used - overhead : 0;
    return mem_used + moremem > server.maxmemory;
}


^
|



/* Return 1 if currently we allow dict to expand. Dict may allocate huge
 * memory to contain hash buckets when dict expands, that may lead redis
 * rejects user's requests or evicts some keys, we can stop dict to expand
 * provisionally if used memory will be over maxmemory after dict expands,
 * but to guarantee the performance of redis, we still allow dict to expand
 * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */

//  最大负载因子 1.618 ,小于等于该值则尝试预判内存溢出
//  #define HASHTABLE_MAX_LOAD_FACTOR 1.618   /* Maximum hash table load factor. */

int dictExpandAllowed(size_t moreMem, double usedRatio) {
    if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
        return !overMaxmemoryAfterAlloc(moreMem);  // 于等于该值则尝试预判内存溢出
    } else {
        return 1;
    }
}


^
|


// dictType的实现者
/* Db->dict, keys are sds strings, vals are Redis objects. */
dictType dbDictType = {
    dictSdsHash,                /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictSdsKeyCompare,          /* key compare */
    dictSdsDestructor,          /* key destructor */
    dictObjectDestructor,       /* val destructor */
    dictExpandAllowed           /* allow to expand */
};



^
|


// 容量预判  预判函数  expandAllowed 有实现自定义
// int (*expandAllowed)(size_t moreMem, double usedRatio);

/* Because we may need to allocate huge memory chunk at once when dict
 * expands, we will check this allocation is allowed or not if the dict
 * type has expandAllowed member function. */  
static int dictTypeExpandAllowed(dict *d) {
    if (d->type->expandAllowed == NULL) return 1;
    return d->type->expandAllowed(
                    // 预判函数  expandAllowed 自定义实现如下  参数 （内存值，目前使用比率即负载因子）
                    _dictNextPower(d->ht[0].used + 1) * sizeof(dictEntry*),
                    (double)d->ht[0].used / d->ht[0].size);
}

^
|

/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{    
    // 如果正在进行Rehash，则直接返回
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;
    
    // 如果ht[0]字典为空，则创建并初始化ht[0]  
    /* If the hash table is empty expand it to the initial size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);


    /* (ht[0].used/ht[0].size)>=1前提下，
       当满足dict_can_resize=1或ht[0].used/t[0].size>5时，便对字典进行扩展 */
    // static unsigned int dict_force_resize_ratio = 5;   
    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio) &&
        dictTypeExpandAllowed(d))                 // 增加容量预判
    {
        return dictExpand(d, d->ht[0].used + 1);  // 原来是扩容doubling--->return dictExpand(d, d->ht[0].used*2);   // 扩展字典为原来的2倍
    }



    return DICT_OK;
}


^
|

/* Returns the index of a free slot that can be populated with
 * a hash entry for the given 'key'.
 * If the key already exists, -1 is returned
 * and the optional output parameter may be filled.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long idx, table;
    dictEntry *he;
    if (existing) *existing = NULL;

    /* 检查是否需要扩展哈希表，不足则扩展 */ 
    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    for (table = 0; table <= 1; table++) {
        idx = hash & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                if (existing) *existing = he;
                return -1;
            }
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}



^
|

/* Low level add or find:
 * This function adds the entry but instead of setting a value returns the
 * dictEntry structure to the user, that will make sure to fill the value
 * field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    // 如果已经存在返回-1
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}

^
|

// 增加元素
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key,NULL);

    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}