redis源码阅读-dict字典

最新推荐文章于 2022-07-02 11:16:55 发布

非自然码农

最新推荐文章于 2022-07-02 11:16:55 发布

阅读量975

点赞数 2

分类专栏： redis源码阅读文章标签： redis c语言

本文链接：https://blog.csdn.net/weixin_45704279/article/details/124588542

版权

redis源码阅读专栏收录该内容

6 篇文章 1 订阅

订阅专栏

redis源码阅读-dict字典

字典是redis中应用非常广泛的一种抽象数据结构，字典是key-value映射关系，主要特点就是一一对应的关系。字典中的每个键都是独一无二的。

字典在redis中的应用有：数据库底层实现，哈希键的底层实现。

dict的实现

基本数据结构

关于dict的数据结构一共有五个：dictEntry,dictType,dictht,dict,dictIterator.

dictEntry是dict中基本元素类型。

dictType则对外提供了几个自定义函数，供给使用者填充

dictht是哈希表

dict是字典

dictIterator是字典的迭代器。

  /* One element of the dictionary: a key-value pair that is also a node of a
   * singly linked bucket chain. */
  typedef struct dictEntry {
    void *key;// the key
    union {
        void *val;
        uint64_t u64;// u64, s64 and d share storage with val, so the value slot can hold a 64-bit integer or a double in place of a pointer (their users are not shown in this file)
        int64_t s64;
        double d;
    } v;// the value
    struct dictEntry *next;// next entry in the same bucket; hash collisions are resolved by separate chaining
  } dictEntry;
// The typedef lets the struct be written as plain `dictEntry` instead of `struct dictEntry`.

// Table of type-specific callbacks supplied by the user of a dict.
// Every callback except hashFunction receives the dict's privdata as its first argument.
 typedef struct dictType {    
    uint64_t (*hashFunction)(const void *key);// computes the hash value of a key
    void *(*keyDup)(void *privdata, const void *key);// duplicates a key
    void *(*valDup)(void *privdata, const void *obj);// duplicates a value; when NULL, dictSetVal stores the caller's pointer as-is
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);// compares two keys (presumably non-zero means "equal" -- confirm against dictCompareKeys)
 void (*keyDestructor)(void *privdata, void *key);// destroys a key
 void (*valDestructor)(void *privdata, void *obj);// destroys a value
  } dictType;
/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
     // One hash table of a dictionary.
     typedef struct dictht {
      dictEntry **table;// array of bucket heads; each slot is NULL or the first entry of a chain
      unsigned long size;// number of buckets (slots) in table
      unsigned long sizemask;// mask turning a hash into a bucket index: idx = h & sizemask (bitwise AND, not modulo). Presumably size - 1 with size a power of two -- it is set where the table is allocated, which is not shown here
      unsigned long used;// number of entries stored in the table (bumped once per inserted entry), not the number of occupied buckets
     } dictht;

      // The dictionary itself.
       typedef struct dict {
      dictType *type;// type-specific callbacks (hashing, duplication, comparison, destruction)
      void *privdata;// opaque pointer handed to the dictType callbacks
      dictht ht[2];// two hash tables: new entries go to ht[0] normally; ht[1] is idle and only receives entries while a rehash is in progress
      long rehashidx; /* -1 when no rehash is in progress; otherwise the index of the next ht[0] bucket to migrate */
      unsigned long iterators; /* number of iterators currently running */
       } dict;

  /* If safe is set to 1 this is a safe iterator, that means, you can call
   * dictAdd, dictFind, and other functions against the dictionary even while
   * iterating. Otherwise it is a non safe iterator, and only dictNext()
   * should be called while iterating. */
     // Iterator over a dict.
     typedef struct dictIterator {
      dict *d;// presumably the dictionary being iterated -- the iterator functions are not shown here
      long index;
      int table, safe;// safe: 1 for a safe iterator, see the comment above
      dictEntry *entry, *nextEntry;
      /* unsafe iterator fingerprint for misuse detection. */
      long long fingerprint;
     } dictIterator;

丰富的API

dict也对外提供了丰富的api，下面挑选较为经典的create、add、delete、find、release、rehash进行分析。

/* Prototypes of the dict API. Only dictCreate, dictAdd, dictAddRaw,
 * dictDelete, dictUnlink, dictFind, dictRelease and dictRehash are walked
 * through in the sections that follow; the others are declared here only. */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddOrFind(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
int dictDelete(dict *d, const void *key);
dictEntry *dictUnlink(dict *ht, const void *key);
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
dictEntry *dictGetFairRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
void dictSetHashFunctionSeed(uint8_t *seed);
uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);

创建dict：

/* Put a hash table header back into its empty state.
 *
 * Only the fields are overwritten: ht->table is set to NULL without being
 * freed, so the caller must already have released (or handed over) the
 * bucket array it pointed to. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;       /* no entries stored */
    ht->sizemask = 0;
    ht->size = 0;       /* no buckets */
    ht->table = NULL;
}

/* Allocate a dictionary with zmalloc and initialise it through _dictInit.
 *
 * type        - callback table stored in the new dict
 * privDataPtr - opaque pointer stored in the new dict
 *
 * Returns the newly allocated dict. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *fresh = zmalloc(sizeof(*fresh));

    _dictInit(fresh, type, privDataPtr);
    return fresh;
}

/* Initialise an already allocated dict: remember the callback table and the
 * private data pointer, mark the dict as "not rehashing", zero the running
 * iterator counter and reset both hash tables to empty.
 *
 * Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;  /* -1 means no rehash is in progress */
    d->iterators = 0;   /* no iterator is running yet */
    _dictReset(&d->ht[1]);
    _dictReset(&d->ht[0]);
    return DICT_OK;
}

添加键值对：

/* Insert the pair (key, val) into the dictionary.
 *
 * The entry is created by dictAddRaw and its value is then stored with
 * dictSetVal. Returns DICT_ERR when dictAddRaw yields NULL (per the comment
 * in dictAddRaw, the key already exists), otherwise DICT_OK. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d, key, NULL);

    if (added == NULL)
        return DICT_ERR;

    dictSetVal(d, added, val);
    return DICT_OK;
}
/* Low level insertion: link a new entry carrying `key` into the table and
 * return it, leaving the value for the caller to fill in.
 *
 * existing - forwarded to _dictKeyIndex (presumably receives the entry that
 *            already holds the key -- confirm against _dictKeyIndex).
 *
 * Returns NULL when _dictKeyIndex reports -1 (the key is already present),
 * otherwise the freshly linked entry. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long bucket;
    dictEntry *node;
    dictht *target;

    /* Each insertion contributes one step to a rehash in progress. */
    if (dictIsRehashing(d))
        _dictRehashStep(d);

    /* Bucket for the new element, or -1 if the key already exists. */
    bucket = _dictKeyIndex(d, key, dictHashKey(d,key), existing);
    if (bucket == -1)
        return NULL;

    /* While rehashing, new entries go to ht[1] only. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    /* Insert at the head of the chain, on the assumption that recently
     * added entries are the ones most likely to be accessed again. */
    node = zmalloc(sizeof(*node));
    node->next = target->table[bucket];
    target->table[bucket] = node;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, node, key);
    return node;
}
/* Macro: store a VALUE (not a key) into an entry.
 * Without a valDup callback the caller's pointer is stored as-is; otherwise
 * the entry stores whatever valDup(privdata, value) returns.
 * Note that `d` and `entry` are evaluated more than once. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup == NULL) \
        (entry)->v.val = (_val_); \
    else \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
} while(0)

此处add节点时，先向table中添加一个设置好key的节点，再设置好该节点的value，没太搞懂为什么要分开。官方注释写到：

这个函数添加了一个条目，但是没有设置一个值，而是返回dictEntry结构给用户，这将确保他可以按照他想要的方式去填充这个节点。

删除某个节点：

删除节点的功能由dictGenericDelete提供，该函数可以由dictDelete和dictUnlink调用，前者会销毁被删除节点的内存。后者只是将该节点从链上取出。

/* Find the entry for `key` and unlink it from its bucket chain.
 *
 * nofree == 0: the key, the value and the entry itself are released as well.
 *              The returned pointer then refers to freed memory and is only
 *              good for a NULL / non-NULL test.
 * nofree != 0: the entry is merely detached and handed back to the caller.
 *
 * Returns NULL when the dict is empty or the key is not found. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t hash, bucket;
    dictEntry **link, *victim;
    int t;

    /* Nothing stored in either table. */
    if (d->ht[0].used == 0 && d->ht[1].used == 0)
        return NULL;

    /* Each deletion contributes one step to a rehash in progress. */
    if (dictIsRehashing(d))
        _dictRehashStep(d);
    hash = dictHashKey(d, key);

    /* ht[0] first; ht[1] is consulted only while rehashing. */
    for (t = 0; t <= 1; t++) {
        bucket = hash & d->ht[t].sizemask;

        /* `link` is the slot pointing at the current entry: the bucket head
         * at first, then the `next` field of the previous entry. */
        for (link = &d->ht[t].table[bucket];
             (victim = *link) != NULL;
             link = &victim->next)
        {
            if (key == victim->key || dictCompareKeys(d, key, victim->key)) {
                /* Unlink the element from the list */
                *link = victim->next;
                if (!nofree) {
                    dictFreeKey(d, victim);
                    dictFreeVal(d, victim);
                    zfree(victim);
                }
                d->ht[t].used--;
                return victim;
            }
        }

        if (!dictIsRehashing(d))
            break;
    }
    return NULL; /* not found */
}


/* Remove `key` and release its entry (dictGenericDelete with nofree = 0).
 * Returns DICT_OK if an entry was removed, DICT_ERR if none was found. */
int dictDelete(dict *ht, const void *key) {
    if (dictGenericDelete(ht,key,0) == NULL)
        return DICT_ERR;
    return DICT_OK;
}

/* Detach the entry for `key` from the table without releasing it
 * (dictGenericDelete with nofree = 1). Returns the detached entry, or NULL
 * when the key is not found. */
dictEntry *dictUnlink(dict *ht, const void *key) {
    dictEntry *detached = dictGenericDelete(ht,key,1);

    return detached;
}

查找键值对：

/* Look up `key` and return its entry, or NULL when the dict is empty or the
 * key is absent. A lookup also performs one rehash step if a rehash is in
 * progress. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *cur;
    uint64_t hash, bucket, t;

    if (dictSize(d) == 0)
        return NULL; /* dict is empty */
    if (dictIsRehashing(d))
        _dictRehashStep(d);

    hash = dictHashKey(d, key);

    /* ht[0] first; ht[1] is consulted only while rehashing. */
    for (t = 0; t <= 1; t++) {
        bucket = hash & d->ht[t].sizemask;

        /* Walk the chain from the bucket head. */
        for (cur = d->ht[t].table[bucket]; cur != NULL; cur = cur->next) {
            if (key == cur->key || dictCompareKeys(d, key, cur->key))
                return cur;
        }

        if (!dictIsRehashing(d))
            break;
    }
    return NULL;
}

释放整个dict：

/* Destroy every entry of one hash table, release its bucket array and reset
 * the table header to empty.
 *
 * callback - when non-NULL, invoked with d->privdata each time the bucket
 *            index is a multiple of 65536 (bucket 0 included).
 *
 * Always returns DICT_OK. */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
    unsigned long bucket;

    /* Stop early once no entries are left. */
    for (bucket = 0; bucket < ht->size && ht->used > 0; bucket++) {
        dictEntry *cur, *following;

        if (callback != NULL && bucket % 65536 == 0) callback(d->privdata);

        /* Free the whole chain of this bucket. */
        for (cur = ht->table[bucket]; cur != NULL; cur = following) {
            following = cur->next;  /* read before cur is freed */
            dictFreeKey(d, cur);
            dictFreeVal(d, cur);
            zfree(cur);
            ht->used--;
        }
    }

    /* Free the table and the allocated cache structure */
    zfree(ht->table);
    /* Re-initialize the table */
    _dictReset(ht);
    return DICT_OK; /* never fails */
}
/* Destroy a dictionary: clear ht[0], then ht[1], then free the dict itself. */
void dictRelease(dict *d)
{
    int t;

    for (t = 0; t < 2; t++)
        _dictClear(d, &d->ht[t], NULL);
    zfree(d);
}

再哈希算法：

再哈希指的是，hash表中节点数/索引数过大（>1），导致查找和插入时，经常发生hash碰撞，极大降低dict的效率，因而对dict的hashtable进行一个索引扩容。

其基本原理是，启用闲置的ht[1]表，依次将ht[0]中的节点全部重新计算hash值和索引值，并放置到ht[1]中；待节点全部移动到ht[1]后，释放ht[0]原有的table，令ht[0]=ht[1]，并将ht[1]重置为空表。

由于redis是单线程结构，一次性将全部节点移动会长时间阻塞服务器，期间无法响应其他请求，严重时甚至可能造成服务器宕机。于是redis采取分步再哈希的策略，即每次只把一条链表上的节点移动到另一哈希表，并由add、find、delete等函数触发这些分步操作。这样既保证了移动节点与dict的正常使用不冲突，又避免了长时间阻塞，可谓是一举两得。

同时，在rehash过程中，所有的find，delete，等操作，都会先在ht[0]，ht[1]上先后进行，但是add操作则只会在ht[1]上进行，这保证了ht[0]中的节点只会减少，最终所有节点都会被转移到ht[1]中。rehash结束。

/* Perform up to n steps of incremental rehashing; each step moves the whole
 * chain of one non-empty ht[0] bucket into ht[1].
 *
 * Returns 0 when the dict is not rehashing or when ht[0] has been emptied
 * (the rehash is then finalised), and 1 when entries remain in ht[0]. */
int dictRehash(dict *d, int n) {
    /* Budget of empty buckets that may be skipped in one call; when it is
     * used up the call returns early so a sparse table cannot stall it. */
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;
    /* n is the number of non-empty buckets to migrate in this call. */
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        /* rehashidx must stay below the bucket count of ht[0]. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        /* Starting at rehashidx, advance to the next non-empty bucket. */

        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }

        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;/* bucket index of this entry in ht[1] */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        /* The old bucket is now empty. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);/* release the old bucket array of ht[0] */
        d->ht[0] = d->ht[1];/* ht[1] becomes the main table */
        _dictReset(&d->ht[1]);/* ht[1] goes back to empty */
        d->rehashidx = -1;/* -1 marks the rehash as finished */
        return 0;
    }

    /* More to rehash... */
    return 1;
}

dict总结：

Redis字典结构采用哈希表作为底层实现，每个字典包括两个哈希表，一个用来平常使用，另一个在rehash的时候使用。Redis提供了三种哈希算法，对整数，字符串等类型的键都能较好的处理。Redis的哈希表采用了链地址法来解决哈希冲突。最有特点的是，Redis在对字典进行扩容和收缩时，需要将哈希表中的所有键值对rehash到新哈希表里面，这个rehash操作不是一次性完成的，而是采用渐进式完成，这一措施使得rehash过程不会影响Redis对字典进行增删查改操作的效率。

非自然码农

关注

2
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
redis源码阅读-dict字典

redis源码阅读-dict字典字典是redis中应用非常广泛的一种抽象数据结构，字典是key-value映射关系，主要特点就是一一对应的关系。字典中的每个键都是独一无二的。字典在redis中的应用有：数据库底层实现，哈希键的底层实现。dict的实现基本数据结构关于dict的数据结构一共有五个：dictEntry,dictType,dictht,dict,dictIterator.dictEntry是dict中基本元素类型。dictType则对外提供了几个自定义函数，供给使用者填充d
复制链接

扫一扫