1、目标
本文的主要目标是学习Dict字典的源码,它是键值对存储的重要数据结构
2、Dict字典
dict字典底层用数组和链表来存储数据。dict结构包含以下字段:type是字典类型;ht_table[2]是两个哈希表,每个哈希表都是一个dictEntry指针数组(哈希槽数组),其中一个存储数据,另一个用来rehash;ht_used[2]是数组,它表示两个哈希表各自已经添加的节点个数;rehashidx是这个字典是否正在rehash的标志,如果是-1表示这个字典没有rehash,rehash过程中每迁移完一个哈希槽的链表rehashidx就加1;pauserehash是这个字典暂停rehash的标志,0表示可以继续rehash,大于0表示暂停rehash,负数表示出错;metadata数组是元数据;ht_size_exp[2]是两个哈希表长度的指数,即ht_table数组的长度等于1<<ht_size_exp(因此长度一定是2的n次方)
dict字典的结构如图所示,ht_table[2]包括2个dictEntry指针数组,其中ht_table[0]数组用来存储数据,数组的每个下标位置(哈希槽)上有一个链表用来存储dictEntry数据,链表中的节点是通过dictEntry的next指针连接起来的;ht_table[1]用来渐进式rehash,如果没有rehash,ht_table[1]默认是null
/* One node of a hash bucket: a key, its value, and a link to the next node
 * stored in the same bucket. Optional per-entry metadata follows the fixed
 * fields as a flexible array member. */
struct dictEntry {
void *key; /* Pointer to the key. */
union {
void *val; /* Value held as a pointer. */
uint64_t u64; /* Value held inline as an unsigned 64-bit integer. */
int64_t s64; /* Value held inline as a signed 64-bit integer. */
double d; /* Value held inline as a double. */
} v; /* The value; all members share the same storage. */
struct dictEntry *next; /* Next entry in the same hash bucket. */
void *metadata[]; /* An arbitrary number of bytes (starting at a
* pointer-aligned address) of size as returned
* by dictType's dictEntryMetadataBytes(). */
};
dictEntry是哈希表的节点,它用来存放键值对和指向下一个节点的指针,其中key是键,v是值(它是一个联合体,可以存放指针val、无符号整数u64、有符号整数s64或浮点数d),next是同一个哈希槽的下一个节点的指针,可以形成同一个哈希槽的单向链表,metadata是元数据
重点分析一下dictCreate方法、dictAdd方法、_dictExpandIfNeeded方法、dictRehash方法
2.1 dictCreate方法
/* Allocate a new, empty dict of the given type.
 *
 * If the type provides a dictMetadataBytes callback, that many extra bytes
 * are allocated after the dict header and zero-filled. The dict is then
 * initialized with _dictInit() and returned. */
dict *dictCreate(dictType *type)
{
    size_t metasize = 0;

    if (type->dictMetadataBytes) {
        metasize = type->dictMetadataBytes();
    }

    dict *d = zmalloc(sizeof(*d) + metasize);
    if (metasize) memset(dictMetadata(d), 0, metasize);

    _dictInit(d,type);
    return d;
}
dictCreate方法会创建一个新的字典dict,会调用_dictInit方法进行初始化
/* Put an already-allocated dict into its initial state: both hash tables are
 * reset, the type is recorded, rehashing is marked as not in progress
 * (rehashidx == -1) and not paused (pauserehash == 0).
 * Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type)
{
    for (int htidx = 0; htidx <= 1; htidx++) {
        _dictReset(d, htidx);
    }
    d->pauserehash = 0;
    d->rehashidx = -1;
    d->type = type;
    return DICT_OK;
}
_dictInit方法会先调用_dictReset方法重置ht_table[0]和ht_table[1]两个哈希表,然后初始化type,并将rehashidx设置为-1表示这个字典没有rehash,将pauserehash设置为0表示这个字典可以继续rehash
2.2 dictAdd方法添加键值对
/* Insert a key/value pair into the dict.
 *
 * Returns DICT_ERR when dictAddRaw() could not create an entry for the key,
 * DICT_OK otherwise. For dict types flagged no_value the 'val' argument is
 * ignored and only the key is stored. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d, key, NULL);

    if (!added) {
        return DICT_ERR;
    }
    if (d->type->no_value) {
        return DICT_OK;
    }
    dictSetVal(d, added, val);
    return DICT_OK;
}
dictAdd方法是调用dictAddRaw方法在dict字典中添加一个dictEntry数据,如果返回null会返回DICT_ERR表示添加失败,如果dictEntry数据创建成功会判断no_value属性如果为0就调用dictSetVal方法设置key对应的value,如果no_value设置为1表示这个字典只存储key不存储value,最后返回DICT_OK表示添加dictEntry成功
2.2.1 dictAddRaw方法
/* Low level add: create an entry for 'key' without setting its value.
 *
 * Returns the new entry, or NULL when dictFindPositionForInsert() yields no
 * insert position (for instance because the key already exists, in which
 * case '*existing' is filled in when 'existing' is non-NULL). When the dict
 * type defines keyDup, the duplicated key is what gets stored. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    void *slot = dictFindPositionForInsert(d, key, existing);

    if (slot == NULL) {
        return NULL;
    }

    /* Store a private copy of the key when the type asks for one. */
    if (d->type->keyDup) {
        key = d->type->keyDup(d, key);
    }
    return dictInsertAtPosition(d, key, slot);
}
dictAddRaw方法会先调用dictFindPositionForInsert方法根据key查找它在字典中的插入位置position:如果key已经存在会返回null(此时如果传入了existing参数,existing会指向已经存在的节点),dictAddRaw也直接返回null;如果key不存在会返回key应该插入的位置position。然后如果字典类型设置了keyDup函数就会复制key,最后调用dictInsertAtPosition方法在字典的position位置插入数据
/* Locate the bucket where 'key' has to be inserted via
 * dictInsertAtPosition().
 *
 * Returns NULL when the key is already present (the matching entry is then
 * stored in '*existing' if that pointer was supplied) or when
 * _dictExpandIfNeeded() reports DICT_ERR. Otherwise returns the address of
 * the destination bucket. As a side effect one rehash step is performed when
 * a rehash is in progress, and the table may be expanded. */
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
    unsigned long idx, table;
    uint64_t hash = dictHashKey(d, key);

    if (existing) *existing = NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Grow the table first if it needs to. */
    if (_dictExpandIfNeeded(d) == DICT_ERR) return NULL;

    for (table = 0; table <= 1; table++) {
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);

        /* Walk the chain looking for an entry that already holds the key. */
        for (dictEntry *he = d->ht_table[table][idx]; he; he = dictGetNext(he)) {
            void *he_key = dictGetKey(he);
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                if (existing) *existing = he;
                return NULL;
            }
        }

        /* The second table only matters while rehashing. */
        if (!dictIsRehashing(d)) break;
    }

    /* While rehashing, 'idx' was last computed for table 1, which is also
     * the table new entries must go to. */
    int target = dictIsRehashing(d) ? 1 : 0;
    dictEntry **bucket = &d->ht_table[target][idx];
    return bucket;
}
dictFindPositionForInsert方法会遍历字典dict的旧的hash表(ht_table[0])和新的hash表(ht_table[1]),先计算key的哈希值,然后计算key对应的字典的ht_table数组的下标位置idx,接着循环遍历数组下标位置的单向链表,如果key相同会返回null表示已经存在key插入失败,如果key不同会遍历dictEntry的next指针到链表的下一个节点,如果key都不存在最后会返回ht_table数组的idx下标位置的dictEntry哈希槽bucket,注意这里会判断是否rehash如果rehash必须返回ht_table[1]表示新的hash表,因此rehash过程中新增键值对会添加到字典的新的hash表ht_table[1],而删除、修改、查询操作会先遍历ht_table[0]然后遍历ht_table[1],这会保证ht_table[0]的数据只减不增
/* Adds a key in the dict's hashtable at the position returned by a preceding
 * call to dictFindPositionForInsert. This is a low level function which allows
 * splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
 * used instead.
 *
 * 'position' is the bucket address handed out by dictFindPositionForInsert.
 * The new entry becomes the head of that bucket and the used counter of the
 * table being inserted into is incremented. Only the key and the chain link
 * are set here; the value is not touched (dictAdd sets it afterwards with
 * dictSetVal). Returns the entry that was stored in the bucket. */
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
dictEntry **bucket = position; /* It's a bucket, but the API hides that. */
dictEntry *entry;
/* If rehashing is ongoing, we insert in table 1, otherwise in table 0.
* Assert that the provided bucket is the right table. */
int htidx = dictIsRehashing(d) ? 1 : 0;
assert(bucket >= &d->ht_table[htidx][0] &&
bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]);
size_t metasize = dictEntryMetadataSize(d);
if (d->type->no_value) {
assert(!metasize); /* Entry metadata + no value not supported. */
if (d->type->keys_are_odd && !*bucket) {
/* We can store the key directly in the destination bucket without the
* allocated entry.
*
* TODO: Add a flag 'keys_are_even' and if set, we can use this
* optimization for these dicts too. We can set the LSB bit when
* stored as a dict entry and clear it again when we need the key
* back. */
entry = key;
assert(entryIsKey(entry));
} else {
/* Allocate an entry without value. */
/* NOTE(review): createEntryNoValue presumably links the new entry to
* the current bucket head passed as second argument - confirm. */
entry = createEntryNoValue(key, *bucket);
}
} else {
/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
* system it is more likely that recently added entries are accessed
* more frequently. */
entry = zmalloc(sizeof(*entry) + metasize);
assert(entryIsNormal(entry)); /* Check alignment of allocation */
if (metasize > 0) {
memset(dictEntryMetadata(entry), 0, metasize);
}
entry->key = key;
entry->next = *bucket;
}
/* Make the new entry the head of the bucket and account for it. */
*bucket = entry;
d->ht_used[htidx]++;
return entry;
}
dictInsertAtPosition方法会创建一个dictEntry类型的节点entry,并把它插入到bucket对应链表的头部,即头插法,因为头插法的时间复杂度是O(1);这里只设置了key和next,并没有设置value;最后dict字典对应哈希表的ht_used加1表示新增节点成功,并返回新插入的节点entry
2.2.2 dictSetVal方法
/* Store 'val' as the value of entry 'de'. When the dict type defines valDup,
 * the value returned by that callback is stored instead of 'val' itself.
 * The entry must be one that can carry a value (asserted). */
void dictSetVal(dict *d, dictEntry *de, void *val) {
    assert(entryHasValue(de));

    if (d->type->valDup) {
        de->v.val = d->type->valDup(d, val);
    } else {
        de->v.val = val;
    }
}
如果dictEntry数据创建成功会判断no_value属性如果为0就调用dictSetVal方法设置key对应的value,因为上一步创建dictEntry只是设置了key和next,但是value没有设置
2.3 _dictExpandIfNeeded方法实现扩容
/* Grow the hash table when its load calls for it.
 *
 * Returns DICT_OK when nothing had to be done (rehash already running, load
 * below the thresholds, or the type vetoes the expansion); otherwise returns
 * whatever dictExpand() returns. */
static int _dictExpandIfNeeded(dict *d)
{
    /* An incremental rehash is already under way: nothing to do. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* An empty table is brought to its initial size. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) {
        return dictExpand(d, DICT_HT_INITIAL_SIZE);
    }

    /* Resizing is triggered either at a 1:1 elements/buckets ratio when
     * resizing is enabled, or, as long as it is not forbidden, once the
     * ratio exceeds the "safe" threshold dict_force_resize_ratio. */
    int must_grow = (dict_can_resize == DICT_RESIZE_ENABLE &&
                     d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0]));
    if (!must_grow) {
        must_grow = (dict_can_resize != DICT_RESIZE_FORBID &&
                     d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio);
    }
    if (!must_grow) return DICT_OK;

    if (!dictTypeExpandAllowed(d)) return DICT_OK;
    return dictExpand(d, d->ht_used[0] + 1);
}
_dictExpandIfNeeded方法会根据负载因子(已经使用的节点个数除以字典数组的长度)判断是否需要扩容:如果负载因子大于等于1并且没有执行bgrewriteaof命令(dict_can_resize是DICT_RESIZE_ENABLE),会调用dictExpand方法进行扩容;如果没有禁止扩容(dict_can_resize不是DICT_RESIZE_FORBID)并且负载因子大于dict_force_resize_ratio(值是5),也会调用dictExpand方法进行扩容。扩容后的字典数组大小是大于等于ht_used[0]+1的最小的2的n次方。另外,当负载因子小于0.1时会缩容。之所以有两个不同的扩容阈值,是因为执行bgrewriteaof命令会进行写时复制:写时复制是执行写操作时不会立即修改原始数据,而是先创建副本然后在副本上修改数据,作用是可以保证其他进程读取原始数据时不会受到影响;因此执行bgrewriteaof命令期间需要把触发扩容的负载因子提高到5,避免内存的大量写入
其中,已经使用的节点个数是dict字典的ht_used[0],字典数组的长度由dict字典的ht_size_exp[0]计算得到,即DICTHT_SIZE(ht_size_exp[0]),也就是1<<ht_size_exp[0]
/* Expand (or create) the hash table so that it can hold 'size' elements.
 * Thin wrapper around _dictExpand() that passes no malloc_failed flag.
 * Returns DICT_ERR if the expand was not performed. */
int dictExpand(dict *d, unsigned long size) {
    int *no_failure_flag = NULL;

    return _dictExpand(d, size, no_failure_flag);
}
dictExpand方法会调用_dictExpand方法进行dict字典的扩容
/* Expand or create the hash table.
 *
 * When 'malloc_failed' is non-NULL the allocation is attempted with
 * ztrycalloc() and '*malloc_failed' is set to 1 if it fails (0 otherwise),
 * so that the caller can avoid a panic on allocation failure.
 *
 * Returns DICT_OK if the expand was performed, DICT_ERR if it was skipped:
 * a rehash is already running, 'size' is smaller than the number of stored
 * elements, the size computation overflows, the new size equals the current
 * one, or the allocation failed. */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* A size smaller than the number of elements already stored is invalid. */
    if (dictIsRehashing(d) || d->ht_used[0] > size) {
        return DICT_ERR;
    }

    signed char new_exp = _dictNextExp(size);

    /* Detect overflows */
    size_t newsize = 1ul<<new_exp;
    if (newsize < size || newsize * sizeof(dictEntry*) < newsize) {
        return DICT_ERR;
    }

    /* Rehashing to the same table size is not useful. */
    if (new_exp == d->ht_size_exp[0]) return DICT_ERR;

    /* Allocate the new bucket array with every pointer set to NULL. */
    dictEntry **new_table;
    if (!malloc_failed) {
        new_table = zcalloc(newsize*sizeof(dictEntry*));
    } else {
        new_table = ztrycalloc(newsize*sizeof(dictEntry*));
        *malloc_failed = new_table == NULL;
        if (*malloc_failed) return DICT_ERR;
    }

    /* On first initialization the new array simply becomes table 0 and no
     * rehash is started; otherwise it becomes table 1, the target of the
     * incremental rehash. */
    int htidx = (d->ht_table[0] == NULL) ? 0 : 1;
    d->ht_size_exp[htidx] = new_exp;
    d->ht_used[htidx] = 0;
    d->ht_table[htidx] = new_table;
    if (htidx == 1) d->rehashidx = 0;
    return DICT_OK;
}
_dictExpand方法先根据size调用_dictNextExp方法得到扩容后的字典数组长度的指数,扩容后的长度是大于等于size的最小的2的n次方;之所以要求长度是2的n次方,是因为这样hash % 长度 = hash & (长度 - 1),可以用位运算代替取模;然后分配内存空间;如果是第一次初始化(ht_table[0]是null)就直接设置ht_table[0]并返回,否则设置ht_table[1]并把rehashidx设置为0然后返回,其中ht_table[1]是为了增量rehash
2.4 dictRehash方法实现rehash
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time.
 *
 * 0 is also returned, without moving anything, when the dict is not
 * rehashing, when dict_can_resize is DICT_RESIZE_FORBID, or when it is
 * DICT_RESIZE_AVOID and the size ratio between the two tables is below
 * dict_force_resize_ratio. */
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
/* In "avoid" mode only proceed when the two tables differ in size by at
* least the forced-resize ratio, in either direction (grow or shrink). */
if (dict_can_resize == DICT_RESIZE_AVOID &&
((s1 > s0 && s1 / s0 < dict_force_resize_ratio) ||
(s1 < s0 && s0 / s1 < dict_force_resize_ratio)))
{
return 0;
}
/* Each iteration migrates one non-empty bucket of table 0. */
while(n-- && d->ht_used[0] != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
/* Skip empty buckets, giving up (with work still pending) once the
* empty-bucket budget is used up. */
while(d->ht_table[0][d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht_table[0][d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;
/* Remember the successor first: the link of 'de' is rewritten below. */
nextde = dictGetNext(de);
void *key = dictGetKey(de);
/* Get the index in the new hash table */
if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
} else {
/* We're shrinking the table. The tables sizes are powers of
* two, so we simply mask the bucket index in the larger table
* to get the bucket index in the smaller table. */
h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
}
if (d->type->no_value) {
if (d->type->keys_are_odd && !d->ht_table[1][h]) {
/* Destination bucket is empty and we can store the key
* directly without an allocated entry. Free the old entry
* if it's an allocated entry.
*
* TODO: Add a flag 'keys_are_even' and if set, we can use
* this optimization for these dicts too. We can set the LSB
* bit when stored as a dict entry and clear it again when
* we need the key back. */
assert(entryIsKey(key));
if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
de = key;
} else if (entryIsKey(de)) {
/* We don't have an allocated entry but we need one. */
de = createEntryNoValue(key, d->ht_table[1][h]);
} else {
/* Just move the existing entry to the destination table and
* update the 'next' field. */
assert(entryIsNoValue(de));
dictSetNext(de, d->ht_table[1][h]);
}
} else {
dictSetNext(de, d->ht_table[1][h]);
}
/* The moved entry becomes the head of the destination bucket. */
d->ht_table[1][h] = de;
d->ht_used[0]--;
d->ht_used[1]++;
de = nextde;
}
/* The source bucket is now empty; advance to the next one. */
d->ht_table[0][d->rehashidx] = NULL;
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht_used[0] == 0) {
/* Release the old bucket array and let table 1 take the place of table 0. */
zfree(d->ht_table[0]);
/* Copy the new ht onto the old one */
d->ht_table[0] = d->ht_table[1];
d->ht_used[0] = d->ht_used[1];
d->ht_size_exp[0] = d->ht_size_exp[1];
_dictReset(d, 1);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
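dictRehash方法每一步会处理ht_table[0]数组的rehashidx下标位置的链表(遇到空的哈希槽会跳过,最多访问n*10个空槽):遍历链表时先获取链表的下一个节点,然后重新哈希,即用key的哈希值和新的hash表的掩码(长度减1)进行与运算得到下标位置h(缩容时直接用rehashidx和新的hash表的掩码进行与运算);接着判断dict的type的no_value,如果no_value为0表示节点有value,直接把节点的next指向ht_table[1]数组下标位置h的链表头,如果no_value为1表示只有key没有value,需要按照节点的存储形式分别处理;然后把节点设置为ht_table[1]数组下标位置h的链表头(头插法),ht_used[0]减1,ht_used[1]加1;这个链表遍历结束之后,把ht_table[0]的rehashidx下标位置设置为null,rehashidx加1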
如果ht_used[0]是0表示ht_table[0]数组中所有下标位置的链表都已经rehash完成,会释放ht_table[0]数组的内存,把ht_table[1]、ht_used[1]、ht_size_exp[1]赋值给ht_table[0]、ht_used[0]、ht_size_exp[0],重置ht_table[1],并把rehashidx设置为-1,最后返回0表示rehash完成;如果ht_used[0]不是0就返回1表示rehash还没有完成。因此字典dict的rehash是渐进式rehash,即分多次完成rehash,每一步只对一个数组下标位置的链表进行rehash,渐进式rehash过程中新增键值对会添加到字典的ht_table[1]数组,而删除、修改、查询操作会先遍历ht_table[0]然后遍历ht_table[1],这会保证ht_table[0]的数据只减不增