[redis] dict
断点位置
- server.c main
- server.c aeMain(server.el);
// 为了支持不同的key和val而做的类型擦除
typedef struct dictType {
uint64_t (*hashFunction)(const void *key); // 计算hash
void *(*keyDup)(dict *d, const void *key); // copy key
void *(*valDup)(dict *d, const void *obj); // copy val
int (*keyCompare)(dict *d, const void *key1, const void *key2); // 比较key
void (*keyDestructor)(dict *d, void *key);// 析构key
void (*valDestructor)(dict *d, void *obj);// 析构val
int (*expandAllowed)(size_t moreMem, double usedRatio);
/* Allow a dictEntry to carry extra caller-defined metadata. The
* extra memory is initialized to 0 when a dictEntry is allocated. */
size_t (*dictEntryMetadataBytes)(dict *d);
} dictType;
// 每一个hash元素的结构
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next; /* Next entry in the same hash bucket. */
void *metadata[]; /* An arbitrary number of bytes (starting at a
* pointer-aligned address) of size as returned
* by dictType's dictEntryMetadataBytes(). */
} dictEntry;
// 字典
struct dict {
dictType *type; // 指向dictType结构,dictType包含自定义的函数,使得key和value支持任意类型的数据
dictEntry **ht_table[2]; //两张hash表
unsigned long ht_used[2]; // 当前正在使用哪张hash表
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
/* Keep small vars at end for optimal (minimal) struct padding */
int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
};
使用例子观察调用栈和具体代码
redis使用了两个hash表(为了rehash)
dict的结构图
如何在dict中插入一个kv对
原理是先计算key的hash(mt133-64),然后对bucket个数取模找到具体的bucket,然后插入到链表的最前边即可。
在查找key时,当调用dictFind函数时都会检查是否需要rehash。rehash的代码如下:
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table, however
* since part of the hash table may be composed of empty spaces, it is not
* guaranteed that this function will rehash even a single bucket, since it
* will visit at max N*10 empty buckets in total, otherwise the amount of
* work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
while(n-- && d->ht_used[0] != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
while(d->ht_table[0][d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht_table[0][d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;
nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
de->next = d->ht_table[1][h];
d->ht_table[1][h] = de;
d->ht_used[0]--;
d->ht_used[1]++;
de = nextde;
}
d->ht_table[0][d->rehashidx] = NULL;
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht_used[0] == 0) {
zfree(d->ht_table[0]);
/* Copy the new ht onto the old one */
d->ht_table[0] = d->ht_table[1];
d->ht_used[0] = d->ht_used[1];
d->ht_size_exp[0] = d->ht_size_exp[1];
_dictReset(d, 1);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
接口的意思是对字典执行n个bucket的rehash,具体做法是:遍历n个bucket,对于每一个bucket上的所有节点,计算其hash值然后将其插入到ht_table[1]合适的位置上。最后判断整个dict是否已经完全rehash完毕(d->ht_used[0] == 0),如果rehash完毕,释放ht_table[0]的内存(其实就是分配用于存放bucket的内存),然后将ht_table[1]移动到ht_table[0]中,并将ht_table[0]重置即可。
如何再hash
再hash的实现与int _dictExpand(dict d, unsigned long size, int malloc_failed)和int dictRehash(dict *d, int n)有关,其中dictExpand的作用是根据当前的size生成一个新的size * 2的hash table,然后将其挂在ht_table[1]上。
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table, however
* since part of the hash table may be composed of empty spaces, it is not
* guaranteed that this function will rehash even a single bucket, since it
* will visit at max N*10 empty buckets in total, otherwise the amount of
* work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
while(n-- && d->ht_used[0] != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
while(d->ht_table[0][d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht_table[0][d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;
nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
de->next = d->ht_table[1][h];
d->ht_table[1][h] = de;
d->ht_used[0]--;
d->ht_used[1]++;
de = nextde;
}
d->ht_table[0][d->rehashidx] = NULL;
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht_used[0] == 0) {
zfree(d->ht_table[0]);
/* Copy the new ht onto the old one */
d->ht_table[0] = d->ht_table[1];
d->ht_used[0] = d->ht_used[1];
d->ht_size_exp[0] = d->ht_size_exp[1];
_dictReset(d, 1);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
这个接口对dict的n个桶进行再hash,rehashidx其实记录的是当前rehashidx应该开始遍历的bucket的位置,然后这里对空bucket做了一个优化,最多遍历10个空bucket就结束此次rehash。当找到要hash的bucket时,将该bucket上的所有节点kv对插入到ht_table[1]中,具体动作是:对每个kv对计算key的hash,然后插入到对应的bt_table[1]的bucket上。最后如果rehash完毕(d->ht_used[0] == 0),释放ht_table[0]的空间(其实就是为了存储bucket分配的空间),将ht_table[1]替换到ht_table[0]上,然后ht_table[1]的状态初始化,rehashidx标志置为-1。
dict如何扩容和缩容
缩容
databasesCron
tryResizeHashTables
dictResize
dictExpand
getCommand
对于getCommand来说,仍然是先执行dictFind查找key,然后找到key之后获取其val,然后刷新LRU
robj *lookupKey(redisDb *db, robj *key, int flags) {
dictEntry *de = dictFind(db->dict,key->ptr);
robj *val = NULL;
if (de) {
val = dictGetVal(de);
/* Forcing deletion of expired keys on a replica makes the replica
* inconsistent with the master. We forbid it on readonly replicas, but
* we have to allow it on writable replicas to make write commands
* behave consistently.
*
* It's possible that the WRITE flag is set even during a readonly
* command, since the command may trigger events that cause modules to
* perform additional writes. */
int is_ro_replica = server.masterhost && server.repl_slave_ro;
int force_delete_expired = flags & LOOKUP_WRITE && !is_ro_replica;
if (expireIfNeeded(db, key, force_delete_expired)) {
/* The key is no longer valid. */
val = NULL;
}
}
if (val) {
/* Update the access time for the ageing algorithm.
* Don't do it if we have a saving child, as this will trigger
* a copy on write madness. */
if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)){
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
updateLFU(val);
} else {
val->lru = LRU_CLOCK();
}
}
if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE)))
server.stat_keyspace_hits++;
/* TODO: Use separate hits stats for WRITE */
} else {
if (!(flags & (LOOKUP_NONOTIFY | LOOKUP_WRITE)))
notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
if (!(flags & (LOOKUP_NOSTATS | LOOKUP_WRITE)))
server.stat_keyspace_misses++;
/* TODO: Use separate misses stats and notify event for WRITE */
}
return val;
}