redis源码剖析 – zskiplist的实现
redis中zskiplist的数据结构
skiplist是随机化算法的一种应用,在《数据结构与算法分析》一书中有所提及。
跳跃表(skiplist)是一种有序的数据结构,它通过在每个节点中维持多个指向其他节点的指针,从而达到快速访问节点的目的。
在大部分情况下,跳跃表的效率都可以和平衡树相媲美,并且因为跳跃表的实现比平衡树更为简单,所以有不少程序使用跳跃表来代替平衡树。
先来看看zskiplist中的node节点
/* ZSETs use a specialized version of Skiplists */
// Several nodes in the skiplist may share the same score, but the member
// (ele) stored in each node must be unique.
typedef struct zskiplistNode {
// Member value of the sorted-set element held by this node.
sds ele; // sds: the simple dynamic string type from Redis' base data structures
// Score of this node. Nodes are kept ordered by score; nodes with equal
// scores are ordered by comparing ele.
double score;
struct zskiplistNode *backward; // link to the previous node (towards the header)
struct zskiplistLevel {
struct zskiplistNode *forward; // link to the next node on this level (towards the tail)
unsigned long span; // distance, counted in nodes, covered by this forward link
} level[];
} zskiplistNode;
zskiplist 是整个跳跃表的结构体,相当于 zskiplistNode 的一个管理者(manager)
typedef struct zskiplist {
// Header node and tail node of the list.
struct zskiplistNode *header, *tail;
// Number of nodes stored in the list.
unsigned long length;
// Level count of the tallest node currently in the list.
int level;
} zskiplist;
zskiplist中用来进行范围查找的数据结构
/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison.
 * Passed to the zsl*InLexRange() lookups. */
typedef struct {
sds min, max; /* May be set to shared.(minstring|maxstring) */
int minex, maxex; /* are min or max exclusive? */
} zlexrangespec;
/* Struct to hold an inclusive/exclusive range spec by score comparison.
 * Passed to the zsl*InRange() lookups. */
typedef struct {
double min, max; /* lower and upper score bounds */
int minex, maxex; /* are min or max exclusive? */
} zrangespec;
在redis中一些有关zskiplist的宏定义
#define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
// Upper bound on a node's level count. Levels live in the level[] array,
// which is indexed from 0, so valid indices are 0..ZSKIPLIST_MAXLEVEL-1.
#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^64 elements */
// Threshold factor used by zslRandomLevel(): the chance that a node is
// promoted by one more level.
#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
/* Pick a random level (height) for a new skiplist node.
 *
 * The height starts at 1 and grows by one for every draw whose low 12 bits
 * of random() fall below ZSKIPLIST_P * 0xFFF; the result is then capped at
 * ZSKIPLIST_MAXLEVEL. Assuming random() is uniform, each extra level is
 * taken with probability ~ZSKIPLIST_P, so below the cap a node has height n
 * with probability p^(n-1) * (1-p). */
int zslRandomLevel(void) {
    int height;

    for (height = 1; (random() & 0xFFF) < (ZSKIPLIST_P * 0xFFF); height++) {
        /* every passing draw promotes the node by one level */
    }
    if (height >= ZSKIPLIST_MAXLEVEL) {
        return ZSKIPLIST_MAXLEVEL;
    }
    return height;
}
zskiplist提供的api接口
// Create a new, empty zskiplist.
zskiplist *zslCreate(void);
// Release every resource owned by a zskiplist.
void zslFree(zskiplist *zsl);
// Insert a node with the given score and member; returns the new node.
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele);
// Delete the node matching score and member; returns 1 if found, 0 otherwise.
int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node);
// First node inside the given score range, or NULL if there is none.
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
// Last node inside the given score range, or NULL if there is none.
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
// 1-based rank of the element identified by score and member; 0 if not found.
unsigned long zslGetRank(zskiplist *zsl, double score, sds o);
// First node inside the given lexicographic range, or NULL if there is none.
zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range);
// Last node inside the given lexicographic range, or NULL if there is none.
zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range);
-
创建一个zsl
zskiplist *zslCreate(void)
/* Create a new skiplist. */ zskiplist *zslCreate(void) { int j; zskiplist *zsl; // zmalloc是redis内部封装的一个空间配置器抽象,根据其他的文章,主要是jemalloc // 因为linux中自带的libc-malloc会产生严重内存碎片,而jemalloc可以很好地减少内存碎片的产生 // 我认为jemalloc,libc-malloc就是一个memory_pool_manager,管理着一个已经申请的内存区,来提高效率 zsl = zmalloc(sizeof(*zsl)); // 初始化zskiplist的属性 zsl->level = 1; zsl->length = 0; // 创建zsl头节点 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) { zsl->header->level[j].forward = NULL; zsl->header->level[j].span = 0; } zsl->header->backward = NULL; zsl->tail = NULL; return zsl; }
-
创建一个zsl节点
zskiplistNode *zslCreateNode(int level, double score, sds ele)
/* Allocate a skiplist node that has room for 'level' levels.
 *
 * Only 'score' and 'ele' are filled in; the backward pointer and the
 * level[] entries are left for the caller to initialize.
 * The node keeps a reference to the SDS string 'ele' after the call. */
zskiplistNode *zslCreateNode(int level, double score, sds ele) {
    zskiplistNode *node;

    /* One allocation: fixed part followed by the flexible level[] array. */
    node = zmalloc(sizeof(zskiplistNode) + level * sizeof(node->level[0]));
    node->ele = ele;
    node->score = score;
    return node;
}
-
释放一个
zskiplist
/* Release a whole skiplist: the header, every node reachable through the
 * level-0 forward links, and finally the list descriptor itself.
 *
 * The header is released with a plain zfree(), while all other nodes go
 * through zslFreeNode(). */
void zslFree(zskiplist *zsl) {
    zskiplistNode *cur = zsl->header->level[0].forward;

    zfree(zsl->header);
    while (cur != NULL) {
        /* Read the successor before the current node is released. */
        zskiplistNode *following = cur->level[0].forward;

        zslFreeNode(cur);
        cur = following;
    }
    zfree(zsl);
}
-
释放一个zsl节点
/* Free the specified skiplist node. The referenced SDS string representation
 * of the element is freed too, unless node->ele is set to NULL before calling
 * this function. */
void zslFreeNode(zskiplistNode *node) {
    /* Release the member string first; otherwise it would leak once the
     * node that references it is gone. */
    sdsfree(node->ele);
    zfree(node);
}
-
插入一个
zskiplistNode
节点插入分为四步
- 找到要插入节点的位置
- 调整跳跃表的高度
- 插入节点
- 调整 backward 指针
/* Insert a new node in the skiplist. Assumes the element does not already
 * exist (up to the caller to enforce that). The skiplist takes ownership
 * of the passed SDS string 'ele'.
 *
 * The insertion runs in four steps:
 *   1. find the insert position on every level (filling update[] and rank[]),
 *   2. draw a random height and raise the list level if needed,
 *   3. link the node in and fix the spans,
 *   4. fix the backward pointer / tail.
 *
 * Returns the newly created node. */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
    /* update[i]: last node on level i that sorts before the new element. */
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
    /* rank[i]: accumulated span from the header to update[i]. */
    unsigned long rank[ZSKIPLIST_MAXLEVEL];
    int i, level;

    serverAssert(!isnan(score));
    x = zsl->header;
    /* Step 1: walk from the top level down. A lower level contains every
     * node of the level above it, so the walk on level i resumes from where
     * level i+1 stopped and rank[i] starts from rank[i+1]. On each level x
     * advances while the next node sorts before (score, ele). */
    for (i = zsl->level-1; i >= 0; i--) {
        /* store rank that is crossed to reach the insert position */
        rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
        while (x->level[i].forward &&
                (x->level[i].forward->score < score ||
                    (x->level[i].forward->score == score &&
                    sdscmp(x->level[i].forward->ele,ele) < 0)))
        {
            rank[i] += x->level[i].span;
            x = x->level[i].forward;
        }
        update[i] = x;
    }
    /* we assume the element is not already inside, since we allow duplicated
     * scores, reinserting the same element should never happen since the
     * caller of zslInsert() should test in the hash table if the element is
     * already inside or not. */
    /* Step 2: draw the random height of the new node. */
    level = zslRandomLevel();
    if (level > zsl->level) {
        /* The new node is taller than the list: on each newly used level
         * the predecessor is the header, whose link spans the whole list. */
        for (i = zsl->level; i < level; i++) {
            rank[i] = 0;
            update[i] = zsl->header;
            update[i]->level[i].span = zsl->length;
        }
        zsl->level = level;
    }
    /* Step 3: create the node and link it in on each of its levels. */
    x = zslCreateNode(level,score,ele);
    for (i = 0; i < level; i++) {
        x->level[i].forward = update[i]->level[i].forward;
        update[i]->level[i].forward = x;

        /* update span covered by update[i] as x is inserted here */
        x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
        update[i]->level[i].span = (rank[0] - rank[i]) + 1;
    }

    /* increment span for untouched levels */
    for (i = level; i < zsl->level; i++) {
        update[i]->level[i].span++;
    }

    /* Step 4: fix the backward pointers; a node without a successor on
     * level 0 becomes the new tail. */
    x->backward = (update[0] == zsl->header) ? NULL : update[0];
    if (x->level[0].forward)
        x->level[0].forward->backward = x;
    else
        zsl->tail = x;
    /* One more node in the list. */
    zsl->length++;
    return x;
}
-
删除一个节点
/* Delete an element with matching score/element from the skiplist. * The function returns 1 if the node was found and deleted, otherwise * 0 is returned. * 从skiplist中删除一个分值与参数匹配的元素 * 如果这个指定的节点被找到并且成功删除,返回1,否则返回0 * * If 'node' is NULL the deleted node is freed by zslFreeNode(), otherwise * it is not freed (but just unlinked) and *node is set to the node pointer, * so that it is possible for the caller to reuse the node (including the * referenced SDS string at node->ele). * 如果node为NULL,则被删除的节点将被zslFreeNode()释放,否则该node不会被释放,但是 * 也不会被连接在跳跃表中,并且*node被设置成一个指针,所以对于这个调用来说重新使用该指针是可能存在的 * 包括使用node中的sds指针*/ // 这里传的是三个参数double score, sds ele, zskiplistNode** node,并没有直接传来一个zskiplistNode* // 这里我认为是通过函数的参数限制了函数的行为, int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { while (x->level[i].forward && (x->level[i].forward->score < score || (x->level[i].forward->score == score && sdscmp(x->level[i].forward->ele,ele) < 0))) { x = x->level[i].forward; } update[i] = x; } /* We may have multiple elements with the same score, what we need * is to find the element with both the right score and object. */ // 可能有多个元素拥有相同的score,这时就需要通过sds和redisObject来进行识别 x = x->level[0].forward; if (x && score == x->score && sdscmp(x->ele,ele) == 0) { zslDeleteNode(zsl, x, update); if (!node) zslFreeNode(x); else *node = x; return 1; } return 0; /* not found */ }
-
获取符合给定范围内的第一个节点
/* Return the first node whose score lies inside 'range', or NULL when no
 * element of the list is contained in the range.
 *
 * The search descends from the top level, skipping every node whose score
 * is still below the range minimum; the node right after the stopping point
 * on level 0 is the candidate, which must also satisfy the range maximum. */
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
    zskiplistNode *cur, *candidate;
    int lvl;

    /* Nothing in the list can match: bail out early. */
    if (!zslIsInRange(zsl, range)) {
        return NULL;
    }

    cur = zsl->header;
    lvl = zsl->level;
    while (lvl-- > 0) {
        /* Keep moving forward while the next node is still *OUT* of range. */
        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;

            if (next == NULL || zslValueGteMin(next->score, range)) break;
            cur = next;
        }
    }

    /* This is an inner range, so the next node cannot be NULL. */
    candidate = cur->level[0].forward;
    serverAssert(candidate != NULL);

    /* The candidate clears the minimum; it must also respect the maximum. */
    return zslValueLteMax(candidate->score, range) ? candidate : NULL;
}
-
获取符合给定范围内的最后一个节点
/* Return the last node whose score lies inside 'range', or NULL when no
 * element of the list is contained in the range.
 *
 * The search descends from the top level and advances for as long as the
 * next node still satisfies the range maximum; the node it stops on is the
 * candidate, which must also satisfy the range minimum. */
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
    zskiplistNode *cur;
    int lvl;

    /* Nothing in the list can match: bail out early. */
    if (!zslIsInRange(zsl, range)) return NULL;

    cur = zsl->header;
    for (lvl = zsl->level - 1; lvl >= 0; lvl--) {
        zskiplistNode *next;

        /* Keep moving forward while the next node is still *IN* range. */
        while ((next = cur->level[lvl].forward) != NULL &&
               zslValueLteMax(next->score, range)) {
            cur = next;
        }
    }

    /* This is an inner range, so this node cannot be NULL. */
    serverAssert(cur != NULL);

    /* The candidate respects the maximum; it must also clear the minimum. */
    if (zslValueGteMin(cur->score, range)) {
        return cur;
    }
    return NULL;
}
-
通过score和成员(ele)获取指定元素在跳跃表中的排名(rank)
/* Look up the rank of the element identified by both score and member.
 *
 * Returns 0 when the element cannot be found, its rank otherwise.
 * The rank is 1-based because the span from zsl->header to the first
 * element is already counted. */
unsigned long zslGetRank(zskiplist *zsl, double score, sds ele) {
    zskiplistNode *cur = zsl->header;
    unsigned long traversed = 0;
    int lvl;

    for (lvl = zsl->level - 1; lvl >= 0; lvl--) {
        /* Advance while the next node sorts at or before (score, ele),
         * adding up the spans that are crossed on the way. */
        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;

            if (next == NULL) break;
            if (!(next->score < score ||
                  (next->score == score && sdscmp(next->ele, ele) <= 0)))
                break;
            traversed += cur->level[lvl].span;
            cur = next;
        }

        /* cur may still be zsl->header, whose ele is NULL, so test it
         * before comparing. */
        if (cur->ele && cur->score == score && sdscmp(cur->ele, ele) == 0) {
            return traversed;
        }
    }
    return 0;
}
```c
/* Return the first node whose member lies inside the lexicographic 'range',
 * or NULL when no element of the list is contained in the range.
 *
 * Same descent as the score-based lookup, except that the members (ele)
 * are compared instead of the scores. */
zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range) {
    zskiplistNode *cur, *candidate;
    int lvl;

    /* Nothing in the list can match: bail out early. */
    if (!zslIsInLexRange(zsl, range)) {
        return NULL;
    }

    cur = zsl->header;
    for (lvl = zsl->level - 1; lvl >= 0; lvl--) {
        zskiplistNode *next;

        /* Keep moving forward while the next node is still *OUT* of range. */
        while ((next = cur->level[lvl].forward) != NULL &&
               !zslLexValueGteMin(next->ele, range)) {
            cur = next;
        }
    }

    /* This is an inner range, so the next node cannot be NULL. */
    candidate = cur->level[0].forward;
    serverAssert(candidate != NULL);

    /* The candidate clears the minimum; its member must also be <= max. */
    if (zslLexValueLteMax(candidate->ele, range)) {
        return candidate;
    }
    return NULL;
}
```
-
跟上面的两个接口差不多,只是比较的元素不同而已
/* Find the last node that is contained in the specified lex range.
 * Returns NULL when no element is contained in the range.
 *
 * Works like the score-based lookup, but compares the members (ele)
 * against the lexicographic bounds instead of comparing scores. */
zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
    zskiplistNode *x;
    int i;

    /* If everything is out of range, return early. */
    if (!zslIsInLexRange(zsl,range)) return NULL;

    x = zsl->header;
    for (i = zsl->level-1; i >= 0; i--) {
        /* Go forward while *IN* range. */
        while (x->level[i].forward &&
            zslLexValueLteMax(x->level[i].forward->ele,range))
                x = x->level[i].forward;
    }

    /* This is an inner range, so this node cannot be NULL. */
    serverAssert(x != NULL);

    /* Check if the member is >= min. */
    if (!zslLexValueGteMin(x->ele,range)) return NULL;
    return x;
}