Skip list 是一种多层次的有序链表,通过为每个节点随机选择层数,使插入、查找和删除的期望时间复杂度均为 O(log n)(与平衡树效率相当,但实现比平衡树简单很多)。关于 skip list 的具体介绍可以参见 William Pugh 的论文:Skip Lists: A Probabilistic Alternative to Balanced Trees,也可以参见上一篇日志。如下图,为一个简单的 skip list 结构图(注:redis 的 skip list 比这个要复杂)。
下面我们来分析一下redis中skip list的实现。Redis中skip list主要有zskiplist和zskiplistNode两个数据结构:
/* ZSETs use a specialized version of Skiplists */
/* A single node of the skip list used by sorted sets. The node ends in a
 * flexible array with one entry per level the node takes part in; the
 * allocation size is chosen in zslCreateNode(). */
typedef struct zskiplistNode {
    robj *obj; // member object; zslInsert() compares it as a string to order nodes with equal scores
    double score; // score of the sorted-set element (primary sort key)
    struct zskiplistNode *backward; // pointer to the previous node (NULL for the first node after the header)
    struct zskiplistLevel {
        struct zskiplistNode *forward; // forward pointer: next node on this level, or NULL
        unsigned int span; // span exists for the ZRANK and ZREVRANK commands; see zrankGenericCommand
    } level[]; // the node's per-level forward pointers and spans
} zskiplistNode;
/* The skip list itself: entry points plus bookkeeping that zslCreate()
 * initializes and zslInsert() keeps up to date. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail; // header node and tail node (tail is NULL while the list is empty)
    unsigned long length; // number of nodes (starts at 0, incremented on every insert)
    int level; // largest level currently in use (starts at 1)
} zskiplist;
其中zskiplistNode中包含一个zskiplistLevel数组,数组的大小由该节点的层数(level)决定。backward指针指向前一个节点,是为了方便从表尾向表头反向遍历而对skip list做的改进。
主要的API有:
zslCreate 创建一个zskiplist,并添加一个具有最高层数ZSKIPLIST_MAXLEVEL(代码中定义为32)的头节点来管理各层链表,该头节点不保存数据。
zslInsert 插入一个节点到zskiplist,并保证每一层的链表仍然有序。
zslDelete 从zskiplist删除一个节点,并保证剩余节点在每一层仍然有序。
zslRandomLevel 为新加入的节点随机产生一个介于1和ZSKIPLIST_MAXLEVEL之间(含两端)的层数。
1. zslCreate
/* Allocate a skip list node with room for `level` entries in its level[]
 * array and record `score` and `obj` in it.
 *
 * Only the score and obj fields are set here: the backward pointer and the
 * level[] entries are left untouched and must be filled in by the caller.
 * Returns the pointer obtained from zmalloc(). */
zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
    zskiplistNode *node;

    /* Fixed part of the node plus one zskiplistLevel per requested level. */
    node = zmalloc(sizeof(*node) + level*sizeof(struct zskiplistLevel));

    node->obj = obj;
    node->score = score;
    return node;
}
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
zsl = zmalloc(sizeof(*zsl)); //给zskiplist分配内存
zsl->level = 1; //初始化level=1
zsl->length = 0; //结点数=0
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL); //初始化一个header结点,该结点不保存数据,用于管理链表
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->level[j].forward = NULL;
zsl->header->level[j].span = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL;
return zsl;
}
2. zslRandomLevel
/* Returns a random level for the new skiplist node we are going to create.
* The return value of this function is between 1 and ZSKIPLIST_MAXLEVEL
* (both inclusive), with a powerlaw-alike distribution where higher
* levels are less likely to be returned. */
int zslRandomLevel(void) {
    int height = 1;

    /* Keep growing while the low 16 bits of random() fall below the
     * ZSKIPLIST_P fraction of 0xFFFF; stop on the first draw that does not. */
    for (;;) {
        if (!((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF)))
            break;
        height++;
    }

    /* Clamp the result to the maximum supported level. */
    if (height < ZSKIPLIST_MAXLEVEL)
        return height;
    return ZSKIPLIST_MAXLEVEL;
}
3. zslInsert
/* Insert a new node holding `score` and `obj` into the skip list `zsl` and
 * return the newly created node.
 *
 * Nodes are ordered by score; nodes with equal scores are ordered by
 * comparing their objects with compareStringObjects(). Duplicate scores are
 * therefore allowed, but the same element must not already be in the list:
 * this function does not check for it, and the caller is expected to have
 * tested (in the hash table) whether the element is present.
 *
 * `score` must not be NaN (asserted). */
zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
    zskiplistNode *pred[ZSKIPLIST_MAXLEVEL];   /* per level: node after which the new node goes */
    unsigned int crossed[ZSKIPLIST_MAXLEVEL];  /* per level: spans summed from the header to pred[] */
    zskiplistNode *cur, *node;
    int lvl, height;

    redisAssert(!isnan(score));

    /* Search from the highest level in use down to level 0. On each level
     * move forward while the next node sorts before (score, obj), then drop
     * one level and continue from the same node. */
    cur = zsl->header;
    lvl = zsl->level-1;
    while (lvl >= 0) {
        /* The top level starts counting at 0; every lower level starts
         * from what the level above already crossed. */
        if (lvl == (zsl->level-1))
            crossed[lvl] = 0;
        else
            crossed[lvl] = crossed[lvl+1];

        for (;;) {
            zskiplistNode *next = cur->level[lvl].forward;

            if (next == NULL)
                break;
            if (!(next->score < score ||
                  (next->score == score &&
                   compareStringObjects(next->obj,obj) < 0)))
                break;
            crossed[lvl] += cur->level[lvl].span;
            cur = next;
        }
        pred[lvl] = cur;
        lvl--;
    }

    /* Pick a random height for the new node. If it is taller than the
     * list, the header becomes the predecessor on every new level and its
     * span there is set to the current list length. */
    height = zslRandomLevel();
    if (height > zsl->level) {
        for (lvl = zsl->level; lvl < height; lvl++) {
            crossed[lvl] = 0;
            pred[lvl] = zsl->header;
            zsl->header->level[lvl].span = zsl->length;
        }
        zsl->level = height;
    }

    /* Splice the new node in after pred[lvl] on each of its levels and
     * split the predecessor's old span between the two nodes. */
    node = zslCreateNode(height,score,obj);
    for (lvl = 0; lvl < height; lvl++) {
        struct zskiplistLevel *left = &pred[lvl]->level[lvl];
        /* spans crossed between pred[lvl] and the level-0 predecessor */
        unsigned int gap = crossed[0] - crossed[lvl];

        node->level[lvl].forward = left->forward;
        node->level[lvl].span = left->span - gap;
        left->forward = node;
        left->span = gap + 1;
    }

    /* Levels above the new node's height now jump over one more node. */
    for (lvl = height; lvl < zsl->level; lvl++)
        pred[lvl]->level[lvl].span++;

    /* Fix the level-0 backward links; the header is never a backward
     * target, so the first real node gets NULL. */
    if (pred[0] == zsl->header)
        node->backward = NULL;
    else
        node->backward = pred[0];

    if (node->level[0].forward != NULL)
        node->level[0].forward->backward = node;
    else
        zsl->tail = node;   /* nothing follows: the new node is the tail */

    zsl->length++;
    return node;
}