本人 github 地址
github 地址 里面有注释好的代码,下载下来可以方便阅读。
本篇文章看点
- 如果你对zset的结构仍然有疑问,请认真看本篇文章
- 如果面试官问你zset到底增加节点和删除节点都是如何操作的,你觉得你回答不出来或者不够清晰,请认真看本篇文章
- 如果你是redis源码爱好者,请认真看本篇文章。
- 如果你立志做一个肚里有货的程序员,请认真看本篇文章。
本篇文章会深度剖析跳表的add操作,不放过任何一行代码,对于文字解释不清的会画图,画图还让你不懂,请@作者,作者亲自给你回答。
好了说了那么多废话,下面直接进入主题。
zset 的结构
t_zset.c
// Sorted set object payload: pairs a dictionary with a skiplist.
typedef struct zset {
// Dictionary keyed by member; zsetAdd looks members up here and reads the
// stored value as a pointer to the member's score.
dict *dict;
// Skiplist holding the same members (ordering is by score, then by member,
// as implemented in zslInsert).
zskiplist *zsl;
} zset;
// Skiplist container.
typedef struct zskiplist {
// Header node (allocated by zslCreate with ZSKIPLIST_MAXLEVEL levels and no
// element) and tail node (NULL while the list is empty).
struct zskiplistNode *header, *tail;
// Number of elements in the list; zslInsert increments it and
// zslDeleteNode decrements it.
unsigned long length;
// Number of levels currently in use; starts at 1, grows in zslInsert and
// shrinks in zslDeleteNode.
int level;
} zskiplist;
// A single skiplist node.
typedef struct zskiplistNode {
// Member string (sds); acts as the key of the entry.
sds ele;
// Score used for ordering; ties are broken by comparing ele.
double score;
// Pointer to the previous node on level 0 (NULL for the first element).
struct zskiplistNode *backward;
struct zskiplistLevel {
// Next node on this level (NULL at the end of the level).
struct zskiplistNode *forward;
// Number of level-0 steps covered by following 'forward' on this level.
unsigned long span;
} level[];
} zskiplistNode;
上图:
数据示例:
上图只展示了head的zskiplistLevel , 如果你能仔细看上面的图,你就会发现和你之前理解的跳表模型是很不一样的,因为以前画的图的确是不严谨的,从而导致你是不知道跳表该如何去增删改查,上面的图虽然没有完全展开去画但是绝对是遵循了源码来画的。
add操作源码解析
命令结构
首先看zadd 这个命令结构
zadd key [NX|XX] [CH] [INCR] score member [score member …]
nx: 表示只有元素不存在的时候才会操作。(可选)
xx: 表示只有元素存在的时候才会操作。(可选与nx互斥)
ch: 指定 ch 时,返回值表示新增和更新的元素节点的总个数(不指定时只返回新增的个数)。因为zadd 一次性能够增加多个节点。
incr 就是对某个member 增加score。incr只能增加一对参数,不能多个同时提交。这是因为跟其返回给客户端的参数有关系,因为incr会返回增加后的分数,如果member 中有相同的元素那么其返回的时候就不好一一对应起来。
score , 就是分数,跳表会根据score排序,分数相等的时候,比较其member,
member为string类型,可以看作对应score的key。
add的主流程结构
zadd 操作源码入口
{"zadd",zaddCommand,-4,
"write use-memory fast @sortedset",
0,NULL,1,1,1,0,0,0},
可以看到zadd的tag 是一个fast操作
/* ZADD command handler: forwards the client to the generic implementation
 * with no preset option flags (ZADD_NONE). */
void zaddCommand(client *c)
{
    zaddGenericCommand(c, ZADD_NONE);
}
/* Generic implementation behind ZADD.
 *
 * c:     client whose argv holds: command, key, optional NX/XX/CH/INCR
 *        options, then one or more score/member pairs.
 * flags: option flags preset by the caller; options parsed from argv are
 *        OR-ed into it.
 *
 * All scores are parsed before anything is modified, then each pair is
 * applied with zsetAdd(). The reply is the resulting score (INCR) or the
 * number of added (or added+updated with CH) elements. */
void zaddGenericCommand(client *c, int flags) {
static char *nanerr = "resulting score is not a number (NaN)";
robj *key = c->argv[1];
robj *zobj;
sds ele;
double score = 0, *scores = NULL;
int j, elements;
int scoreidx = 0;
/* The following vars are used in order to track what the command actually
* did during the execution, to reply to the client and to trigger the
* notification of keyspace change. */
int added = 0; /* Number of new elements added. */
int updated = 0; /* Number of elements with updated score. */
int processed = 0; /* Number of elements processed, may remain zero with
options like XX. */
/* Parse options. At the end 'scoreidx' is set to the argument position
* of the score of the first score-element pair. */
scoreidx = 2;
while(scoreidx < c->argc) {
char *opt = c->argv[scoreidx]->ptr;
// NX: only add members that do not exist yet
if (!strcasecmp(opt,"nx")) flags |= ZADD_NX;
// XX: only touch members that already exist
else if (!strcasecmp(opt,"xx")) flags |= ZADD_XX;
// CH: reply with added+updated instead of only added (see the reply below)
else if (!strcasecmp(opt,"ch")) flags |= ZADD_CH;
// INCR: the given score is added to the member's current score
else if (!strcasecmp(opt,"incr")) flags |= ZADD_INCR;
else break;
scoreidx++;
}
/* Turn options into simple to check vars. */
int incr = (flags & ZADD_INCR) != 0;
int nx = (flags & ZADD_NX) != 0;
int xx = (flags & ZADD_XX) != 0;
int ch = (flags & ZADD_CH) != 0;
/* After the options, we expect to have an even number of args, since
* we expect any number of score-element pairs. */
// The remaining arguments must come in pairs and must not be empty
elements = c->argc-scoreidx;
if (elements % 2 || !elements) {
addReply(c,shared.syntaxerr);
return;
}
// Convert the argument count into a pair count
elements /= 2; /* Now this holds the number of score-element pairs. */
/* Check for incompatible options. */
// NX and XX are mutually exclusive
if (nx && xx) {
addReplyError(c,
"XX and NX options at the same time are not compatible");
return;
}
// INCR accepts exactly one pair
if (incr && elements > 1) {
addReplyError(c,
"INCR option supports a single increment-element pair");
return;
}
/* Start parsing all the scores, we need to emit any syntax error
* before executing additions to the sorted set, as the command should
* either execute fully or nothing at all. */
// Validate every score as a double before touching the sorted set
scores = zmalloc(sizeof(double)*elements);
for (j = 0; j < elements; j++) {
if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
!= C_OK) goto cleanup;
}
/* Lookup the key and create the sorted set if does not exist. */
// Look the key up for writing
zobj = lookupKeyWrite(c->db,key);
if (zobj == NULL) {
// With XX nothing can be updated on a missing key
if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
// Pick the initial encoding: skiplist when zset_max_ziplist_entries is 0
// or the first member is longer than zset_max_ziplist_value,
// ziplist otherwise
if (server.zset_max_ziplist_entries == 0 ||
server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
{
zobj = createZsetObject();
} else {
// ziplist encoding
zobj = createZsetZiplistObject();
}
// Store the new object in the database under 'key'
dbAdd(c->db,key,zobj);
} else {
if (zobj->type != OBJ_ZSET) {
addReply(c,shared.wrongtypeerr);
goto cleanup;
}
}
for (j = 0; j < elements; j++) {
// Receives the resulting score from zsetAdd.
// NOTE(review): zsetAdd does not assign it on every path (e.g. an NX
// no-op), so the copy into 'score' below may carry an unset value; it is
// only sent to the client when incr && processed.
double newscore;
// Score of this pair
score = scores[j];
// zsetAdd reads the option flags from here and overwrites them with
// result flags
int retflags = flags;
// Member of this pair
ele = c->argv[scoreidx+1+j*2]->ptr;
// Add or update the member
int retval = zsetAdd(zobj, score, ele, &retflags, &newscore);
if (retval == 0) {
addReplyError(c,nanerr);
goto cleanup;
}
if (retflags & ZADD_ADDED) added++;
if (retflags & ZADD_UPDATED) updated++;
if (!(retflags & ZADD_NOP)) processed++;
score = newscore;
}
server.dirty += (added+updated);
reply_to_client:
if (incr) { /* ZINCRBY or INCR option. */
if (processed)
// Reply with the resulting score
addReplyDouble(c,score);
else
addReplyNull(c);
} else { /* ZADD. */
// Reply with the number of affected elements
addReplyLongLong(c,ch ? added+updated : added);
}
cleanup:
zfree(scores);
if (added || updated) {
signalModifiedKey(c,c->db,key);
notifyKeyspaceEvent(NOTIFY_ZSET,
incr ? "zincr" : "zadd", key, c->db->id);
}
}
/* Build an empty sorted set object using the skiplist encoding.
 * The payload owns a freshly created dictionary and skiplist. */
robj *createZsetObject(void)
{
    zset *payload = zmalloc(sizeof(*payload));

    /* Both index structures are created empty. */
    payload->dict = dictCreate(&zsetDictType,NULL);
    payload->zsl = zslCreate();

    /* Wrap the payload in an object and tag its encoding. */
    robj *wrapped = createObject(OBJ_ZSET,payload);
    wrapped->encoding = OBJ_ENCODING_SKIPLIST;
    return wrapped;
}
/* Create a new skiplist. */
//zsl的初始化
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
zsl = zmalloc(sizeof(*zsl));
zsl->level = 1;
zsl->length = 0;
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
//特别要注意head的初始化
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->level[j].forward = NULL;
zsl->header->level[j].span = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL;
return zsl;
}
上面的源代码是zadd的主体流程主要是有一些对于参数的提取,操作的判断,zset的初始化,返回参数的定义。
zadd 的核心逻辑代码
/* Add a member to the sorted set 'zobj', or update its score.
 *
 * score:    score to set, or the increment when ZADD_INCR is given.
 * ele:      member; it is duplicated before being stored in the skiplist
 *           encoding, so the caller keeps ownership of its string.
 * flags:    in: ZADD_INCR / ZADD_NX / ZADD_XX options; out: reset and then
 *           set to ZADD_NAN, ZADD_NOP, ZADD_ADDED or ZADD_UPDATED.
 * newscore: if not NULL, receives the resulting score when the member is
 *           added or when INCR is applied to an existing member; it is left
 *           untouched on the other paths.
 *
 * Returns 1 on success (including no-ops) and 0 when the score is NaN. */
int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) {
/* Turn options into simple to check vars. */
int incr = (*flags & ZADD_INCR) != 0;
int nx = (*flags & ZADD_NX) != 0;
int xx = (*flags & ZADD_XX) != 0;
*flags = 0; /* We'll return our response flags. */
double curscore;
/* NaN as input is an error regardless of all the other parameters. */
// Reject a NaN input score
if (isnan(score)) {
*flags = ZADD_NAN;
return 0;
}
/* Update the sorted set according to its encoding. */
// Ziplist-encoded sorted set
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *eptr;
if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
/* NX? Return, same element already exists. */
if (nx) {
// NX and the member already exists: nothing to do
*flags |= ZADD_NOP;
return 1;
}
/* Prepare the score for the increment if needed. */
if (incr) {
score += curscore;
// The increment produced NaN: report it and leave the set unchanged
if (isnan(score)) {
*flags |= ZADD_NAN;
return 0;
}
// Hand the resulting score back to the caller
if (newscore) *newscore = score;
}
/* Remove and re-insert when score changed. */
if (score != curscore) {
// Delete the existing entry
zobj->ptr = zzlDelete(zobj->ptr,eptr);
// Insert it again with the new score
zobj->ptr = zzlInsert(zobj->ptr,ele,score);
*flags |= ZADD_UPDATED;
}
return 1;
}
// XX means "only touch elements already existing"; the member is missing
// here, so it is added only when XX is not set
else if (!xx) {
/* Optimize: check if the element is too large or the list
* becomes too long *before* executing zzlInsert. */
zobj->ptr = zzlInsert(zobj->ptr,ele,score);
// After the insert, convert to the skiplist encoding when zzlLength()
// exceeds zset_max_ziplist_entries or the member is longer than
// zset_max_ziplist_value.
// (The original annotation gives defaults of 128 and 64 - TODO confirm
// against the server configuration.)
if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries ||
sdslen(ele) > server.zset_max_ziplist_value)
zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
if (newscore) *newscore = score;
*flags |= ZADD_ADDED;
return 1;
} else {
*flags |= ZADD_NOP;
return 1;
}
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
// Skiplist-encoded sorted set
zset *zs = zobj->ptr;
zskiplistNode *znode;
dictEntry *de;
// Look the member up in the dictionary
de = dictFind(zs->dict,ele);
// The member exists
if (de != NULL) {
/* NX? Return, same element already exists. */
if (nx) {
*flags |= ZADD_NOP;
return 1;
}
// The dictionary value points at the current score
curscore = *(double*)dictGetVal(de);
/* Prepare the score for the increment if needed. */
if (incr) {
score += curscore;
if (isnan(score)) {
*flags |= ZADD_NAN;
return 0;
}
if (newscore) *newscore = score;
}
/* Remove and re-insert when score changes. */
if (score != curscore) {
// The score determines ordering: zslUpdateScore either updates the node
// in place or removes and re-inserts it
znode = zslUpdateScore(zs->zsl,curscore,ele,score);
/* Note that we did not removed the original element from
* the hash table representing the sorted set, so we just
* update the score. */
dictGetVal(de) = &znode->score; /* Update score ptr. */
*flags |= ZADD_UPDATED;
}
return 1;
} else if (!xx) {
// New member: duplicate 'ele' so the stored string does not alias the
// caller's; the copy is used both for the skiplist node and as the
// dictionary key
ele = sdsdup(ele);
znode = zslInsert(zs->zsl,score,ele);
serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
*flags |= ZADD_ADDED;
if (newscore) *newscore = score;
return 1;
} else {
// XX was given but the member does not exist
*flags |= ZADD_NOP;
return 1;
}
} else {
serverPanic("Unknown sorted set encoding");
}
return 0; /* Never reached. */
}
* Insert (element,score) pair in ziplist. This function assumes the element is
* not yet present in the list. */
unsigned char *zzlInsert(unsigned char *zl, sds ele, double score) {
//同样的重新开始遍历
unsigned char *eptr = ziplistIndex(zl,0), *sptr;
double s;
while (eptr != NULL) {
//获取到score 节点
sptr = ziplistNext(zl,eptr);
serverAssert(sptr != NULL);
s = zzlGetScore(sptr);
//需要做排序比较,性能的会o(n) 但是这个n的长度
//会做限制
if (s > score) {
/* First element with score larger than score for element to be
* inserted. This means we should take its spot in the list to
* maintain ordering. */
zl = zzlInsertAt(zl,eptr,ele,score);
break;
} else if (s == score) {
/* Ensure lexicographical ordering for elements. */
//等于的情况再比较
if (zzlCompareElements(eptr,(unsigned char*)ele,sdslen(ele)) > 0) {
zl = zzlInsertAt(zl,eptr,ele,score);
break;
}
}
/* Move to next element. */
eptr = ziplistNext(zl,sptr);
}
上面这一长串代码最主要是看到了
- zset 在节点数比较少、member 没有使用特别长的字符串的情况下,其 encoding 是 ziplist,type 是 zset;存储方式是 element 和 score 存放在相邻的两个节点中。
- zset遇到更新节点的情况,一般都是采用先删除后插入的方式。这里也分情况如果encoding是ziplist,即使新的score 位置没有发生改变仍然会先删除后插入,但是如果插入是跳表结构,如果新的score的位置还是原来的位置则直接更新。
我们直接来看到跳表的更新操作
/* Change the score of an element that is already stored in the skiplist.
 *
 * zsl:      the skiplist.
 * curscore: the score the element currently has (must match).
 * ele:      the element (must exist).
 * newscore: the score to assign.
 *
 * The dictionary side of the sorted set is NOT touched here; the caller has
 * to refresh it.
 *
 * When the new score keeps the node strictly between its level-0 neighbours
 * the score is overwritten in place. Otherwise the node is unlinked and a new
 * node is inserted, which is more expensive.
 *
 * Returns the node that now holds the element. */
zskiplistNode *zslUpdateScore(zskiplist *zsl, double curscore, sds ele, double newscore) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cursor = zsl->header;
    int lvl = zsl->level;

    /* Walk from the topmost level down, recording on every level the last
     * node ordered before (curscore, ele). These are the predecessors that
     * need relinking if the node has to be removed. */
    while (lvl-- > 0) {
        for (;;) {
            zskiplistNode *next = cursor->level[lvl].forward;
            if (next == NULL) break;
            int precedes = next->score < curscore ||
                           (next->score == curscore &&
                            sdscmp(next->ele,ele) < 0);
            if (!precedes) break;
            cursor = next;
        }
        update[lvl] = cursor;
    }

    /* The level-0 successor of the last predecessor must be our element. */
    zskiplistNode *x = cursor->level[0].forward;
    serverAssert(x && curscore == x->score && sdscmp(x->ele,ele) == 0);

    /* Same position after the update? Then just overwrite the score. */
    int after_prev = x->backward == NULL || x->backward->score < newscore;
    int before_next = x->level[0].forward == NULL ||
                      x->level[0].forward->score > newscore;
    if (after_prev && before_next) {
        x->score = newscore;
        return x;
    }

    /* Position changes: unlink the old node and insert a new one that takes
     * over the same SDS string. */
    zslDeleteNode(zsl, x, update);
    zskiplistNode *reinserted = zslInsert(zsl,newscore,x->ele);

    /* The string now belongs to the new node; detach it before releasing
     * the old node so it is not freed with it. */
    x->ele = NULL;
    zslFreeNode(x);
    return reinserted;
}
这段代码有两个重点信息
1, 一个就是找到每个层那个刚好小于的那个节点,表示若发生更改,会影响到相邻的节点
2,若新的score的位置没有发生变化,则可以直接更改。
跳表删除节点的过程
跳表节点删除代码:
/* Unlink node 'x' from the skiplist (internal helper for the delete paths).
 *
 * update[i] must be the last node before 'x' on level i for every level in
 * use. The node itself is not freed here. */
void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
    int lvl = 0;

    while (lvl < zsl->level) {
        zskiplistNode *prev = update[lvl];

        if (prev->level[lvl].forward != x) {
            /* 'x' is not linked on this level: the predecessor's jump just
             * covers one element less. */
            prev->level[lvl].span -= 1;
        } else {
            /* 'x' is linked here: merge its span into the predecessor's
             * (minus the removed element) and bypass it. */
            prev->level[lvl].span += x->level[lvl].span - 1;
            prev->level[lvl].forward = x->level[lvl].forward;
        }
        lvl++;
    }

    /* Repair the backward chain, or move the tail when 'x' was last. */
    zskiplistNode *succ = x->level[0].forward;
    if (succ != NULL)
        succ->backward = x->backward;
    else
        zsl->tail = x->backward;

    /* Drop top levels that became empty (header has no successor there),
     * always keeping at least one level. */
    for (; zsl->level > 1 &&
           zsl->header->level[zsl->level-1].forward == NULL; zsl->level--)
        ;

    zsl->length--;
}
删除代码的核心逻辑是:利用刚刚找到的 update 数组(每一层中恰好位于待删除节点之前的那个节点),判断待删除节点是否出现在该层。如果在该层,则执行链表的常规删除操作并合并 span;如果不在,则只需要将该层的 span 减 1 即可。
第二个信息就是整个跳表的level什么时候减小的条件,上面的代码也注释得非常清晰。
跳表的增加节点过程
跳表增加节点的代码:
/* Insert a new (score, ele) node into the skiplist and return it.
 *
 * The element is assumed not to be present already; the caller has to
 * enforce that. The skiplist takes ownership of the SDS string 'ele'.
 * 'score' must not be NaN. */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    unsigned int rank[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cursor;
    int lvl, height;

    serverAssert(!isnan(score));

    /* Search from the topmost level down. On each level remember the last
     * node ordered before the new one (update[]) and how many elements were
     * crossed to reach it (rank[]). */
    cursor = zsl->header;
    lvl = zsl->level;
    while (lvl-- > 0) {
        /* Continue counting from the level above; the top starts at 0. */
        if (lvl == (zsl->level-1))
            rank[lvl] = 0;
        else
            rank[lvl] = rank[lvl+1];

        for (;;) {
            zskiplistNode *next = cursor->level[lvl].forward;
            if (next == NULL) break;
            int precedes = next->score < score ||
                           (next->score == score &&
                            sdscmp(next->ele,ele) < 0);
            if (!precedes) break;
            rank[lvl] += cursor->level[lvl].span;
            cursor = next;
        }
        update[lvl] = cursor;
    }

    /* Duplicate scores are allowed, duplicate elements are not: the caller
     * is expected to have checked the hash table before calling. */
    height = zslRandomLevel();
    if (height > zsl->level) {
        /* New levels: the header is the predecessor and, until the node is
         * linked, its jump spans the whole list. */
        for (lvl = zsl->level; lvl < height; lvl++) {
            rank[lvl] = 0;
            update[lvl] = zsl->header;
            zsl->header->level[lvl].span = zsl->length;
        }
        zsl->level = height;
    }

    /* Nodes are allocated with exactly as many levels as they were given. */
    zskiplistNode *node = zslCreateNode(height,score,ele);
    for (lvl = 0; lvl < height; lvl++) {
        zskiplistNode *prev = update[lvl];
        /* Elements between this level's predecessor and the insert spot. */
        unsigned int skipped = rank[0] - rank[lvl];

        /* Standard linked-list splice. */
        node->level[lvl].forward = prev->level[lvl].forward;
        prev->level[lvl].forward = node;

        /* Split the predecessor's old span around the new node. */
        node->level[lvl].span = prev->level[lvl].span - skipped;
        prev->level[lvl].span = skipped + 1;
    }

    /* Levels above the new node only get one more element under them. */
    for (lvl = height; lvl < zsl->level; lvl++)
        update[lvl]->level[lvl].span++;

    /* The header is never exposed through backward pointers. */
    if (update[0] == zsl->header)
        node->backward = NULL;
    else
        node->backward = update[0];

    /* Fix the successor's backward pointer, or become the new tail. */
    if (node->level[0].forward)
        node->level[0].forward->backward = node;
    else
        zsl->tail = node;

    zsl->length++;
    return node;
}
/* Pick the number of levels for a node about to be created.
 *
 * The result lies between 1 and ZSKIPLIST_MAXLEVEL (both inclusive). Each
 * extra level is granted only while a fresh 16-bit random value falls below
 * ZSKIPLIST_P * 0xFFFF, so higher levels are increasingly unlikely.
 * (ZSKIPLIST_P is defined elsewhere; the original annotation describes it as
 * a one-in-four chance - confirm against its definition.) */
int zslRandomLevel(void) {
    int level = 1;

    for (;;) {
        if (!((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF)))
            break;
        level += 1;
    }

    /* Clamp to the maximum supported level. */
    if (level < ZSKIPLIST_MAXLEVEL)
        return level;
    return ZSKIPLIST_MAXLEVEL;
}
看完增加节点的代码有几个重心
第一, 每个节点的层级是不一样的。比如最大层级是5,那么并不代表每个节点都有5个层级
第二,头节点的拥有的层级是最大的,为什么,请仔细看上面扩充层级部分。
第三,每个节点至少拥有层级1,层级1的特点是span为1,如果串联起来就是一个包含所有元素的有序链表。
第四,rank 的用法很细致,需要自己多回味几次:rank[i] 记录的是第 i 层 update[i] 所指向节点的排位(即从头节点走到该节点所跨过的元素个数)。
下图是演示一个插入的过程,但是省略了指针的指向,有一点也很重要,就是上面画的跳表结构图,就是每个元素都只有一个本体,其它指向操作都是通过指针来完成。
总结
本章主要讲解了关于zset插入和删除和更新操作,还有整个zset的结构,基本上这章把跳表操作的核心内容都有讲到,下一节我们会直接通过分析其查询的几个命令,来阐述zset的用法。