1、zaddGenericCommand(入口函数)
首先是 server.c 文件中命令表里 zadd 命令的定义:
{"zadd",zaddCommand,-4,
"write use-memory fast @sortedset",
0,NULL,1,1,1,0,0,0},
而下面是服务器内部执行的逻辑
/* Generic implementation shared by ZADD and ZINCRBY.
 *
 * c     - the client that issued the command. c->argv[1] is the key; it is
 *         followed by optional NX / XX / CH / INCR / GT / LT options and then
 *         by one or more score-element pairs.
 * flags - ZADD_IN_* bits preset by the caller (presumably ZADD_IN_INCR when
 *         invoked for ZINCRBY); options parsed from the arguments are OR-ed
 *         into it.
 *
 * The reply is written to the client: with INCR the new score (or a null
 * reply when nothing was processed), otherwise the number of added elements
 * (added + updated when CH is given). */
void zaddGenericCommand(client *c, int flags) {
    static char *nanerr = "resulting score is not a number (NaN)";
    robj *key = c->argv[1];
    robj *zobj;
    sds ele;
    double score = 0, *scores = NULL;
    int j, elements, ch = 0;
    int scoreidx = 0;
    /* The following vars are used in order to track what the command actually
     * did during the execution, to reply to the client and to trigger the
     * notification of keyspace change. */
    int added = 0;      /* Number of new elements added. */
    int updated = 0;    /* Number of elements with updated score. */
    int processed = 0;  /* Number of elements processed, may remain zero with
                           options like XX. */

    /* Parse options. At the end 'scoreidx' is set to the argument position
     * of the score of the first score-element pair. */
    scoreidx = 2;
    while(scoreidx < c->argc) {
        char *opt = c->argv[scoreidx]->ptr;
        if (!strcasecmp(opt,"nx")) flags |= ZADD_IN_NX;          /* Only add new elements, never update. */
        else if (!strcasecmp(opt,"xx")) flags |= ZADD_IN_XX;     /* Only update existing elements, never add. */
        else if (!strcasecmp(opt,"ch")) ch = 1;                  /* Return num of elements added or updated. */
        else if (!strcasecmp(opt,"incr")) flags |= ZADD_IN_INCR; /* Increment the score, like ZINCRBY. */
        else if (!strcasecmp(opt,"gt")) flags |= ZADD_IN_GT;
        else if (!strcasecmp(opt,"lt")) flags |= ZADD_IN_LT;
        else break; /* First argument that is not an option: stop parsing. */
        scoreidx++;
    }

    /* Turn options into simple to check vars. */
    int incr = (flags & ZADD_IN_INCR) != 0;
    int nx = (flags & ZADD_IN_NX) != 0;
    int xx = (flags & ZADD_IN_XX) != 0;
    int gt = (flags & ZADD_IN_GT) != 0;
    int lt = (flags & ZADD_IN_LT) != 0;

    /* After the options, we expect to have an even number of args, since
     * we expect any number of score-element pairs. An empty list of pairs
     * is rejected as well. */
    elements = c->argc-scoreidx;
    if (elements % 2 || !elements) {
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }
    elements /= 2; /* Now this holds the number of score-element pairs. */

    /* Check for incompatible options. */
    if (nx && xx) {
        addReplyError(c,
            "XX and NX options at the same time are not compatible");
        return;
    }

    if ((gt && nx) || (lt && nx) || (gt && lt)) {
        addReplyError(c,
            "GT, LT, and/or NX options at the same time are not compatible");
        return;
    }
    /* Note that XX is compatible with either GT or LT */

    if (incr && elements > 1) {
        addReplyError(c,
            "INCR option supports a single increment-element pair");
        return;
    }

    /* Start parsing all the scores, we need to emit any syntax error
     * before executing additions to the sorted set, as the command should
     * either execute fully or nothing at all. */
    scores = zmalloc(sizeof(double)*elements);
    for (j = 0; j < elements; j++) {
        if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
            != C_OK) goto cleanup;
    }

    /* Lookup the key and create the sorted set if does not exist. */
    zobj = lookupKeyWrite(c->db,key);
    if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
    if (zobj == NULL) {
        if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
        /* Pick the initial representation. Only the FIRST member's length
         * is inspected here. When ziplists are disabled
         * (zset_max_ziplist_entries == 0) or that member is longer than
         * zset_max_ziplist_value, createZsetObject() is used (presumably the
         * skiplist encoding); otherwise the object starts as a ziplist. */
        if (server.zset_max_ziplist_entries == 0 ||
            server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
        {
            zobj = createZsetObject();
        } else {
            zobj = createZsetZiplistObject();
        }
        dbAdd(c->db,key,zobj);
    }

    for (j = 0; j < elements; j++) {
        /* Score requested for this pair. */
        score = scores[j];
        /* zsetAdd() leaves 'newscore' untouched when it reports a no-op, so
         * seed it with the requested score: the copy back into 'score' below
         * then never reads an indeterminate value. The value is only sent to
         * the client when 'processed' is non-zero, in which case zsetAdd()
         * has overwritten it. */
        double newscore = score;
        int retflags = 0;

        /* Member for this pair. */
        ele = c->argv[scoreidx+1+j*2]->ptr;
        int retval = zsetAdd(zobj, score, ele, flags, &retflags, &newscore);
        if (retval == 0) {
            addReplyError(c,nanerr);
            goto cleanup;
        }
        /* Set when the element was newly inserted. */
        if (retflags & ZADD_OUT_ADDED) added++;
        /* Set when only the score of an existing element changed. */
        if (retflags & ZADD_OUT_UPDATED) updated++;
        if (!(retflags & ZADD_OUT_NOP)) processed++;
        score = newscore;
    }
    server.dirty += (added+updated);

reply_to_client:
    if (incr) { /* ZINCRBY or INCR option. */
        if (processed)
            addReplyDouble(c,score);
        else
            addReplyNull(c);
    } else { /* ZADD. */
        addReplyLongLong(c,ch ? added+updated : added);
    }

cleanup:
    zfree(scores);
    /* Signal the modification and emit the keyspace event only when the
     * sorted set actually changed. */
    if (added || updated) {
        signalModifiedKey(c,c->db,key);
        notifyKeyspaceEvent(NOTIFY_ZSET,
            incr ? "zincr" : "zadd", key, c->db->id);
    }
}
2、zsetAdd(真正执行添加操作的函数)
/* Insert 'ele' with 'score' into the sorted set 'zobj', or change the score
 * of 'ele' when it is already a member. Both encodings handled below
 * (ziplist and skiplist) are supported.
 *
 * Input flags ('in_flags'):
 *
 * ZADD_IN_INCR: add 'score' to the element's current score instead of
 *               replacing it. A missing element is simply inserted with
 *               'score', i.e. its previous score counts as 0.
 * ZADD_IN_NX:   never touch an element that already exists.
 * ZADD_IN_XX:   never insert an element that does not exist yet.
 * ZADD_IN_GT:   update an existing element only when the resulting score is
 *               greater than its current score.
 * ZADD_IN_LT:   update an existing element only when the resulting score is
 *               less than its current score.
 *
 * Whenever an element is added, or an existing one passes the NX/GT/LT
 * checks, the resulting score is stored in '*newscore' if 'newscore' is not
 * NULL.
 *
 * Output flags ('*out_flags', always reset to 0 first):
 *
 * ZADD_OUT_NAN:     the input or the resulting score is not a number.
 * ZADD_OUT_ADDED:   the element was not present and has been inserted.
 * ZADD_OUT_UPDATED: the score of an existing element was changed.
 * ZADD_OUT_NOP:     nothing was done because of NX, XX, GT or LT.
 *
 * Return value:
 *
 * 1 on success. Note that none of ADDED / UPDATED / NOP is set when an
 * existing element ends up with exactly the score it already had.
 * 0 on error, which currently means 'score' was NaN on entry or the
 * increment produced NaN.
 *
 * Adding to a ziplist-encoded set may convert it to the skiplist encoding:
 * the element is inserted into the ziplist first and the size limits are
 * checked afterwards.
 *
 * Memory management of 'ele': the caller keeps ownership of the SDS string;
 * the skiplist path stores its own sdsdup() copy. */
int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, double *newscore) {
    /* Decode the option bits once. */
    const int want_incr   = (in_flags & ZADD_IN_INCR) ? 1 : 0;
    const int only_new    = (in_flags & ZADD_IN_NX) ? 1 : 0;
    const int only_exist  = (in_flags & ZADD_IN_XX) ? 1 : 0;
    const int only_higher = (in_flags & ZADD_IN_GT) ? 1 : 0;
    const int only_lower  = (in_flags & ZADD_IN_LT) ? 1 : 0;
    double curscore;

    *out_flags = 0; /* Response flags are accumulated from scratch. */

    /* A NaN input is rejected no matter which options are set. */
    if (isnan(score)) {
        *out_flags = ZADD_OUT_NAN;
        return 0;
    }

    switch (zobj->encoding) {
    case OBJ_ENCODING_ZIPLIST: {
        unsigned char *eptr = zzlFind(zobj->ptr,ele,&curscore);

        if (eptr == NULL) {
            /* Element is not a member yet. */
            if (only_exist) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }
            /* Insert first, then switch encoding if the set grew past the
             * configured ziplist limits. */
            zobj->ptr = zzlInsert(zobj->ptr,ele,score);
            if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries ||
                sdslen(ele) > server.zset_max_ziplist_value)
            {
                zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
            }
            if (newscore) *newscore = score;
            *out_flags |= ZADD_OUT_ADDED;
            return 1;
        }

        /* Element already exists. */
        if (only_new) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        if (want_incr) {
            score += curscore;
            if (isnan(score)) {
                *out_flags |= ZADD_OUT_NAN;
                return 0;
            }
        }

        /* LT: skip unless the score decreases. */
        if (only_lower && score >= curscore) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }
        /* GT: skip unless the score increases. */
        if (only_higher && score <= curscore) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        if (newscore) *newscore = score;

        /* A changed score is applied by removing and re-inserting. */
        if (score != curscore) {
            zobj->ptr = zzlDelete(zobj->ptr,eptr);
            zobj->ptr = zzlInsert(zobj->ptr,ele,score);
            *out_flags |= ZADD_OUT_UPDATED;
        }
        return 1;
    }

    case OBJ_ENCODING_SKIPLIST: {
        zset *zs = zobj->ptr;
        zskiplistNode *znode;
        dictEntry *de = dictFind(zs->dict,ele);

        if (de == NULL) {
            /* Element is not a member yet. */
            if (only_exist) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }
            /* The set keeps a private copy of the member string. */
            ele = sdsdup(ele);
            /* zslInsert() only links the node into the skiplist; the
             * member -> score mapping in the dict is added right after. */
            znode = zslInsert(zs->zsl,score,ele);
            serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
            *out_flags |= ZADD_OUT_ADDED;
            if (newscore) *newscore = score;
            return 1;
        }

        /* Element already exists. */
        if (only_new) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        curscore = *(double*)dictGetVal(de);

        if (want_incr) {
            score += curscore;
            if (isnan(score)) {
                *out_flags |= ZADD_OUT_NAN;
                return 0;
            }
        }

        /* LT: skip unless the score decreases. */
        if (only_lower && score >= curscore) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }
        /* GT: skip unless the score increases. */
        if (only_higher && score <= curscore) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        if (newscore) *newscore = score;

        if (score != curscore) {
            znode = zslUpdateScore(zs->zsl,curscore,ele,score);
            /* The dict entry itself is kept; only its value is re-pointed at
             * the score stored in the node returned by zslUpdateScore(). */
            dictGetVal(de) = &znode->score; /* Update score ptr. */
            *out_flags |= ZADD_OUT_UPDATED;
        }
        return 1;
    }

    default:
        serverPanic("Unknown sorted set encoding");
    }
    return 0; /* Never reached. */
}
3、zslInsert(执行skiplist插入新节点的操作)
/* Insert a new node in the skiplist. Assumes the element does not already
 * exist (up to the caller to enforce that). The skiplist takes ownership
 * of the passed SDS string 'ele'.
 *
 * zsl   - the skiplist to insert into.
 * score - score of the new node; must not be NaN (asserted below).
 * ele   - member string stored in the new node.
 *
 * Returns the newly created node. */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
/* update[i] is the last node on level i that sorts before the new node,
 * i.e. the node whose level-i forward pointer must be redirected to the
 * new node. */
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
/* rank[i] is the sum of the spans crossed while walking from the header to
 * update[i]; each level starts from the total of the level above it. */
unsigned int rank[ZSKIPLIST_MAXLEVEL];
int i, level;
serverAssert(!isnan(score));
/* Find the insert position on every level, walking from the highest level
 * currently in use down to level 0. */
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* store rank that is crossed to reach the insert position.
 * The top level starts at 0; every lower level continues from the rank
 * accumulated on the level above, because the walk resumes from the same
 * node 'x'. After the loop rank[0] is the summed span from the header to
 * update[0], and it is the base for the span computations further down. */
rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
/* Advance along level i while the next node sorts before the new one. */
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
/* lower score sorts first ... */
(x->level[i].forward->score == score &&
/* ... equal scores are ordered by sdscmp() on the member */
sdscmp(x->level[i].forward->ele,ele) < 0)))
{
/* Account for the span crossed by this step. */
rank[i] += x->level[i].span;
/* Move to the next node on this level. */
x = x->level[i].forward;
}
/* Remember the node the new node will be linked after on level i. */
update[i] = x;
}
/* we assume the element is not already inside, since we allow duplicated
 * scores, reinserting the same element should never happen since the
 * caller of zslInsert() should test in the hash table if the element is
 * already inside or not. */
/* Pick the height of the new node via zslRandomLevel(). */
level = zslRandomLevel();
/* If the new node is taller than the list, bring the unused header levels
 * into play: the header becomes the predecessor on those levels, and its
 * span is preset to the current length so the span formulas below work
 * unchanged. */
if (level > zsl->level) {
for (i = zsl->level; i < level; i++) {
rank[i] = 0;
update[i] = zsl->header;
update[i]->level[i].span = zsl->length;
}
/* The list now uses 'level' levels. */
zsl->level = level;
}
/* Create the node. */
x = zslCreateNode(level,score,ele);
/* Splice the node in after update[i] on each of its levels. */
for (i = 0; i < level; i++) {
/* The new node takes over the predecessor's forward pointer ... */
x->level[i].forward = update[i]->level[i].forward;
/* ... and the predecessor now points at the new node. */
update[i]->level[i].forward = x;
/* update span covered by update[i] as x is inserted here.
 * (rank[0] - rank[i]) is the summed span between update[i] and update[0];
 * the new node gets what remains of update[i]'s old span beyond that. */
x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
/* The predecessor now spans that distance plus one for the new node. */
update[i]->level[i].span = (rank[0] - rank[i]) + 1;
}
/* increment span for untouched levels: on levels above the new node's
 * height the predecessor's link skips over the new node, so its span
 * grows by one. */
for (i = level; i < zsl->level; i++) {
update[i]->level[i].span++;
}
/* Backward pointer: NULL when the new node directly follows the header. */
x->backward = (update[0] == zsl->header) ? NULL : update[0];
/* Fix the successor's backward pointer, or make the new node the tail
 * when it has no successor on level 0. */
if (x->level[0].forward)
x->level[0].forward->backward = x;
else
zsl->tail = x;
/* One more node in the list. */
zsl->length++;
return x;
}
4、skiplist结构图说明
下面的图复制自 http://timd.cn/redis-zset/ ,部分说明也引用自该文(我自己写可能没有人家写得好,也没有那么全)。图中没有画出 backward 指针,但不影响理解。
- zskiplistNode 是跳表的节点:
- ele 表示跳表中的元素
- score 表示元素的分值,跳表根据该值对节点进行排序
- backward 指向前一个节点,头节点和第一个节点的 backward 都是 NULL
- 数组 level
- level 的长度表示节点占据的高度,头节点的高度是 ZSKIPLIST_MAXLEVEL,其它节点的高度在 1 和 ZSKIPLIST_MAXLEVEL 之间(包含 1 和 ZSKIPLIST_MAXLEVEL)
- 分量 level[i] 包含 2 个域:
- forward 指向第 i 层上的下一个节点,最后一个节点在第 i 层上的下一个节点是 NULL
- span 表示从当前节点沿第 i 层前进到下一个节点所跨越的节点数量
- zskiplist 是跳表:
- header、tail 分别指向跳表的头节点、尾节点
- length 表示跳表的长度
- level 表示跳表当前的最大层数