Redis 6.2 zset的写入(源码),最后介绍一下skiplist的结构

最新推荐文章于 2022-09-09 11:22:43 发布

胖墩的IT

最新推荐文章于 2022-09-09 11:22:43 发布

阅读量335

点赞数

分类专栏： # Redis 文章标签： redis

本文链接：https://blog.csdn.net/weixin_43113679/article/details/123415256

版权

Redis 专栏收录该内容

49 篇文章 4 订阅

订阅专栏

文章目录

1、zaddGenericCommand(入口函数)
2、zsetAdd（真正执行添加操作的函数）
3、zslInsert(执行skiplist插入新节点的操作)
4、skiplist结构图说明

1、zaddGenericCommand(入口函数)

首先，zadd 命令注册在 server.c 文件的命令表中：

  {"zadd",zaddCommand,-4,
     "write use-memory fast @sortedset",
     0,NULL,1,1,1,0,0,0},

而下面是服务器内部执行的逻辑


/* Generic implementation shared by ZADD and ZINCRBY.
 *
 * 'c' is the client issuing the command: c->argv[1] is the key, followed by
 * zero or more option keywords and then one or more score/member pairs.
 * 'flags' holds ZADD_IN_* bits; option keywords found on the command line
 * are OR-ed into it.
 *
 * The function sends the reply (or an error) to the client itself and
 * returns nothing. All scores are parsed before the sorted set is touched,
 * so a malformed score aborts the command before anything is modified. */
void zaddGenericCommand(client *c, int flags) {
    static char *nanerr = "resulting score is not a number (NaN)";
    robj *key = c->argv[1];
    robj *zobj;
    sds ele;
    double score = 0, *scores = NULL;
    int j, elements, ch = 0;
    int scoreidx = 0;
    /* The following vars are used in order to track what the command actually
     * did during the execution, to reply to the client and to trigger the
     * notification of keyspace change. */
    int added = 0;      /* Number of new elements added. */
    int updated = 0;    /* Number of elements with updated score. */
    int processed = 0;  /* Number of elements processed, may remain zero with
                           options like XX. */

    /* Parse options. At the end 'scoreidx' is set to the argument position
     * of the score of the first score-element pair. */
    scoreidx = 2;
    while(scoreidx < c->argc) {
        char *opt = c->argv[scoreidx]->ptr;
        if (!strcasecmp(opt,"nx")) flags |= ZADD_IN_NX; /* Only add new members, never update. */
        else if (!strcasecmp(opt,"xx")) flags |= ZADD_IN_XX; /* Only update existing members, never add. */
        else if (!strcasecmp(opt,"ch")) ch = 1; /* Return num of elements added or updated. */
        else if (!strcasecmp(opt,"incr")) flags |= ZADD_IN_INCR; /* Increment the score instead of replacing it. */
        else if (!strcasecmp(opt,"gt")) flags |= ZADD_IN_GT; /* Update only if the new score is greater. */
        else if (!strcasecmp(opt,"lt")) flags |= ZADD_IN_LT; /* Update only if the new score is less. */
        else break; /* First non-option argument: the score/member pairs start here. */
        scoreidx++;
    }

    /* Turn options into simple to check vars. */
    int incr = (flags & ZADD_IN_INCR) != 0;
    int nx = (flags & ZADD_IN_NX) != 0;
    int xx = (flags & ZADD_IN_XX) != 0;
    int gt = (flags & ZADD_IN_GT) != 0;
    int lt = (flags & ZADD_IN_LT) != 0;

    /* After the options, we expect to have an even number of args, since
     * we expect any number of score-element pairs. Zero pairs is also a
     * syntax error. */
    elements = c->argc-scoreidx;
    if (elements % 2 || !elements) {
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }
    elements /= 2; /* Now this holds the number of score-element pairs. */

    /* Check for incompatible options. */
    if (nx && xx) {
        addReplyError(c,
            "XX and NX options at the same time are not compatible");
        return;
    }

    if ((gt && nx) || (lt && nx) || (gt && lt)) {
        addReplyError(c,
            "GT, LT, and/or NX options at the same time are not compatible");
        return;
    }
    /* Note that XX is compatible with either GT or LT */

    if (incr && elements > 1) {
        addReplyError(c,
            "INCR option supports a single increment-element pair");
        return;
    }

    /* Start parsing all the scores, we need to emit any syntax error
     * before executing additions to the sorted set, as the command should
     * either execute fully or nothing at all. */
    scores = zmalloc(sizeof(double)*elements);
    for (j = 0; j < elements; j++) {
        if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
            != C_OK) goto cleanup;
    }

    /* Lookup the key and create the sorted set if does not exist. */
    zobj = lookupKeyWrite(c->db,key);
    if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
    if (zobj == NULL) {
        if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
        /* Pick the initial representation: use createZsetObject() instead of
         * the ziplist variant when ziplists are disabled (max entries == 0)
         * or when the first member is longer than the configured ziplist
         * value limit. */
        if (server.zset_max_ziplist_entries == 0 ||
            server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
        {
            zobj = createZsetObject();
        } else {
            zobj = createZsetZiplistObject();
        }
        dbAdd(c->db,key,zobj);
    }

    for (j = 0; j < elements; j++) {
        /* zsetAdd() leaves 'newscore' untouched on its NOP paths, so give it
         * a defined value before it is copied into 'score' below. */
        double newscore = 0;
        score = scores[j]; /* Score of the j-th pair. */
        int retflags = 0;

        ele = c->argv[scoreidx+1+j*2]->ptr; /* Member of the j-th pair. */
        int retval = zsetAdd(zobj, score, ele, flags, &retflags, &newscore);
        if (retval == 0) {
            addReplyError(c,nanerr);
            goto cleanup;
        }
        /* The member was not present before and has been added. */
        if (retflags & ZADD_OUT_ADDED) added++;
        /* The member already existed and only its score changed. */
        if (retflags & ZADD_OUT_UPDATED) updated++;
        if (!(retflags & ZADD_OUT_NOP)) processed++;
        score = newscore;
    }
    server.dirty += (added+updated);

reply_to_client:
    if (incr) { /* ZINCRBY or INCR option. */
        if (processed)
            addReplyDouble(c,score);
        else
            addReplyNull(c);
    } else { /* ZADD. */
        addReplyLongLong(c,ch ? added+updated : added);
    }

cleanup:
    zfree(scores);
    /* Signal the modification and fire the keyspace event only if something
     * actually changed. */
    if (added || updated) {
        signalModifiedKey(c,c->db,key);
        notifyKeyspaceEvent(NOTIFY_ZSET,
            incr ? "zincr" : "zadd", key, c->db->id);
    }
}

2、zsetAdd（真正执行添加操作的函数）


/* Add 'ele' to the sorted set 'zobj' with the given 'score', or update the
 * score of 'ele' if it is already a member. Works for both encodings
 * (ziplist and skiplist).
 *
 * Input flags ('in_flags'):
 *
 * ZADD_IN_INCR: add 'score' to the member's current score instead of
 *               replacing it. A member that does not exist yet is simply
 *               inserted with 'score' (i.e. a previous score of 0).
 * ZADD_IN_NX:   operate only when the member does not exist.
 * ZADD_IN_XX:   operate only when the member already exists.
 * ZADD_IN_GT:   for an existing member, operate only when the new score is
 *               greater than the current one.
 * ZADD_IN_LT:   for an existing member, operate only when the new score is
 *               less than the current one.
 *
 * Output flags (written to '*out_flags', which is reset to 0 on entry):
 *
 * ZADD_OUT_NAN:     the input or the resulting score is not a number.
 * ZADD_OUT_ADDED:   the member was not present and has been added.
 * ZADD_OUT_UPDATED: the member was present and its score changed.
 * ZADD_OUT_NOP:     nothing was done because of NX, XX, GT or LT.
 *
 * When the member is added, or an existing member passes the NX/GT/LT
 * filters, the final score is stored in '*newscore' if 'newscore' is not
 * NULL. On the NOP and NaN paths '*newscore' is left untouched.
 *
 * Return value: 1 on success (possibly with neither ADDED nor UPDATED set,
 * e.g. when the score did not change), 0 when 'score' is NaN on entry or the
 * increment produces NaN.
 *
 * As a side effect, adding a member to a ziplist-encoded set may convert the
 * object to the skiplist encoding.
 *
 * Memory management of 'ele': the caller keeps ownership of the SDS string;
 * this function duplicates it when it needs its own copy. */
int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, double *newscore) {
    /* Decode the input flags once into plain booleans. */
    const int want_incr = !!(in_flags & ZADD_IN_INCR);
    const int only_new = !!(in_flags & ZADD_IN_NX);
    const int only_existing = !!(in_flags & ZADD_IN_XX);
    const int only_greater = !!(in_flags & ZADD_IN_GT);
    const int only_less = !!(in_flags & ZADD_IN_LT);
    double oldscore;

    *out_flags = 0; /* Start from a clean set of response flags. */

    /* A NaN input score is rejected whatever the other arguments are. */
    if (isnan(score)) {
        *out_flags = ZADD_OUT_NAN;
        return 0;
    }

    /* ---- Ziplist encoding ---- */
    if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
        unsigned char *eptr = zzlFind(zobj->ptr,ele,&oldscore);

        if (eptr == NULL) {
            /* Member missing: XX forbids creating it. */
            if (only_existing) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            /* Insert first, then convert to a skiplist if the list became
             * too long or the member exceeds the ziplist value limit. */
            zobj->ptr = zzlInsert(zobj->ptr,ele,score);
            if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries ||
                sdslen(ele) > server.zset_max_ziplist_value)
            {
                zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
            }
            if (newscore) *newscore = score;
            *out_flags |= ZADD_OUT_ADDED;
            return 1;
        }

        /* Member present: NX forbids touching it. */
        if (only_new) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        /* With INCR the final score is current + increment. */
        if (want_incr) {
            score += oldscore;
            if (isnan(score)) {
                *out_flags |= ZADD_OUT_NAN;
                return 0;
            }
        }

        /* GT/LT: skip unless the score moves in the requested direction. */
        if ((only_less && score >= oldscore) ||
            (only_greater && score <= oldscore))
        {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        if (newscore) *newscore = score;

        /* A changed score means delete + re-insert at the new position. */
        if (score != oldscore) {
            zobj->ptr = zzlDelete(zobj->ptr,eptr);
            zobj->ptr = zzlInsert(zobj->ptr,ele,score);
            *out_flags |= ZADD_OUT_UPDATED;
        }
        return 1;
    }

    /* Anything that is neither ziplist nor skiplist is a bug. */
    if (zobj->encoding != OBJ_ENCODING_SKIPLIST) {
        serverPanic("Unknown sorted set encoding");
        return 0; /* Never reached. */
    }

    /* ---- Skiplist encoding ---- */
    zset *zs = zobj->ptr;
    zskiplistNode *znode;
    dictEntry *de = dictFind(zs->dict,ele); /* Is the member already there? */

    if (de == NULL) {
        /* Member missing: XX forbids creating it. */
        if (only_existing) {
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }

        /* The skiplist takes ownership of the string it is given, so hand
         * it a private copy. zslInsert() only links the node into the
         * skiplist; the member -> score mapping in the dict is added here. */
        ele = sdsdup(ele);
        znode = zslInsert(zs->zsl,score,ele);
        serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
        *out_flags |= ZADD_OUT_ADDED;
        if (newscore) *newscore = score;
        return 1;
    }

    /* Member present: NX forbids touching it. */
    if (only_new) {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }

    oldscore = *(double*)dictGetVal(de);

    /* With INCR the final score is current + increment. */
    if (want_incr) {
        score += oldscore;
        if (isnan(score)) {
            *out_flags |= ZADD_OUT_NAN;
            return 0;
        }
    }

    /* GT/LT: skip unless the score moves in the requested direction. */
    if ((only_less && score >= oldscore) ||
        (only_greater && score <= oldscore))
    {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }

    if (newscore) *newscore = score;

    if (score != oldscore) {
        znode = zslUpdateScore(zs->zsl,oldscore,ele,score);
        /* The dict entry was never removed, so it is enough to repoint its
         * value at the score stored in the node returned above. */
        dictGetVal(de) = &znode->score;
        *out_flags |= ZADD_OUT_UPDATED;
    }
    return 1;
}

3、zslInsert(执行skiplist插入新节点的操作)


/* Insert a new node holding ('score', 'ele') into the skiplist 'zsl' and
 * return it.
 *
 * The element is assumed not to be in the list already; enforcing that is up
 * to the caller. The skiplist takes ownership of the SDS string 'ele'.
 * 'score' must not be NaN (asserted). */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
    /* prev[i]: at level i, the last node that sorts before the new one; the
     * new node is linked right after it. */
    zskiplistNode *prev[ZSKIPLIST_MAXLEVEL];
    /* crossed[i]: sum of the spans followed from the header to reach
     * prev[i], accumulated from the top level down. */
    unsigned int crossed[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *node;
    int lvl, height;

    serverAssert(!isnan(score));

    /* Walk from the highest level in use down to level 0, recording the
     * insertion point on every level. */
    node = zsl->header;
    for (lvl = zsl->level-1; lvl >= 0; lvl--) {
        /* The top level starts counting at 0; every lower level continues
         * from the count reached on the level above. */
        if (lvl == (zsl->level-1))
            crossed[lvl] = 0;
        else
            crossed[lvl] = crossed[lvl+1];

        for (;;) {
            zskiplistNode *next = node->level[lvl].forward;

            if (next == NULL) break;
            /* Keep advancing while 'next' sorts before the new entry:
             * lower score, or equal score and smaller member. */
            if (!(next->score < score ||
                  (next->score == score && sdscmp(next->ele,ele) < 0)))
                break;

            crossed[lvl] += node->level[lvl].span; /* Nodes skipped by this hop. */
            node = next;
        }
        prev[lvl] = node;
    }

    /* Duplicate scores are allowed, but re-inserting the same element should
     * never happen: the caller is expected to have checked the hash table
     * first. */
    height = zslRandomLevel(); /* Number of levels of the new node. */

    /* If the new node is taller than the list, bring the extra header
     * levels into use: they start at the header and span the whole list. */
    if (height > zsl->level) {
        for (lvl = zsl->level; lvl < height; lvl++) {
            crossed[lvl] = 0;
            prev[lvl] = zsl->header;
            zsl->header->level[lvl].span = zsl->length;
        }
        zsl->level = height;
    }

    node = zslCreateNode(height,score,ele);

    /* Splice the node into each of its levels and fix up the spans. */
    for (lvl = 0; lvl < height; lvl++) {
        /* Nodes between prev[lvl] and the level-0 insertion point. */
        unsigned int offset = crossed[0] - crossed[lvl];

        node->level[lvl].forward = prev[lvl]->level[lvl].forward;
        prev[lvl]->level[lvl].forward = node;

        /* The old span of prev[lvl] is split in two around the new node;
         * the +1 accounts for the new node itself. */
        node->level[lvl].span = prev[lvl]->level[lvl].span - offset;
        prev[lvl]->level[lvl].span = offset + 1;
    }

    /* Levels above the new node are not relinked, but their links now jump
     * over one more node. */
    for (lvl = height; lvl < zsl->level; lvl++) {
        prev[lvl]->level[lvl].span++;
    }

    /* Backward pointer: NULL when the new node directly follows the header. */
    if (prev[0] == zsl->header)
        node->backward = NULL;
    else
        node->backward = prev[0];

    /* Fix the successor's backward pointer, or the tail if there is none. */
    if (node->level[0].forward)
        node->level[0].forward->backward = node;
    else
        zsl->tail = node;

    zsl->length++;
    return node;
}

4、skiplist结构图说明

下面的图我是复制的http://timd.cn/redis-zset/的，并且有部分说明也是这里面的(我自己写可能也没人家写的好并且还这么全)，下面的图没有画 backward 指针，但是不影响

在这里插入图片描述

zskiplistNode 是跳表的节点：
- ele 表示跳表中的元素
- score 表示元素的分值，跳表根据该值对节点进行排序
- backward 指向前一个节点，头节点和第一节点的前一个节点是 NULL
- 数组 level
  - level 的长度表示节点占据的高度，头节点的高度是 ZSKIPLIST_MAXLEVEL，其它节点的高度在 1 和 ZSKIPLIST_MAXLEVEL 之间（包含 1 和 ZSKIPLIST_MAXLEVEL）
  - 分量 level[i] 包含 2 个域：
    - forward 指向第 i 层上的下一个节点，最后一个节点在第 i 层上的下一个节点是 NULL
    - span 表示从当前节点沿第 i 层的 forward 指针走到下一个节点所跨越的节点数（即两个节点的排位之差）
zskiplist 是跳表：
- header、tail 分别指向跳表的头节点、尾节点
- length 表示跳表的长度
- level 表示跳表当前的最大层数

胖墩的IT

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Redis 6.2 zset的写入(源码),最后介绍一下skiplist的结构

文章目录1、zaddGenericCommand(入口函数)2、zsetAdd（真正执行添加操作的函数）3、zslInsert(执行skiplist插入新节点的操作)4、skiplist结构图说明1、zaddGenericCommand(入口函数)首先命令server.c文件夹下的 {"zadd",zaddCommand,-4, "write use-memory fast @sortedset", 0,NULL,1,1,1,0,0,0},而下面是服务器内部执行的逻辑/*
复制链接

扫一扫