Redis源码分析（七）——压缩列表Ziplist

最新推荐文章于 2021-11-04 08:49:20 发布

独孤_子喻

最新推荐文章于 2021-11-04 08:49:20 发布

阅读量738

点赞数

分类专栏： Redis源码分析

本文链接：https://blog.csdn.net/yuyixinye/article/details/40303403

版权

Redis源码分析专栏收录该内容

25 篇文章 5 订阅

订阅专栏

Ziplist是由一系列特殊编码的内存块构成的列表，一个ziplist可以包含多个节点（entry），每个节点可以保存一个长度有限的字符数组（不以\0结尾的char数组）或者一个整数。其中字符数组分为三个长度等级：长度小于等于2^6-1字节、小于等于2^14-1字节或小于等于2^32-1字节。整数包括：4位（bit）长的0至12之间的无符号整数、1字节有符号整数、3字节有符号整数，以及16位、32位或64位整数。

Ziplist与链表有本质的不同。Ziplist整个列表是由一块连续的内存构成，列表中的节点没有类似链表节点中的next或prev指针，而是在每个节点中记录自身的字节长度以及其前驱节点的字节长度，从而进行指针移动（向前或向后移动节点长度的字节数）来进行正向或者反向的遍历操作。由于节点中不用存放类似链表的next或prev指针，而把所有节点压缩在一块连续的内存中，这样相对链表节约了内存。

以下主要对Ziplist的构成，其节点的构成以及几个主要的API函数进行分析。

整个ziplist的基本结构如下图：

其中各个值域的含义如下：

而其节点结构：

/* Decoded view of one ziplist entry. All sizes are in bytes.
 *
 * An entry is a header followed by its data; code using this struct
 * computes the total size of an entry as headersize + len. */
typedef struct zlentry {
    /* Number of bytes used to encode the previous entry's length. */
    unsigned int prevrawlensize;
    /* Length of the previous entry; stepping back this many bytes from the
     * current entry reaches the previous one. */
    unsigned int prevrawlen;
    /* Number of bytes used to encode the length of this entry's data. */
    unsigned int lensize;
    /* Length of the data stored in this entry. */
    unsigned int len;
    /* Size of this entry's header. */
    unsigned int headersize;
    /* Type of the stored data (one of the string or integer encodings). */
    unsigned char encoding;
    /* NOTE(review): appears to be the address of the entry itself (header
     * included) rather than a copy of its content -- confirm in zipEntry(). */
    unsigned char *p;
} zlentry;

节点基本结构如下图：

几个主要的API函数 ziplist.c：

ziplist的所有操作都是基于内存块大小的重新分配以及指针在内存上的前后移动而实现的，这些实现正是依赖于在每个节点中保存了的其前驱节点和自身节点的字节大小。

__ziplistCascadeUpdate ：连锁更新p指向节点的后续节点。
当一个新节点插入时，可能需要重新设置插入点的next节点的prevlen域的大小，以便存放新节点的长度（如果原来为1字节，当新节点较大时就需要把prevlen域扩展为5个字节）。这一步发生在新节点已经插入之后（插入时会引发列表空间的重新分配和内存的移动）。重置prevlen域的大小会改变该节点的总大小，从而可能导致其后续节点的prevlen域发生连锁变化。这种情况也可能反过来，即prevlen域可能缩小（同样可能产生连锁反应）：在插入一个较大节点使得prevlen域变大之后，再插入一个较小的节点，会使先前扩大为5个字节的prevlen域可以缩小，从而连锁影响后续节点的prevlen域。实际上在实现时故意忽略了这种缩小的情况。

/* Cascade "prevlen" field updates through the entries that follow p.
 *
 * Each iteration computes the raw length of the entry at the cursor
 * (headersize + len) and compares it with what the next entry has recorded
 * in its "prevlen" field:
 *   - the recorded length already matches: nothing can change further, stop;
 *   - the field is too narrow: grow the ziplist, widen the field and move
 *     the cursor to the next entry, whose own size has just changed;
 *   - the field is wide enough (possibly wider than needed): rewrite the
 *     length in place without shrinking the field, then stop.
 *
 * zl - the ziplist; it may be resized, so callers must use the return value.
 * p  - entry whose successors have to be checked.
 *
 * Returns the ziplist pointer as last returned by ziplistResize. */
static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
    size_t offset, noffset, extra;
    unsigned char *np;
    zlentry cur, next;

    while (p[0] != ZIP_END) {
        cur = zipEntry(p);//entry at the cursor
        rawlen = cur.headersize + cur.len;
        /* Bytes needed to encode rawlen in a "prevlen" field (nothing is
         * written because the destination is NULL). */
        rawlensize = zipPrevEncodeLength(NULL,(unsigned int)rawlen);

        /* Abort if there is no next entry. */
        if (p[rawlen] == ZIP_END) break;
        next = zipEntry(p+rawlen);

        /* Abort when "prevlen" has not changed. */
        if (next.prevrawlen == rawlen) break;//recorded length is already correct, nothing to update

        /* The "prevlen" field of "next" is too narrow and has to grow. */
        if (next.prevrawlensize < rawlensize) {//field must be widened
            /* The "prevlen" field of "next" needs more bytes to hold
             * the raw length of "cur". */
            /* Remember the cursor as an offset and rebuild it afterwards:
             * zl is reassigned by the resize. */
            offset = p-zl;
            extra = rawlensize-next.prevrawlensize;
            zl = ziplistResize(zl,(unsigned int)(curlen+extra));
            p = zl+offset;

            /* Current pointer and offset for next element. */
            np = p+rawlen;
            noffset = np-zl;

            /* Update tail offset when next element is not the tail element. */
            if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
                ZIPLIST_TAIL_OFFSET(zl) =
                    (uint32_t)intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
            }

            /* Move the tail to the back. */
            /* Everything after next's old "prevlen" field is shifted so that
             * the widened field fits. NOTE(review): the trailing -1
             * presumably leaves out the end-marker byte -- confirm against
             * ziplistResize. */
            memmove(np+rawlensize,
                np+next.prevrawlensize,
                curlen-noffset-next.prevrawlensize-1);
            zipPrevEncodeLength(np,(unsigned int)rawlen);

            /* Advance the cursor */
            p += rawlen;
            curlen += extra;
        } else {
            if (next.prevrawlensize > rawlensize) {//field is wider than needed: shrinking is deliberately skipped
                /* This would result in shrinking, which we want to avoid.
                 * So, set "rawlen" in the available bytes. */
                zipPrevEncodeLengthForceLarge(p+rawlen,(unsigned int)rawlen);
            } else {
                zipPrevEncodeLength(p+rawlen,(unsigned int)rawlen);
            }

            /* Stop here, as the raw length of "next" has not changed. */
            break;  //no further cascading is possible, leave the loop
        }
    }
    return zl;
}

__ziplistInsert： 在指定位置插入新节点。

如果插入点在列表的末尾，首先重新分配列表空间为新节点预留空间，然后要插入新节点并设置新节点的各个域的值，最后更新列表的相应值即可。
如果插入点在列表的中间某位置，则要考虑新节点的插入对列表中后续节点的影响，这可能产生连锁影响。

/* Insert a new entry holding the slen bytes at s into zl at position p.
 *
 * zl   - the ziplist; it is resized, so callers must use the return value.
 * p    - insertion point: an existing entry (the new entry is placed in
 *        front of it) or the end marker (the new entry becomes the tail).
 * s    - bytes to store; kept as an integer when zipTryEncoding accepts
 *        them, otherwise copied verbatim as a string.
 * slen - number of bytes at s.
 *
 * Returns the ziplist pointer as returned by the resize / cascade update. */
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen; /* curlen: current total size */
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    int forcelarge = 0; /* keep the next entry's 5-byte prevlen field as is */
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    if (p[0] != ZIP_END) {
        /* Inserting in front of an existing entry: the new entry inherits
         * that entry's "previous entry length". */
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        /* Inserting at the end: the previous entry is the current tail,
         * unless the list is empty (the tail pointer then addresses the
         * end marker itself). */
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        if (ptail[0] != ZIP_END) {
            prevlen = zipRawEntryLength(ptail);
        }
    }

    /* See if the entry can be encoded */
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipEncodeLength will use the
         * string length to figure out how to encode it. */
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */
    reqlen += zipPrevEncodeLength(NULL,(unsigned int)prevlen);
    reqlen += zipEncodeLength(NULL,encoding,slen);

    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,(unsigned int)reqlen) : 0;

    /* If the next entry's prevlen field would shrink by 4 bytes while the
     * new entry is shorter than 4 bytes, the list would get SMALLER overall:
     * the resize below would cut off live data and the memmove would read
     * past the new end of the buffer. In that case leave the 5-byte field
     * in place and store the small length in it. */
    if (nextdiff == -4 && reqlen < 4) {
        nextdiff = 0;
        forcelarge = 1;
    }

    /* Store offset because a realloc may change the address of zl. */
    offset = p-zl;
    /* New size: current size + new entry + change in the next entry's
     * prevlen field. */
    zl = ziplistResize(zl,(unsigned int)(curlen+reqlen+nextdiff));
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        if (forcelarge) {
            zipPrevEncodeLengthForceLarge(p+reqlen,(unsigned int)reqlen);
        } else {
            zipPrevEncodeLength(p+reqlen,(unsigned int)reqlen);
        }

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            (uint32_t)intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        tail = zipEntry(p+reqlen);
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = (uint32_t)intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    p += zipPrevEncodeLength(p,(unsigned int)prevlen);
    p += zipEncodeLength(p,encoding,slen);
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1); /* one more entry in the list */
    return zl;
}

__ziplistDelete：删除指针p指向节点开始的num个节点，返回列表指针

/* Delete up to num consecutive entries from zl, starting with the entry at p.
 *
 * zl  - the ziplist; it is resized, so callers must use the return value.
 * p   - first entry to delete.
 * num - maximum number of entries to delete; deletion stops early when the
 *       end marker is reached.
 *
 * Returns the ziplist pointer (reassigned by the resize / cascade update).
 * Nothing is modified when no bytes end up being deleted. */
static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
    unsigned int i, totlen, deleted = 0;
    size_t offset;
    int nextdiff = 0;
    zlentry first, tail;

    first = zipEntry(p);//decoded form of the first entry to delete
    for (i = 0; p[0] != ZIP_END && i < num; i++) {
        p += zipRawEntryLength(p);//step over one entry; p ends up just past the last deleted entry
        deleted++;
    }

    totlen = (unsigned int)(p-first.p);//total number of bytes being deleted
    if (totlen > 0) {
        if (p[0] != ZIP_END) {
            /* Storing `prevrawlen` in this entry may increase or decrease the
             * number of bytes required compare to the current `prevrawlen`.
             * There always is room to store this, because it was previously
             * stored by an entry that is now being deleted. */
            /* nextdiff is the change in width of the surviving entry's
             * prevlen field once it has to record first.prevrawlen
             * (presumably needed bytes minus current bytes -- confirm in
             * zipPrevLenByteDiff). */
            nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);//width change of the surviving entry's prevlen field
            /* Move the start of the surviving entry by that width change;
             * when the field grows, the extra bytes come out of the region
             * that is being deleted, so nothing behind p has to move yet. */
            p -= nextdiff;//new start of the surviving entry
            /* A changed field width changes this entry's size, which may
             * cascade into the following entries (handled at the end). */
            zipPrevEncodeLength(p,first.prevrawlen);//record the new predecessor's length

            /* Update offset for tail */
            ZIPLIST_TAIL_OFFSET(zl) =//tail offset minus the deleted bytes (nextdiff not yet applied)
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen);

            /* When the tail contains more than one entry, we need to take
             * "nextdiff" in account as well. Otherwise, a change in the
             * size of prevlen doesn't have an effect on the *tail* offset. */
            tail = zipEntry(p);
            if (p[tail.headersize+tail.len] != ZIP_END) {
                ZIPLIST_TAIL_OFFSET(zl) =
                   intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
            }

            /* Move tail to the front of the ziplist */
            memmove(first.p,p,
                intrev32ifbe(ZIPLIST_BYTES(zl))-(p-zl)-1);
        } else {
            /* The entire tail was deleted. No need to move memory. */
            /* The new tail is the entry in front of the deleted range. */
            ZIPLIST_TAIL_OFFSET(zl) =
                (unsigned int)intrev32ifbe((first.p-zl)-first.prevrawlen);
        }

        /* Resize and update length */
        offset = first.p-zl;
        zl = ziplistResize(zl, intrev32ifbe(ZIPLIST_BYTES(zl))-totlen+nextdiff);//shrink the allocation to the new size
        /* NOTE(review): deleted is unsigned, so -deleted wraps around; this
         * relies on how ZIPLIST_INCR_LENGTH does its arithmetic -- confirm. */
        ZIPLIST_INCR_LENGTH(zl,-deleted);//update the entry count
        p = zl+offset;

        /* When nextdiff != 0, the raw length of the next entry has changed, so
         * we need to cascade the update throughout the ziplist */
        if (nextdiff != 0)//the surviving entry changed size, so its successors may need updating
            zl = __ziplistCascadeUpdate(zl,p);
    }
    return zl;
}

查找给定节点的next或者prev节点：

/* Return the entry that follows 'p' in the ziplist.
 *
 * zl is the ziplist (not used by this function).
 * p  is the current element.
 *
 * NULL is returned when 'p' is the end marker (which can happen after
 * ziplistDelete) or when 'p' is the last entry of the list. */
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {
    unsigned char *succ;

    (void)zl;

    /* Already at the end marker: there is nothing to step over. */
    if (*p == ZIP_END) {
        return NULL;
    }

    /* Skip the whole current entry; landing on the end marker means the
     * current entry was the last one. */
    succ = p + zipRawEntryLength(p);
    return (*succ != ZIP_END) ? succ : NULL;
}

/* Return the entry that precedes 'p' in the ziplist, or NULL if there is
 * none.
 *
 * Stepping back from the end marker yields the tail entry (NULL for an
 * empty list); stepping back from the head entry yields NULL. */
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {
    unsigned int lenbytes, backstep = 0;

    if (p[0] == ZIP_END) {
        /* In an empty list the tail pointer addresses the end marker. */
        unsigned char *last = ZIPLIST_ENTRY_TAIL(zl);
        return (last[0] != ZIP_END) ? last : NULL;
    }

    if (p == ZIPLIST_ENTRY_HEAD(zl)) {
        return NULL;
    }

    /* The entry records the length of its predecessor; move back by it. */
    ZIP_DECODE_PREVLEN(p, lenbytes, backstep);
    assert(backstep > 0);
    return p - backstep;
}

ziplist.h：

unsigned char *ziplistNew(void);//create a new ziplist
//push a new entry holding the given value onto the head or the tail of the ziplist (selected by where)
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
//return a pointer to the entry at the given index
unsigned char *ziplistIndex(unsigned char *zl, int index);
//return a pointer to the entry after p
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
//return a pointer to the entry before p
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
//fetch the value stored in the entry at p (presumably strings via sval/slen and integers via lval -- confirm in ziplist.c)
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
//insert a new entry holding the given value into the ziplist at p
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
//delete the given entry (the entry pointer is passed by reference; presumably it is updated for the caller -- confirm)
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
//delete the entries in the given index range (num entries starting at index)
unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num);
//compare the entry at p with the slen bytes at s (return convention not visible here)
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
//search the given ziplist for an entry holding the given value and return it (meaning of skip not visible here)
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
//return the number of entries in the given ziplist
unsigned int ziplistLen(unsigned char *zl);
//return the number of bytes of memory occupied by the given ziplist
size_t ziplistBlobLen(unsigned char *zl);

小结：

** 添加或删除ziplist的节点有可能会引发连锁更新，因此添加或者删除操作最坏复杂度为O(N^2)，不过因为连锁更新出现的概率并不高，所以一般可认为其复杂度为O(N)。

** ziplist是哈希键、列表键和有序集合的底层实现之一。