redis源码浅析--六-压缩列表

最新推荐文章于 2020-11-03 11:03:45 发布

wangxiaoangg

最新推荐文章于 2020-11-03 11:03:45 发布

阅读量265

点赞数 1

分类专栏： redis 文章标签： ziplist redis源码 c++

本文链接：https://blog.csdn.net/qq_16399991/article/details/106222976

版权

redis 专栏收录该内容

37 篇文章 3 订阅

订阅专栏

环境说明：redis源码版本 5.0.3；我在阅读源码过程做了注释，git地址：https://gitee.com/xiaoangg/redis_annotation

参考书籍：《redis的设计与实现》

文章推荐：
redis源码阅读-一--sds简单动态字符串
 redis源码阅读--二-链表
 redis源码阅读--三-redis散列表的实现
 redis源码浅析--四-redis跳跃表的实现
 redis源码浅析--五-整数集合的实现
 redis源码浅析--六-压缩列表
 redis源码浅析--七-redisObject对象(下)（内存回收、共享）
redis源码浅析--八-数据库的实现
 redis源码浅析--九-RDB持久化
 redis源码浅析--十-AOF（append only file）持久化
 redis源码浅析--十一.事件（上）文件事件
 redis源码浅析--十一.事件（下）时间事件
 redis源码浅析--十二.单机数据库的实现-客户端
 redis源码浅析--十三.单机数据库的实现-服务端 - 时间事件
 redis源码浅析--十三.单机数据库的实现-服务端 - redis服务器的初始化
 redis源码浅析--十四.多机数据库的实现（一）--新老版本复制功能的区别与实现原理
 redis源码浅析--十四.多机数据库的实现（二）--复制的实现SLAVEOF、PSYNC
redis源码浅析--十五.哨兵sentinel的设计与实现
 redis源码浅析--十六.cluster集群的设计与实现
 redis源码浅析--十七.发布与订阅的实现
 redis源码浅析--十八.事务的实现
 redis源码浅析--十九.排序的实现
 redis源码浅析--二十.BIT MAP的实现
 redis源码浅析--二十一.慢查询日志的实现
 redis源码浅析--二十二.监视器的实现

一.数据结构

1.压缩列表的构成

以ziplistNew（创建一个空的压缩列表）接口为入口，可以更容易地理解压缩列表的构成：

/* Ziplist header layout: a 32-bit total byte count, a 32-bit offset to the
 * last entry, then a 16-bit entry count. */
#define ZIPLIST_HEADER_SIZE     (2*sizeof(uint32_t) + sizeof(uint16_t))

/* Total-bytes field: the first 32-bit word of the ziplist. Expands to an
 * lvalue, so it can be both read and assigned. */
#define ZIPLIST_BYTES(zl)       (*(uint32_t *)(zl))

/* Tail-offset field: the 32-bit word that follows the total-bytes field.
 * Holds the offset of the last entry inside the ziplist. */
#define ZIPLIST_TAIL_OFFSET(zl) (*(uint32_t *)((zl) + sizeof(uint32_t)))

/* Entry-count field: the 16-bit word that follows the two 32-bit fields.
 * Holds the number of entries, or UINT16_MAX when the count cannot be
 * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl)      (*(uint16_t *)((zl) + 2*sizeof(uint32_t)))

/* Special byte value that marks the end of a ziplist. */
#define ZIP_END 255


/* Allocate and initialise an empty ziplist.
 *
 * The buffer holds only the header followed by the ZIP_END marker. The
 * returned pointer comes from zmalloc(). */
unsigned char *ziplistNew(void) {
    /* Header plus the single terminating byte. */
    unsigned int total = ZIPLIST_HEADER_SIZE + 1;
    unsigned char *list = zmalloc(total);

    /* The first 32 bits record the total number of bytes in the ziplist. */
    ZIPLIST_BYTES(list) = intrev32ifbe(total);
    /* With no entries yet, the tail offset is simply the header size. */
    ZIPLIST_TAIL_OFFSET(list) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    ZIPLIST_LENGTH(list) = 0;
    /* The last byte carries the end-of-ziplist marker (255). */
    list[total - 1] = ZIP_END;
    return list;
}

2.压缩列表中节点的构成

2.1 previous_entry_len

previous_entry_len 属性的长度可以是一字节或者是5字节。
如果前一节点的长度小于254字节，那么previous_entry_len长度是一字节。
如果前一节点的长度大于等于254字节，那么previous_entry_len的长度是5字节，其中第一个字节会被设置为254，之后的四个字节用来存储前一节点的长度。
程序可以根据previous_entry_len属性，来计算前一节点的地址。
如有一个当前节点指针c，那么c减去previous_entry_len就是上一节点的地址。

上源码：解析previous_entry_len属性的逻辑位于ziplist.c中的宏ZIP_DECODE_PREVLEN：

/* Threshold for the prevlen prefix. A previous-entry length below this value
 * is stored in a single byte; otherwise the first byte is set to this marker
 * (254) and the length follows in the next four bytes, five bytes in total. */
#define ZIP_BIG_PREVLEN 254

/* Return the number of bytes used to encode the length of the previous
 * entry. The length is returned by setting the var 'prevlensize'.
 *
 * 'ptr' must point to the first byte of the prevlen prefix. A result of 5
 * means the first five bytes at 'ptr' encode the previous entry's length:
 * the 254 marker followed by a four-byte length.
 *
 * The expansion deliberately has no trailing semicolon: the caller supplies
 * it, which keeps the macro usable as the body of an unbraced if/else. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)


/* Return the length of the previous element, and the number of bytes that
 * are used in order to encode the previous element length.
 * 'ptr' must point to the prevlen prefix of an entry (that encodes the
 * length of the previous entry in order to navigate the elements backward).
 * The length of the previous entry is stored in 'prevlen', the number of
 * bytes needed to encode the previous entry length are stored in
 * 'prevlensize'.
 *
 * In the five-byte form the length is copied from the four bytes after the
 * marker byte, so 'prevlen' must be a 4-byte object (checked by the assert).
 *
 * The expansion has no trailing semicolon so that the macro behaves like a
 * single statement, and every use of a macro argument is parenthesized. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else if ((prevlensize) == 5) {                                           \
        assert(sizeof((prevlen)) == 4);                                        \
        memcpy(&(prevlen), ((char*)(ptr)) + 1, 4);                             \
        memrev32ifbe(&(prevlen));                                              \
    }                                                                          \
} while(0)

2.2 encoding

encoding记录所属节点的content的编码方式，以及长度。

encoding值的最高两位为00、01、10时，表示content的编码方式为字节数组。
字节数组的长度由去除最高两位后的其余位记录。

上源码：


/* Mask selecting the two most significant bits of an encoding byte
 * (0xc0 == 0b11000000). */
#define ZIP_STR_MASK 0xc0

/* Non-zero when 'enc' denotes a string entry, i.e. when the two most
 * significant bits of the encoding byte are anything other than "11". */
#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) != ZIP_STR_MASK)

encoding值的最高两位为11时，表示content的编码方式是整数。

整数编码
编码	encoding编码长度	content保存的值
1100 0000	1字节	int_16 类型的整数
1101 0000	1字节	int_32类型的整数
1110 0000	1字节	int_64类型的整数
1111 0000	1字节	24位的有符号整数
1111 1110	1字节	8位有符号整数
1111 xxxx	1字节	当值大于等于0且小于等于12时，使用该编码方式；这时节点没有content属性，值直接编码在encoding属性的低4位中（encoding = ZIP_INT_IMM_MIN + value）。

整数类型的编码方式可以阅读源码函数zipTryEncoding：

/* Integer encodings. Each starts with the bits "11"; for the first four the
 * next two bits select the variant, while the 8-bit form has its own value. */
#define ZIP_INT_16B (0xc0 | (0 << 4)) /* 1100 0000 */
#define ZIP_INT_32B (0xc0 | (1 << 4)) /* 1101 0000 */
#define ZIP_INT_64B (0xc0 | (2 << 4)) /* 1110 0000 */
#define ZIP_INT_24B (0xc0 | (3 << 4)) /* 1111 0000 */
#define ZIP_INT_8B  0xfe              /* 1111 1110 */

/* Try to interpret the 'entrylen' bytes at 'entry' as an integer.
 *
 * On success the parsed value is written to '*v', the first encoding in
 * the chain below whose range contains it is written to '*encoding', and
 * 1 is returned. If the input is empty, 32 bytes or longer, or rejected by
 * string2ll(), 0 is returned and neither output is touched. */
int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
    long long parsed;

    /* Length gate first, then the actual string-to-integer conversion. */
    if (entrylen == 0 || entrylen >= 32) return 0;
    if (!string2ll((char*)entry,entrylen,&parsed)) return 0;

    /* Check the ranges from narrowest to widest and take the first fit. */
    if (parsed >= 0 && parsed <= 12) {
        /* Values 0..12 are folded into the encoding byte itself. */
        *encoding = ZIP_INT_IMM_MIN+parsed;
    } else if (parsed >= INT8_MIN && parsed <= INT8_MAX) {
        *encoding = ZIP_INT_8B;
    } else if (parsed >= INT16_MIN && parsed <= INT16_MAX) {
        *encoding = ZIP_INT_16B;
    } else if (parsed >= INT24_MIN && parsed <= INT24_MAX) {
        *encoding = ZIP_INT_24B;
    } else if (parsed >= INT32_MIN && parsed <= INT32_MAX) {
        *encoding = ZIP_INT_32B;
    } else {
        /* Everything else needs the full 64-bit form. */
        *encoding = ZIP_INT_64B;
    }

    *v = parsed;
    return 1;
}

2.3 content

content属性负责保存节点的值；

节点的值可以是整数，或是字节数组；

值的类型和长度存储在encoding属性中；

二.连锁更新

上面说过，每个节点都有一个previous_entry_len属性记录上一节点的长度；

如果上一节点长度小于254字节，那么previous_entry_len 属性需要一个字节来存储；

如果上一节点长度大于等于254字节，那么previous_entry_len 属性需要五个字节来存储；

那么问题来了：如果列表中有多个连续的节点，且它们的长度都介于250到253字节之间，此时在这些连续节点之前插入一个长度大于等于254字节的节点，那么紧随其后的节点的previous_entry_len属性就必须从1字节扩展为5字节；扩展之后该节点自身的长度也会达到254字节以上，于是下一个节点同样需要扩展……如此类推，引发连锁的内存重分配操作；

连锁更新在最坏的情况下，需要对压缩列表进行N次空间重分配操作；每次空间重分配的最坏复杂度是O(N)；所以连锁更新的最坏时间复杂度是O(N^2)；

上连锁更新源码：


/* When an entry is inserted, we need to set the prevlen field of the next
 * entry to equal the length of the inserted entry. It can occur that this
 * length cannot be encoded in 1 byte and the next entry needs to be grow
 * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
 * because this only happens when an entry is already being inserted (which
 * causes a realloc and memmove). However, encoding the prevlen may require
 * that this entry is grown as well. This effect may cascade throughout
 * the ziplist when there are consecutive entries with a size close to
 * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
 * every consecutive entry.
 *
 * Note that this effect can also happen in reverse, where the bytes required
 * to encode the prevlen field can shrink. This effect is deliberately ignored,
 * because it can cause a "flapping" effect where a chain prevlen fields is
 * first grown and then shrunk again after consecutive inserts. Rather, the
 * field is allowed to stay larger than necessary, because a large prevlen
 * field implies the ziplist is holding large entries anyway.
 *
 * The pointer "p" points to the first entry that does NOT need to be
 * updated, i.e. consecutive fields MAY need an update.
 *
 * Parameters:
 *   zl - the ziplist; it is passed to ziplistResize() whenever a prevlen
 *        field has to grow.
 *   p  - entry inside zl from which the walk starts.
 *
 * Returns the ziplist pointer after any resizing. zl is reassigned from the
 * result of ziplistResize() inside the loop, so the returned pointer is the
 * one to keep using. */
unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
    /* curlen starts as the total byte count read from the header and is kept
     * in step with every growth performed below. */
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
    size_t offset, noffset, extra;
    unsigned char *np;
    zlentry cur, next;

    while (p[0] != ZIP_END) {
        /* Decode the entry at p; rawlen is its header size plus its len as
         * reported by zipEntry(). */
        zipEntry(p, &cur);
        rawlen = cur.headersize + cur.len;
        /* Called with a NULL destination: only the return value is used,
         * taken here as the number of bytes a prevlen field needs in order
         * to hold rawlen. */
        rawlensize = zipStorePrevEntryLength(NULL,rawlen);

        /* Abort if there is no next entry. */
        if (p[rawlen] == ZIP_END) break;
        zipEntry(p+rawlen, &next);

        /* Abort when "prevlen" has not changed. */
        if (next.prevrawlen == rawlen) break;

        if (next.prevrawlensize < rawlensize) {
            /* The "prevlen" field of "next" needs more bytes to hold
             * the raw length of "cur". */
            /* Remember p as an offset: zl is replaced by the result of
             * ziplistResize(), so p is rebuilt from the new base. */
            offset = p-zl;
            extra = rawlensize-next.prevrawlensize;
            zl = ziplistResize(zl,curlen+extra);
            p = zl+offset;

            /* Current pointer and offset for next element. */
            np = p+rawlen;
            noffset = np-zl;

            /* Update tail offset when next element is not the tail element. */
            /* If "next" itself is the tail, its start does not move; only
             * entries after it are shifted by the memmove below. */
            if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
                ZIPLIST_TAIL_OFFSET(zl) =
                    intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
            }

            /* Move the tail to the back. */
            /* The source starts right after the old prevlen field of "next"
             * and the destination leaves room for the wider field. The
             * length runs up to, but excludes, the last byte of the old
             * buffer (the -1) -- presumably the ZIP_END marker; confirm
             * that ziplistResize() rewrites it. */
            memmove(np+rawlensize,
                np+next.prevrawlensize,
                curlen-noffset-next.prevrawlensize-1);
            zipStorePrevEntryLength(np,rawlen);

            /* Advance the cursor */
            /* "next" has just grown, so the following iteration re-checks
             * the entry after it. */
            p += rawlen;
            curlen += extra;
        } else {
            if (next.prevrawlensize > rawlensize) {
                /* This would result in shrinking, which we want to avoid.
                 * So, set "rawlen" in the available bytes. */
                zipStorePrevEntryLengthLarge(p+rawlen,rawlen);
            } else {
                /* Field width already matches: overwrite the value in place. */
                zipStorePrevEntryLength(p+rawlen,rawlen);
            }

            /* Stop here, as the raw length of "next" has not changed. */
            break;
        }
    }
    return zl;
}

三.压缩列表API


/* Public ziplist API as listed by the article. Functions that return
 * unsigned char * and take the list as 'zl' hand back a ziplist pointer. */
unsigned char *ziplistNew(void); //create a new, empty ziplist
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where); //push a value onto the head or the tail of the list, selected by 'where'
unsigned char *ziplistIndex(unsigned char *zl, int index); //return the entry at the given index
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);//return the entry that follows the given entry
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);//return the entry that precedes the given entry
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval); //fetch the value stored in the given entry
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);//insert a new entry holding s (slen bytes) at the position given by p; NOTE(review): the original note said "after p" -- confirm the side against ziplist.c
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p); //delete the given entry
unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num); //delete num consecutive entries; NOTE(review): confirm whether the range starts at index or after it
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); //find and return the entry holding the given value
unsigned int ziplistLen(unsigned char *zl); //return the number of entries in the list
size_t ziplistBlobLen(unsigned char *zl);//return the number of bytes the ziplist occupies
void ziplistRepr(unsigned char *zl);

注：可以以ziplistInsert作为入口阅读源码；上面讲到的所有知识点在ziplistInsert函数中都有涉及。

wangxiaoangg

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
redis源码浅析--六-压缩列表

环境说明：redis源码版本 5.0.3；我在阅读源码过程做了注释，git地址：https://gitee.com/xiaoangg/redis_annotation参考书籍：《redis的设计与实现》一.数据结构1.压缩列表的构成以ziplistNew（创建一个空的压缩列表）接口为入口，可以更容易的理解压缩列表的构成//压缩列表的头部大小 = 一个uint32（记录列表总长度） + 一个uint32（到表尾节点需要偏移的字节数） + uint16(记录总节点数量)/* Th.
复制链接

扫一扫