Redis——ziplist(压缩列表)

最新推荐文章于 2024-01-20 16:33:03 发布

脉望虫

最新推荐文章于 2024-01-20 16:33:03 发布

阅读量481

点赞数

分类专栏： Redis阅读文章标签： redis 数据库缓存

本文链接：https://blog.csdn.net/qq_36763031/article/details/125526597

版权

Redis阅读专栏收录该内容

7 篇文章 0 订阅

订阅专栏

ziplist

Redis中的压缩列表是一种非常高效的数据存储形式，数据存储紧凑，能有效节约存储空间；如无特别说明，各字段均以小端存储（下文会提到个别以大端存储的例外）。详细的介绍可以参照：Redis底层数据结构之ziplist（压缩列表）

简介

整体布局

压缩列表的整体布局为：

<zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend>

其中：
zlbytes：压缩列表的总字节数；
zltail：最后一个entry相对于压缩列表起始地址的偏移量（字节数），头部的指针加上zltail可以直达最后一个entry；
zllen：entry的数目；
zlend：压缩列表的结尾，固定值0xFF;
entry:表示每一项；

entry

其中entry的构成为：

<prevlen> <encoding> <entry-data>

prevlen：前面一个entry的长度，这又分为两种情况：

当prevlen取值为[0,253]，entry的构成直接为：
```
 <prevlen from 0 to 253> <encoding> <entry>
```

当prevlen取值大于等于254（0xFE）时，entry的构成为：

0xFE <4 bytes unsigned little endian prevlen> <encoding> <entry>

可以看出，prevlen的长度为1或5个字节，取决于前面一个entry的长度
encoding：编码类型，这个分为多种情况：

|00pppppp| - 1 byte：这是String类型，后面6位表示字符串的长度，最多也只能为63个字符；
|01pppppp|qqqqqqqq| - 2 bytes：这是String类型，后面14位表示字符串的长度，最多也只能为16383个字符；
|10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes：这是String类型，后面4个字节表示字符串的长度，最少为16384个字符，最多为2^32-1。注意：这个32位数是使用大端的方式来存储；
|11000000| - 3 bytes：这是整数类型，紧接着后面2个字节为int16_t (2 bytes)类型；
|11010000| - 5 bytes：这是整数类型，紧接着后面4个字节为int32_t (4 bytes)类型；
|11100000| - 9 bytes：这是整数类型，紧接着后面8个字节为int64_t (8 bytes)类型；
|11110000| - 4 bytes：这是整数类型，紧接着后面3个字节用来编码24位符号整数；
|11111110| - 2 bytes：这是整数类型，紧接着后面1个字节用来编码8位符号整数；
|1111xxxx| - 1 byte：这是整数类型，低4位 xxxx 直接存放一个无符号的立即数，xxxx 只能位于 0001 到 1101，实际使用时减去1，用来表示0到12；
|11111111| - 压缩列表的结尾标记（即zlend，0xFF）.
总的来看，当编码字符串时，需要存储字符串的长度；当编码整数时，位于0到12的数字不再需要额外的字节存储，也就是不需要entry-data.
上面所说的就对应这几个宏：

/* Bit masks over the encoding byte: 0xc0 covers the two most significant
 * bits, 0x30 covers bits 5 and 4. */
#define ZIP_STR_MASK 0xc0
#define ZIP_INT_MASK 0x30
/* String encodings: the two most significant bits are 00, 01 or 10 and the
 * name gives the width of the length field (6, 14 or 32 bits). */
#define ZIP_STR_06B (0 << 6)
#define ZIP_STR_14B (1 << 6)
#define ZIP_STR_32B (2 << 6)
/* Integer encodings: the two most significant bits are 11 and bits 5-4
 * select the integer width (16, 32, 64 or 24 bits). */
#define ZIP_INT_16B (0xc0 | 0<<4)
#define ZIP_INT_32B (0xc0 | 1<<4)
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
/* 8-bit integer encoding (bit pattern 11111110). */
#define ZIP_INT_8B 0xfe

下面就跟着代码，逐个解释：

结构体

由于数据的排列十分紧密并且每个entry的大小不固定，因此但凡涉及到插入、删除之类的操作，就需要获取前一个entry的长度等信息，因此需要一个结构体作为中间的载体：

/* zlentry is only a carrier for the decoded information about one entry; the
 * entry data itself is not stored here. It exists purely to make entry
 * manipulation more convenient. */
typedef struct zlentry {
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry's length. */
    unsigned int prevrawlen;     /* Length of the previous entry. */
    unsigned int lensize;        /* Bytes used to encode this entry's type/length:
                                    always 1 for integers; 1, 2 or 5 for strings. */
    unsigned int len;            /* Bytes of actual entry data; e.g. 2 when the
                                    entry stores a 16-bit integer. */
    unsigned int headersize;     /* prevrawlensize + lensize. */
    unsigned char encoding;      /* One of ZIP_STR_* or ZIP_INT_*. */
    unsigned char *p;            /* Points to the very start of the entry. */
} zlentry;

创建

ziplistNew

/* Size of the ziplist header: two 4-byte integers (total byte count and the
 * offset of the last entry) followed by one 2-byte integer (entry count). */
#define ZIPLIST_HEADER_SIZE     (sizeof(uint32_t)*2+sizeof(uint16_t))
/* Size of the end marker: a single byte (0xFF). */
#define ZIPLIST_END_SIZE        (sizeof(uint8_t))

/* Allocate and return a new, empty ziplist: just the header followed by the
 * end marker. */
unsigned char *ziplistNew(void) {
    unsigned int total = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;
    unsigned char *list = zmalloc(total);

    /* Header fields go through intrev32ifbe before being stored. */
    ZIPLIST_BYTES(list) = intrev32ifbe(total);
    /* With no entries yet, the tail offset points right after the header. */
    ZIPLIST_TAIL_OFFSET(list) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    ZIPLIST_LENGTH(list) = 0;

    /* The final byte of the allocation is the end marker. */
    list[total-1] = ZIP_END;
    return list;
}

插入新元素push

压缩列表创建完成后就需要进行插入内容

ziplistPush

/* Values for the 'where' argument of ziplistPush. */
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
/* Pointer to the position right after the header, i.e. where the first entry
 * starts; used as the insertion point when pushing at the head. */
#define ZIPLIST_ENTRY_HEAD(zl)  ((zl)+ZIPLIST_HEADER_SIZE)
/* Pointer to the last entry, computed from the stored tail offset. */
#define ZIPLIST_ENTRY_TAIL(zl)  ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))
/* Pointer to the end marker of the ziplist (the 0xFF byte). */
#define ZIPLIST_ENTRY_END(zl)   ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-ZIPLIST_END_SIZE)

/* Insert the slen bytes at 's' at the head of the ziplist when 'where' is
 * ZIPLIST_HEAD, otherwise at its end. Returns the pointer returned by
 * __ziplistInsert. */
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
    unsigned char *pos;

    if (where == ZIPLIST_HEAD) {
        /* Insert in front of the first entry. */
        pos = ZIPLIST_ENTRY_HEAD(zl);
    } else {
        /* Insert at the end marker. */
        pos = ZIPLIST_ENTRY_END(zl);
    }
    return __ziplistInsert(zl,pos,s,slen);
}

__ziplistInsert

具体的实现函数，这个函数细节很多

#define ZIP_BIG_PREVLEN 254 /* ZIP_BIG_PREVLEN - 1 is the max number of bytes of
                               the previous entry, for the "prevlen" field prefixing
                               each entry, to be represented with just a single byte.
                               Otherwise it is represented as FE AA BB CC DD, where
                               AA BB CC DD are a 4 bytes unsigned integer
                               representing the previous entry len. */

/* Set 'prevlensize' to the number of bytes used to encode the previous entry
 * length at 'ptr': 1 when the first byte is below ZIP_BIG_PREVLEN, else 5. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)

/* Set 'prevlensize' as above and decode the previous entry length into
 * 'prevlen'. In the 5-byte form the length is the 4 bytes after the 0xFE
 * marker, read as a little-endian unsigned integer: ptr[1] is the least
 * significant byte. Each byte is widened to uint32_t before shifting so that
 * a byte value >= 0x80 shifted by 24 does not overflow a signed int.
 * Nothing may follow a line-continuation backslash, so comments inside the
 * macro body must be placed before it. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else { /* prevlensize == 5: little-endian 32-bit length */               \
        (prevlen) = ((uint32_t)(ptr)[4] << 24) |                               \
                    ((uint32_t)(ptr)[3] << 16) |                               \
                    ((uint32_t)(ptr)[2] <<  8) |                               \
                    ((uint32_t)(ptr)[1]);                                      \
    }                                                                          \
} while(0)
/* Insert the slen bytes at 's' as a new entry at position 'p' of ziplist 'zl'.
 * 'p' either points at an existing entry (the new entry goes in front of it)
 * or at the end marker (the new entry becomes the tail).
 * Returns the ziplist pointer, which may differ from the 'zl' passed in because
 * the list is resized along the way. */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;
    /* Work out prevlen for the new entry. p[0] == ZIP_END means the insertion
     * point is the end marker, i.e. the new entry is appended at the tail
     * (this also covers, but is not limited to, an empty list). */
    if (p[0] != ZIP_END) {
        /* Inserting before an existing entry: the new entry takes over the
         * prevlen currently stored in that entry's header. */
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        /* Locate the current last entry. */
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        /* p[0] == ZIP_END but ptail[0] != ZIP_END means the list has at least
         * one entry; otherwise prevlen stays 0. */
        if (ptail[0] != ZIP_END) {
            /* prevlen is the full raw length (header + data) of the tail entry. */
            prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
        }
    }

    /* See whether the payload can be encoded as an integer. */
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* At this point reqlen is the number of bytes taken by the entry data
         * for the chosen integer encoding. */
        reqlen = zipIntSize(encoding);
    } else {
        /* Stored as a string: the data takes slen bytes. */
        reqlen = slen;
    }
    /* Add the bytes needed for the prevlen field (called with NULL, so
     * presumably only the size is computed and nothing is written). */
    reqlen += zipStorePrevEntryLength(NULL,prevlen);
    /* Add the bytes needed for the encoding field. */
    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);

    /* reqlen is now the total number of bytes of the entry being inserted.
     * When the insertion point is not the tail, make sure the next entry can
     * hold this entry's length in its prevlen field. */
    int forcelarge = 0;
    /* nextdiff: presumably the change in bytes needed by the next entry's
     * prevlen field to store reqlen (helper not shown here); it is 0 when
     * appending at the tail. */
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
    /* If the next entry's prevlen field would shrink by 4 bytes while the new
     * entry is shorter than 4 bytes, do not shrink it: keep nextdiff at 0 and
     * remember to write reqlen in the large prevlen form below. */
    if (nextdiff == -4 && reqlen < 4) {
        nextdiff = 0;
        forcelarge = 1;
    }

    /* Store offset because a realloc may change the address of zl. */
    offset = p-zl;
    newlen = curlen+reqlen+nextdiff;
    zl = ziplistResize(zl,newlen); /* resizes the ziplist allocation to newlen bytes */
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        if (forcelarge)
            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
        else
            zipStorePrevEntryLength(p+reqlen,reqlen);

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        assert(zipEntrySafe(zl, newlen, p+reqlen, &tail, 1));
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    p += zipStorePrevEntryLength(p,prevlen);
    p += zipStoreEntryEncoding(p,encoding,slen);
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}

zipRawEntryLengthSafe

/* Return the total number of bytes (header plus data) used by the entry
 * pointed to by 'p'. The entry is decoded with zipEntrySafe, and the result
 * of that call is asserted. */
static inline unsigned int zipRawEntryLengthSafe(unsigned char* zl, size_t zlbytes, unsigned char *p) {
    zlentry entry;
    unsigned int rawlen;

    assert(zipEntrySafe(zl, zlbytes, p, &entry, 0));
    rawlen = entry.headersize + entry.len;
    return rawlen;
}
/* Fill the caller-supplied zlentry 'e' with the decoded information of the
 * entry at 'p'. The function is meant to be safe even when 'p' points to an
 * invalid position: it checks bounds so as not to access memory outside the
 * ziplist. When 'validate_prevlen' is non-zero, the position p - prevrawlen
 * is range-checked as well.
 * Returns 1 if the entry is valid, 0 otherwise. */
static inline int zipEntrySafe(unsigned char* zl, size_t zlbytes, unsigned char *p, zlentry *e, int validate_prevlen) {
    /* Note: when called via zipRawEntryLengthSafe from __ziplistInsert, 'p' is
     * the pointer to the last entry (ptail). */
    unsigned char *zlfirst = zl + ZIPLIST_HEADER_SIZE;
    unsigned char *zllast = zl + zlbytes - ZIPLIST_END_SIZE;
#define OUT_OF_RANGE(p) (unlikely((p) < zlfirst || (p) > zllast))

    /* Fast path: taken when at least 10 bytes lie between p and the end
     * marker (prevlen and the encoding/length field take at most 5 bytes
     * each), so the header can be decoded without per-field range checks. */
    if (p >= zlfirst && p + 10 < zllast) {
        ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
        ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
        ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
        /* Header size = bytes of the prevlen field + bytes of the encoding field. */
        e->headersize = e->prevrawlensize + e->lensize;
        e->p = p;
        /* We didn't call ZIP_ASSERT_ENCODING, so we check lensize was set to 0. */
        if (unlikely(e->lensize == 0))
            return 0;
        /* Make sure the entry doesn't reach outside the edge of the ziplist */
        if (OUT_OF_RANGE(p + e->headersize + e->len))
            return 0;
        /* Make sure prevlen doesn't reach outside the edge of the ziplist */
        if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
            return 0;
        return 1;
    }

    /* Slow path: validate each piece of the header before reading past it. */
    /* Make sure the pointer doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p))
        return 0;

    /* Make sure the encoded prevlen header doesn't reach outside the allocation */
    ZIP_DECODE_PREVLENSIZE(p, e->prevrawlensize);
    if (OUT_OF_RANGE(p + e->prevrawlensize))
        return 0;

    /* Make sure encoded entry header is valid. */
    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    e->lensize = zipEncodingLenSize(e->encoding);
    if (unlikely(e->lensize == ZIP_ENCODING_SIZE_INVALID))
        return 0;

    /* Make sure the encoded entry header doesn't reach outside the allocation */
    if (OUT_OF_RANGE(p + e->prevrawlensize + e->lensize))
        return 0;

    /* Decode the prevlen and entry len headers. */
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
    e->headersize = e->prevrawlensize + e->lensize;

    /* Make sure the entry doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p + e->headersize + e->len))
        return 0;

    /* Make sure prevlen doesn't reach outside the edge of the ziplist */
    if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
        return 0;

    e->p = p;
    return 1;
#undef OUT_OF_RANGE
}

尝试编码这个字符串的函数zipTryEncoding

/* Check whether the string at 'entry' (entrylen characters) can be stored as
 * an integer. On success the parsed value is written to '*v', the smallest
 * integer encoding able to hold it is written to '*encoding', and 1 is
 * returned. Otherwise 0 is returned and neither output is written. */
int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
    long long num;
    unsigned char enc;

    /* Empty strings and strings of 32 or more characters are never treated
     * as integers. */
    if (entrylen == 0 || entrylen >= 32) return 0;

    /* Give up when the string does not parse as a long long. */
    if (!string2ll((char*)entry,entrylen,&num)) return 0;

    /* Pick the smallest encoding type that can hold the value. */
    if (num >= 0 && num <= 12) {
        /* Values 0..12 are embedded in the encoding byte itself. */
        enc = ZIP_INT_IMM_MIN+num;
    } else if (num >= INT8_MIN && num <= INT8_MAX) {
        enc = ZIP_INT_8B;
    } else if (num >= INT16_MIN && num <= INT16_MAX) {
        enc = ZIP_INT_16B;
    } else if (num >= INT24_MIN && num <= INT24_MAX) {
        enc = ZIP_INT_24B;
    } else if (num >= INT32_MIN && num <= INT32_MAX) {
        enc = ZIP_INT_32B;
    } else {
        enc = ZIP_INT_64B;
    }

    *encoding = enc;
    *v = num;
    return 1;
}

删除元素

/* Remove the single entry that *p points to and return the ziplist pointer.
 * 'p' is a pointer-to-pointer so that *p can be refreshed in place, which
 * lets callers keep iterating over the ziplist while deleting entries. */
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
    /* Remember the position as an offset: the delete does a realloc which
     * might result in a different ziplist pointer. */
    size_t pos = *p-zl;
    unsigned char *updated = __ziplistDelete(zl,*p,1);

    /* Re-derive the element pointer from the returned ziplist. When the last
     * entry was deleted, *p may now point at ZIP_END, so callers should check
     * for that. */
    *p = updated+pos;
    return updated;
}

/* Delete "num" entries, starting at "p". Returns pointer to the ziplist.
 * Fewer than "num" entries are deleted if the end marker is reached first.
 * The returned pointer may differ from "zl" because the list is resized. */
unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
    unsigned int i, totlen, deleted = 0;
    size_t offset;
    int nextdiff = 0;
    zlentry first, tail;
    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));

    zipEntry(p, &first); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
    /* Advance p past up to "num" entries, stopping early at the end marker. */
    for (i = 0; p[0] != ZIP_END && i < num; i++) {
        p += zipRawEntryLengthSafe(zl, zlbytes, p);
        deleted++;
    }

    assert(p >= first.p);
    totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
    if (totlen > 0) {
        uint32_t set_tail;
        if (p[0] != ZIP_END) {
            /* Storing `prevrawlen` in this entry may increase or decrease the
             * number of bytes required compare to the current `prevrawlen`.
             * There always is room to store this, because it was previously
             * stored by an entry that is now being deleted. */
            nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);

            /* Note that there is always space when p jumps backward: if
             * the new previous entry is large, one of the deleted elements
             * had a 5 bytes prevlen header, so there is for sure at least
             * 5 bytes free and we need just 4. */
            p -= nextdiff;
            assert(p >= first.p && p<zl+zlbytes-1);
            zipStorePrevEntryLength(p,first.prevrawlen);

            /* Update offset for tail */
            set_tail = intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen;

            /* When the tail contains more than one entry, we need to take
             * "nextdiff" in account as well. Otherwise, a change in the
             * size of prevlen doesn't have an effect on the *tail* offset. */
            assert(zipEntrySafe(zl, zlbytes, p, &tail, 1));
            if (p[tail.headersize+tail.len] != ZIP_END) {
                set_tail = set_tail + nextdiff;
            }

            /* Move tail to the front of the ziplist */
            /* since we asserted that p >= first.p. we know totlen >= 0,
             * so we know that p > first.p and this is guaranteed not to reach
             * beyond the allocation, even if the entries lens are corrupted. */
            size_t bytes_to_move = zlbytes-(p-zl)-1;
            memmove(first.p,p,bytes_to_move);
        } else {
            /* The entire tail was deleted. No need to move memory. */
            /* The new tail is the entry preceding the first deleted one. */
            set_tail = (first.p-zl)-first.prevrawlen;
        }

        /* Resize the ziplist */
        /* Keep the position as an offset, since the resize may move zl. */
        offset = first.p-zl;
        zlbytes -= totlen - nextdiff;
        zl = ziplistResize(zl, zlbytes);
        p = zl+offset;

        /* Update record count */
        ZIPLIST_INCR_LENGTH(zl,-deleted);

        /* Set the tail offset computed above */
        assert(set_tail <= zlbytes - ZIPLIST_END_SIZE);
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(set_tail);

        /* When nextdiff != 0, the raw length of the next entry has changed, so
         * we need to cascade the update throughout the ziplist */
        if (nextdiff != 0)
            zl = __ziplistCascadeUpdate(zl,p);
    }
    return zl;
}