[redis] 压缩列表(ziplist)
ziplist.h
ziplist.c
layout
/* Each entry in the ziplist is either a string or an integer. */
typedef struct {
/* When string is used, it is provided with the length (slen). */
unsigned char *sval;
unsigned int slen;
/* When integer is used, 'sval' is NULL, and lval holds the value. */
long long lval;
} ziplistEntry;
ziplist支持string和integer values,要求每一个操作都需要支持reallocation。
ziplist layout:
prevlen
为了实现双链表的特性,ziplist是通过记录prev节点的长度来做的。在每一个节点(char *ptr)中,我们只要判断第一个字节是否小于0xff,如果小于0xff,那么prevlen就是ptr[0]。否则,节点长度大于等于0xff,这时则计算ptr后的4个字节(小端存储)。
/* Return the number of bytes used to encode the length of the previous
* entry. The length is returned by setting the var 'prevlensize'. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \
if ((ptr)[0] < ZIP_BIG_PREVLEN) { \
(prevlensize) = 1; \
} else { \
(prevlensize) = 5; \
} \
} while(0)
/* Return the length of the previous element, and the number of bytes that
* are used in order to encode the previous element length.
* 'ptr' must point to the prevlen prefix of an entry (that encodes the
* length of the previous entry in order to navigate the elements backward).
* The length of the previous entry is stored in 'prevlen', the number of
* bytes needed to encode the previous entry length are stored in
* 'prevlensize'. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \
ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \
if ((prevlensize) == 1) { \
(prevlen) = (ptr)[0]; \
} else { /* prevlensize == 5 */ \
(prevlen) = ((ptr)[4] << 24) | \
((ptr)[3] << 16) | \
((ptr)[2] << 8) | \
((ptr)[1]); \
} \
} while(0)
encoding_string
标识 | encoding长度 | 字符串长度 | 描述 | 注意 |
---|---|---|---|---|
|00xxxxxx| | 1 byte | ≤63字节,2^6-1 | encoding使用一个自己保存,前2bit是0,后6bit表示字符串长度 | |
|01xxxxxx|xxxxxxxx| | 2 bytes | ≤16383 字节,2^14-1 | 大端存储 | |
|10xxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx|xxxxxxxx| | 5 bytes | >16383 字节 | 大端存储 |
encoding_integer
标识 | encoding长度 | 数值长度 | 描述 | 注意 |
---|---|---|---|---|
|1100xxxx| | 3 bytes | 2bytes | 高2bit表示integer,接着两个bit表示这是int16_t | |
|1101xxxx| | 5 bytes | 4bytes | 高2bit表示integer,接着两个bit表示这是int32_t | |
|1110xxxx| | 9 bytes | 8bytes | 高2bit表示integer,接着两个bit表示这是int64_t | |
|11110000| | 4 bytes | 3bytes | 高2bit表示integer,接着两个bit表示这是int24_t | |
|11111110| | 2 bytes | 1byte | 高2bit表示integer,接着两个bit表示这是int8_t | |
|1111xxxx| | 1 bytes | 4byte | ||
|11111111| | 1 bytes | 0byte | 列表结束 |
从结构上,ziplist我们可以将它看作一个vector,只不过这个vector支持string和integer,那么当我们在中间插入元素时,对于vector来说,则需要移动一些元素。对于ziplist来说,也是类似的。
insert
/* Insert item at "p". */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
unsigned int prevlensize, prevlen = 0;
size_t offset;
int nextdiff = 0;
unsigned char encoding = 0;
long long value = 123456789; /* initialized to avoid warning. Using a value
that is easy to see if for some reason
we use it uninitialized. */
zlentry tail;
/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {
ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
} else {
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
if (ptail[0] != ZIP_END) {
prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
}
}
/* See if the entry can be encoded */
if (zipTryEncoding(s,slen,&value,&encoding)) {
/* 'encoding' is set to the appropriate integer encoding */
reqlen = zipIntSize(encoding);
} else {
/* 'encoding' is untouched, however zipStoreEntryEncoding will use the
* string length to figure out how to encode it. */
reqlen = slen;
}
/* We need space for both the length of the previous entry and
* the length of the payload. */
reqlen += zipStorePrevEntryLength(NULL,prevlen);
reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
/* When the insert position is not equal to the tail, we need to
* make sure that the next entry can hold this entry's length in
* its prevlen field. */
int forcelarge = 0;
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
if (nextdiff == -4 && reqlen < 4) {
nextdiff = 0;
forcelarge = 1;
}
/* Store offset because a realloc may change the address of zl. */
offset = p-zl;
newlen = curlen+reqlen+nextdiff;
zl = ziplistResize(zl,newlen);
p = zl+offset;
/* Apply memory move when necessary and update tail offset. */
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes */
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
/* Encode this entry's raw length in the next entry. */
if (forcelarge)
zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
else
zipStorePrevEntryLength(p+reqlen,reqlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
assert(zipEntrySafe(zl, newlen, p+reqlen, &tail, 1));
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
}
} else {
/* This element will be the new tail. */
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
}
/* When nextdiff != 0, the raw length of the next entry has changed, so
* we need to cascade the update throughout the ziplist */
if (nextdiff != 0) {
offset = p-zl;
zl = __ziplistCascadeUpdate(zl,p+reqlen);
p = zl+offset;
}
/* Write the entry */
p += zipStorePrevEntryLength(p,prevlen);
p += zipStoreEntryEncoding(p,encoding,slen);
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
zipSaveInteger(p,value,encoding);
}
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}
原理是当我们在某个node插入元素时,我们需要使用realloc重新分配内存,然后将node之后的内存memmove到新的ptr上,最后将要插入的元素复制到node上即可。