前面我们聊了压缩列表的源码解析,留了一个坑,就是API当中的新增、删除和查找,那么本篇我们就来深入分析下这三个API
API详解
API中,插入和删除的源码比较复杂,查找相对简单一些,本篇我们就来深入讲解这三个API
ziplistInsert(插入)
首先看插入方法:ziplistInsert只是直接调用了内部的插入函数__ziplistInsert,所以我们直接来看这个函数
/* Insert the `slen` bytes at `s` as a new entry at position `p` of ziplist `zl`.
 *
 * `p` either points at an existing entry (p[0] != ZIP_END), in which case the
 * entries from `p` onwards are shifted to make room, or at the ZIP_END marker,
 * in which case the new entry becomes the tail.
 *
 * The function works in three phases:
 *   1. Encoding: determine the previous entry's length and the number of
 *      bytes the new entry needs (reqlen).
 *   2. Resize: resize the ziplist to newlen = curlen + reqlen + nextdiff.
 *   3. Copy: move the following entries, fix the tail offset, cascade the
 *      prevlen change if needed, then write the new entry.
 *
 * Returns the ziplist pointer; it may differ from the `zl` passed in because
 * the resize can move the allocation. */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
unsigned int prevlensize, prevlen = 0;
size_t offset;
int nextdiff = 0;
unsigned char encoding = 0;
long long value = 123456789; /* initialized to avoid warning. Using a value
that is easy to see if for some reason
we use it uninitialized. */
zlentry tail;
/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {
/* Inserting before an existing entry: the new entry takes over the
 * prevlen currently stored in that entry. */
ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
} else {
/* Appending: the previous entry is the current tail, if there is one.
 * When the tail pointer itself is ZIP_END, prevlen stays 0. */
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
if (ptail[0] != ZIP_END) {
prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
}
}
/* See if the entry can be encoded */
if (zipTryEncoding(s,slen,&value,&encoding)) {
/* 'encoding' is set to the appropriate integer encoding */
reqlen = zipIntSize(encoding);
} else {
/* 'encoding' is untouched, however zipStoreEntryEncoding will use the
 * string length to figure out how to encode it. */
reqlen = slen;
}
/* We need space for both the length of the previous entry and
 * the length of the payload. Passing NULL as the destination only asks the
 * helpers for the number of bytes they would write. */
reqlen += zipStorePrevEntryLength(NULL,prevlen);
reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
/* When the insert position is not equal to the tail, we need to
 * make sure that the next entry can hold this entry's length in
 * its prevlen field. nextdiff is the change in size of that field. */
int forcelarge = 0;
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
/* With nextdiff == -4 and reqlen < 4, newlen below would be smaller than
 * curlen. Avoid shrinking: leave the next entry's prevlen field at its
 * current size and store the length in the large form instead. */
if (nextdiff == -4 && reqlen < 4) {
nextdiff = 0;
forcelarge = 1;
}
/* Store offset because a realloc may change the address of zl. */
offset = p-zl;
newlen = curlen+reqlen+nextdiff;
zl = ziplistResize(zl,newlen);
p = zl+offset;
/* Apply memory move when necessary and update tail offset. */
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes.
 * The source starts at p-nextdiff so that, once moved to p+reqlen, the
 * next entry's prevlen field has its new size; the field itself is
 * rewritten just below. */
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
/* Encode this entry's raw length in the next entry. */
if (forcelarge)
zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
else
zipStorePrevEntryLength(p+reqlen,reqlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
/* When the tail contains more than one entry, we need to take
 * "nextdiff" in account as well. Otherwise, a change in the
 * size of prevlen doesn't have an effect on the *tail* offset. */
/* NOTE(review): zipEntrySafe() fills `tail` inside assert(); this relies
 * on assert() never being compiled out - confirm the project's assert. */
assert(zipEntrySafe(zl, newlen, p+reqlen, &tail, 1));
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
}
} else {
/* This element will be the new tail. */
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
}
/* When nextdiff != 0, the raw length of the next entry has changed, so
 * we need to cascade the update throughout the ziplist */
if (nextdiff != 0) {
/* The cascade may resize the ziplist again, so re-derive p afterwards. */
offset = p-zl;
zl = __ziplistCascadeUpdate(zl,p+reqlen);
p = zl+offset;
}
/* Write the entry: prevlen field, encoding/length field, then the payload. */
p += zipStorePrevEntryLength(p,prevlen);
p += zipStoreEntryEncoding(p,encoding,slen);
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
zipSaveInteger(p,value,encoding);
}
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}
这里的代码较长,我们分三个阶段来做分析:
1、编码
2、重新分配空间
3、数据复制
这里的级联更新我们下面会讲到
ziplistDelete(删除)
这里我们也直接看__ziplistDelete方法
/* Delete up to `num` consecutive entries from ziplist `zl`, starting at the
 * entry `p` points to. Deletion stops early if ZIP_END is reached.
 *
 * Steps:
 *   1. Walk over the entries to delete and compute their total byte length.
 *   2. If entries remain after the deleted range, rewrite the prevlen field
 *      of the first survivor and move the remaining bytes forward.
 *   3. Resize the ziplist, update the entry count and the tail offset, and
 *      cascade the prevlen change when the survivor's header changed size.
 *
 * Returns the ziplist pointer; it may differ from the `zl` passed in because
 * of the resize. When nothing is deleted (totlen == 0) `zl` is returned
 * untouched. */
unsigned char * __ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
unsigned int i, totlen, deleted = 0;
size_t offset;
int nextdiff = 0;
zlentry first, tail;
size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));
zipEntry(p, &first); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
/* Advance p past every entry to delete; afterwards p points at the first
 * surviving entry or at ZIP_END. */
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLengthSafe(zl, zlbytes, p);
deleted++;
}
assert(p >= first.p);
totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
if (totlen > 0) {
uint32_t set_tail;
if (p[0] != ZIP_END) {
/* Storing `prevrawlen` in this entry may increase or decrease the
 * number of bytes required compare to the current `prevrawlen`.
 * There always is room to store this, because it was previously
 * stored by an entry that is now being deleted. */
nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
/* Note that there is always space when p jumps backward: if
 * the new previous entry is large, one of the deleted elements
 * had a 5 bytes prevlen header, so there is for sure at least
 * 5 bytes free and we need just 4. */
p -= nextdiff;
assert(p >= first.p && p<zl+zlbytes-1);
zipStorePrevEntryLength(p,first.prevrawlen);
/* Update offset for tail */
set_tail = intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen;
/* When the tail contains more than one entry, we need to take
 * "nextdiff" in account as well. Otherwise, a change in the
 * size of prevlen doesn't have an effect on the *tail* offset. */
/* NOTE(review): zipEntrySafe() fills `tail` inside assert(); this relies
 * on assert() never being compiled out - confirm the project's assert. */
assert(zipEntrySafe(zl, zlbytes, p, &tail, 1));
if (p[tail.headersize+tail.len] != ZIP_END) {
set_tail = set_tail + nextdiff;
}
/* Move tail to the front of the ziplist */
/* since we asserted that p >= first.p. we know totlen >= 0,
 * so we know that p > first.p and this is guaranteed not to reach
 * beyond the allocation, even if the entries lens are corrupted. */
/* Everything from p up to, but not including, the final ZIP_END byte. */
size_t bytes_to_move = zlbytes-(p-zl)-1;
memmove(first.p,p,bytes_to_move);
} else {
/* The entire tail was deleted. No need to move memory.
 * The new tail is the entry that preceded `first`, which starts
 * first.prevrawlen bytes before it. */
set_tail = (first.p-zl)-first.prevrawlen;
}
/* Resize the ziplist: the new size is the old size minus the deleted bytes
 * plus the change in the survivor's prevlen field. */
offset = first.p-zl;
zlbytes -= totlen - nextdiff;
zl = ziplistResize(zl, zlbytes);
p = zl+offset;
/* Update record count */
/* NOTE(review): `deleted` is unsigned, so -deleted wraps; presumably the
 * macro's arithmetic turns this into a decrement - confirm. */
ZIPLIST_INCR_LENGTH(zl,-deleted);
/* Set the tail offset computed above */
assert(set_tail <= zlbytes - ZIPLIST_END_SIZE);
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(set_tail);
/* When nextdiff != 0, the raw length of the next entry has changed, so
 * we need to cascade the update throughout the ziplist */
if (nextdiff != 0)
zl = __ziplistCascadeUpdate(zl,p);
}
return zl;
}
1、先计算出要删除的元素的长度,这里的逻辑是比较简单的
大致步骤为:解码待删除元素-遍历所有待删除元素-计算待删除元素长度
2、数据复制
3、重新分配空间
ziplistFind(查找)
查找的方法其实相对插入和删除来说简单的多
unsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {
int skipcnt = 0;
unsigned char vencoding = 0;
long long vll = 0;
size_t zlbytes = ziplistBlobLen(zl);
while (p[0] != ZIP_END) {
struct zlentry e;
unsigned char *q;
assert(zipEntrySafe(zl, zlbytes, p, &e, 1));
q = p + e.prevrawlensize + e.lensize;
if (skipcnt == 0) {
/* Compare current entry with specified entry */
if (ZIP_IS_STR(e.encoding)) {
if (e.len == vlen && memcmp(q, vstr, vlen) == 0) {
return p;
}
} else {
/* Find out if the searched field can be encoded. Note that
* we do it only the first time, once done vencoding is set
* to non-zero and vll is set to the integer value. */
if (vencoding == 0) {
if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {
/* If the entry can't be encoded we set it to
* UCHAR_MAX so that we don't retry again the next
* time. */
vencoding = UCHAR_MAX;
}
/* Must be non-zero by now */
assert(vencoding);
}
/* Compare current entry with specified entry, do it only
* if vencoding != UCHAR_MAX because if there is no encoding
* possible for the field it can't be a valid integer. */
if (vencoding != UCHAR_MAX) {
long long ll = zipLoadInteger(q, e.encoding);
if (ll == vll) {
return p;
}
}
}
/* Reset skip count */
skipcnt = skip;
} else {
/* Skip entry */
skipcnt--;
}
/* Move to next entry */
p = q + e.len;
}
return NULL;
}
1、计算节点属性
2、判断节点类型
3、如果是字符串,对比内容
4、如果是整数,对比数值
5、找到就返回,找不到就指向下一个
这里的逻辑不算复杂,我们整合为一个图来讲解
级联更新
出现原因
出现级联更新,主要是因为prevlensize(也就是用来记录"前一个节点长度"的字段本身所占的字节数)。当我们插入或删除某个entry时,后一个节点记录的"前一个节点长度"会发生变化,它的prevlensize也就可能跟着变更。
我们参照这张图举个例子:
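假设此时我们需要删除entryX,entryX是128字节,entryX-1为512字节,entryX+1为253字节。我们上文讲过prevlensize:删除前,entryX+1的前一个节点是entryX(128字节),所以entryX+1的prevlensize为1;删除后,entryX+1的前一个节点变成了entryX-1(512字节),它的prevlensize就变为了5。多出来的这4个字节就是代码里的nextdiff,entryX+1之后的所有节点都需要整体向后移。同时,entryX+1自身的长度也从253字节变成了257字节,它后面那个节点的prevlensize同样可能要从1扩展到5,这样一个节点接一个节点地传递下去,就是级联更新。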
流程
我们先看下代码
/* Propagate a change in the raw length of the entry at `p` through the
 * entries that follow it in ziplist `zl`.
 *
 * Each entry stores the length of the entry before it. When that stored
 * length no longer fits the field, the field grows by `delta` (4) bytes,
 * which in turn changes this entry's own length and may force the same
 * growth on the next entry, and so on.
 *
 * The function works in two passes:
 *   1. Walk forward from `p`, counting the entries whose prevlen field must
 *      grow (cnt) and the total extra bytes required (extra).
 *   2. Resize the ziplist once, then walk back over those entries from the
 *      last to the first, moving each one to its final position and writing
 *      its new prevlen.
 *
 * Returns the ziplist pointer; it may differ from the `zl` passed in because
 * of the resize. */
unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
zlentry cur;
size_t prevlen, prevlensize, prevoffset; /* Information about the last changed entry. */
size_t firstentrylen; /* Used to handle insert at head. */
size_t rawlen, curlen = intrev32ifbe(ZIPLIST_BYTES(zl));
size_t extra = 0, cnt = 0, offset;
size_t delta = 4; /* Extra bytes needed to update a entry's prevlen (5-1). */
unsigned char *tail = zl + intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl));
/* Empty ziplist */
if (p[0] == ZIP_END) return zl;
zipEntry(p, &cur); /* no need for "safe" variant since the input pointer was validated by the function that returned it. */
/* Start from the entry at p: its raw length is what the next entry must
 * record as prevlen. */
firstentrylen = prevlen = cur.headersize + cur.len;
prevlensize = zipStorePrevEntryLength(NULL, prevlen);
prevoffset = p - zl;
p += prevlen;
/* Iterate ziplist to find out how many extra bytes do we need to update it. */
while (p[0] != ZIP_END) {
assert(zipEntrySafe(zl, curlen, p, &cur, 0));
/* Abort when "prevlen" has not changed. */
if (cur.prevrawlen == prevlen) break;
/* Abort when entry's "prevlensize" is big enough. */
if (cur.prevrawlensize >= prevlensize) {
if (cur.prevrawlensize == prevlensize) {
zipStorePrevEntryLength(p, prevlen);
} else {
/* This would result in shrinking, which we want to avoid.
 * So, set "prevlen" in the available bytes. */
zipStorePrevEntryLengthLarge(p, prevlen);
}
break;
}
/* cur.prevrawlen == 0 means cur is the former head entry. */
assert(cur.prevrawlen == 0 || cur.prevrawlen + delta == prevlen);
/* Update prev entry's info and advance the cursor. This entry's prevlen
 * field must grow, so its own raw length becomes rawlen + delta. */
rawlen = cur.headersize + cur.len;
prevlen = rawlen + delta;
prevlensize = zipStorePrevEntryLength(NULL, prevlen);
prevoffset = p - zl;
p += rawlen;
extra += delta;
cnt++;
}
/* When extra is zero, no entry has to grow: either nothing needed updating
 * or the update was already written in place above. */
if (extra == 0) return zl;
/* Update tail offset after loop. */
if (tail == zl + prevoffset) {
/* When the last entry we need to update is also the tail, update tail offset
 * unless this is the only entry that was updated (so the tail offset didn't change). */
if (extra - delta != 0) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra-delta);
}
} else {
/* Update the tail offset in cases where the last entry we updated is not the tail. */
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
}
/* Now "p" points at the first unchanged byte in original ziplist,
 * move data after that to new ziplist. */
offset = p - zl;
zl = ziplistResize(zl, curlen + extra);
p = zl + offset;
/* Shift the unchanged remainder (excluding the final ZIP_END byte) to the
 * end of the enlarged buffer. */
memmove(p + extra, p, curlen - offset - 1);
p += extra;
/* Iterate all entries that need to be updated tail to head. */
while (cnt) {
zipEntry(zl + prevoffset, &cur); /* no need for "safe" variant since we already iterated on all these entries above. */
rawlen = cur.headersize + cur.len;
/* Move entry to tail and reset prevlen. Everything except the old prevlen
 * field is copied; the new, larger field is written separately below. */
memmove(p - (rawlen - cur.prevrawlensize),
zl + prevoffset + cur.prevrawlensize,
rawlen - cur.prevrawlensize);
p -= (rawlen + delta);
if (cur.prevrawlen == 0) {
/* "cur" is the previous head entry, update its prevlen with firstentrylen. */
zipStorePrevEntryLength(p, firstentrylen);
} else {
/* An entry's prevlen can only increment 4 bytes. */
zipStorePrevEntryLength(p, cur.prevrawlen+delta);
}
/* Step back to the previous entry, using the prevlen that was stored in
 * cur before it was rewritten. */
prevoffset -= cur.prevrawlen;
cnt--;
}
return zl;
}
__ziplistCascadeUpdate方法,这里的逻辑我们就直接放在一张图里
其实大家有注意到的话,可以看到,插入和删除的方法里,都会用到这个方法,所以其实是可以串联起来的,也建议大家去翻看源码稍微过一下。
感谢阅读,如果有任何疑问或不同意见,欢迎留言讨论。