一、quicklist
再看一下quicklist,它是从Redis3.2才提供的一个数据结构。从字面意思上理解,这个应该比list快。但是同样是list,为什么它要快?就得找一下原因。在普通的list中,可以通过拥有的前向和后向指针进行前后的遍历和查找。但是,当数据量大时,这两个指针占用的空间就非常明显了。而在前面的ziplist中,可以看到,通过指示本Entry的长度配合相关标识,就可以去除这两个指针来进行遍历。但同样也要看到,这是有代价的,代价就是,在插入和删除时,空间的不断的动态变化导致的内存空间的频繁变化和调整以及数据的迁移变化,产生巨大的时间和空间的损失。
那么可不可以把二者结合起来,找一个合适的平衡点,既能体现出list的方便性,又能体现ziplist的空间优势呢?quicklist就这样产生了。可以理解为把一个list分成多个段,每个段按照ziplist的形式来组织数据,当把数据的大小控制在ziplist的理想范围内时,优势就体现出来了。
换句话说,从整体上来看,quicklist仍然是list,具备list的优势,插入和删除方便,进出两端的速度很快(O(1));从具体的细节来看,list中的每个节点又都是一个ziplist,数据连续平坦、内存紧凑,在节点内部通过顺序遍历各个entry来定位相关数据。这从某种程度来说,综合体现了二者结合的优势。
二、源码分析
说一千道一万,还得看源码(quicklist.h,quicklist.c):
1、定义
/* quicklistNode is one link of the quicklist's doubly linked list.
 *
 * The bit-fields following 'sz' add up to 32 bits:
 *   count: 16, encoding: 2, container: 2, recompress: 1,
 *   attempted_compress: 1, extra: 10. */
typedef struct quicklistNode {
    struct quicklistNode *prev;          /* previous node in the list */
    struct quicklistNode *next;          /* next node in the list */
    unsigned char *zl;                   /* node payload: a ziplist, or a
                                          * quicklistLZF when the node is
                                          * compressed */
    unsigned int sz;                     /* ziplist size in bytes */
    unsigned int count : 16;             /* count of items in ziplist */
    unsigned int encoding : 2;           /* RAW==1 or LZF==2 */
    unsigned int container : 2;          /* NONE==1 or ZIPLIST==2;
                                          * reserved, always set to 2 */
    unsigned int recompress : 1;         /* was this node previous compressed? */
    unsigned int attempted_compress : 1; /* node can't compress; too small.
                                          * Only used for testing. */
    unsigned int extra : 10;             /* more bits to steal for future
                                          * usage (reserved) */
} quicklistNode;
/* quicklistLZF represents a ziplist that has been compressed:
 * a size header followed directly by the compressed bytes. */
typedef struct quicklistLZF {
    unsigned int sz;   /* LZF size in bytes */
    char compressed[]; /* compressed payload (flexible array member) */
} quicklistLZF;
/* quicklist is the list header.
 *
 * It tracks both ends of the node chain and two different lengths:
 * 'count' is the number of entries summed over every node's ziplist,
 * while 'len' is the number of quicklistNodes themselves. */
typedef struct quicklist {
    quicklistNode *head, *tail;  /* first and last node of the list */
    unsigned long count;         /* total count of all entries in all ziplists */
    unsigned long len;           /* number of quicklistNodes */
    int fill : 16;               /* fill factor for individual nodes */
    unsigned int compress : 16;  /* depth of end nodes not to compress;0=off */
} quicklist;
/* quicklistIter holds the state of one traversal over a quicklist. */
typedef struct quicklistIter {
    const quicklist *quicklist; /* the quicklist being iterated */
    quicklistNode *current;     /* node the iterator is currently in */
    unsigned char *zi;          /* current position inside the current node's
                                 * ziplist; when NULL, quicklistNext() looks
                                 * the position up again from 'offset' */
    long offset;                /* offset in current ziplist */
    int direction;              /* traversal direction; compared against
                                 * AL_START_HEAD / AL_START_TAIL */
} quicklistIter;
/* quicklistEntry describes a single element located inside a quicklist.
 *
 * quicklist / node : the list and the node the element belongs to.
 * zi               : position of the element inside the node's ziplist.
 * value / sz       : when the element is a string, 'value' points at it and
 *                    'sz' is used together with it as its length.
 * longval          : the element itself when 'value' is not set
 *                    (integer-encoded element).
 * offset           : index of the element within the node's ziplist;
 *                    negative values count from the tail. */
typedef struct quicklistEntry {
    const quicklist *quicklist;
    quicklistNode *node;
    unsigned char *zi;
    unsigned char *value;
    long long longval;
    unsigned int sz;
    int offset;
} quicklistEntry;
这里需要对quicklist中的两个位域fill和compress说明一下。fill和compress对应着配置文件redis.conf中的配置项,其内容是:
fill对应配置:list-max-ziplist-size -2
后面的负数代表的意义如下:
-1 ziplist字节大小不能超过4kb。(建议)
-2 ziplist字节大小不能超过8kb。(默认配置)
-3 ziplist字节大小不能超过16kb。(一般不建议)
-4 ziplist字节大小不能超过32kb。(不建议)
-5 ziplist字节大小不能超过64kb。(正常工作量不建议)
如果数字为正数,意思为每个ziplist结构最多包含的entry数量($\le 2^{15}$)。
compress对应配置:list-compress-depth 0
配置数字意义为:
0 不压缩。(默认)
1 quicklist列表的头尾各有1个节点不压缩,中间的节点压缩。
2 quicklist列表的头尾各有2个节点不压缩,中间的节点压缩。
3 quicklist列表的头尾各有3个节点不压缩,中间的节点压缩。
以此类推,最大为 $2^{16}$。
list-max-ziplist-size代表着ziplist大小的设置,而list-compress-depth代表的是压缩的深度,它就可以理解成前面提到的平衡点的选择。
2、插入
/* Insert a new entry before or after existing entry 'entry'.
 *
 * If after==1, the new value is inserted after 'entry', otherwise
 * the new value is inserted before 'entry'.
 *
 * Depending on whether the target node (and the neighbor on the insertion
 * side) still accepts the value according to _quicklistNodeAllowInsert(),
 * the value ends up in one of these places:
 *   - the current node's ziplist (node not full);
 *   - the adjacent node's ziplist (node full, inserting at its edge,
 *     neighbor not full);
 *   - a brand new node (node full, inserting at its edge, neighbor full);
 *   - one half of the current node after splitting it (everything else).
 *
 * quicklist->count is incremented on every path. */
REDIS_STATIC void _quicklistInsert(quicklist *quicklist, quicklistEntry *entry,
                                   void *value, const size_t sz, int after) {
    int full = 0, at_tail = 0, at_head = 0, full_next = 0, full_prev = 0;
    int fill = quicklist->fill;
    quicklistNode *node = entry->node;
    quicklistNode *new_node = NULL;

    if (!node) {
        /* we have no reference node, so let's create only node in the list */
        /* NOTE(review): unlike the other node-creating path below, this one
         * does not call quicklistNodeUpdateSz(new_node) - confirm intended. */
        D("No node given!");
        new_node = quicklistCreateNode();
        new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
        __quicklistInsertNode(quicklist, NULL, new_node, after);
        new_node->count++;
        quicklist->count++;
        return;
    }

    /* Populate accounting flags for easier boolean checks later */
    if (!_quicklistNodeAllowInsert(node, fill, sz)) {
        /* The current node cannot take the new value. */
        D("Current node is full with count %d with requested fill %d",
          node->count, fill);
        full = 1;
    }

    /* Inserting after the last entry of this node. 'offset' is zero-based
     * from the head or negative from the tail, so the last entry is either
     * count - 1 or -1 (comparing against count itself would never match). */
    if (after && (entry->offset == (int)node->count - 1 ||
                  entry->offset == -1)) {
        D("At Tail of current ziplist");
        at_tail = 1;
        if (!_quicklistNodeAllowInsert(node->next, fill, sz)) {
            D("Next node is full too.");
            full_next = 1;
        }
    }

    /* Inserting before the first entry of this node: offset 0 from the
     * head, or -count when the entry was addressed from the tail. */
    if (!after && (entry->offset == 0 ||
                   entry->offset == -(int)node->count)) {
        D("At Head");
        at_head = 1;
        if (!_quicklistNodeAllowInsert(node->prev, fill, sz)) {
            D("Prev node is full too.");
            full_prev = 1;
        }
    }

    /* Now determine where and how to insert the new element */
    if (!full && after) {
        /* Node has room: insert right after the reference entry. */
        D("Not full, inserting after current position.");
        quicklistDecompressNodeForUse(node);
        unsigned char *next = ziplistNext(node->zl, entry->zi);
        if (next == NULL) {
            node->zl = ziplistPush(node->zl, value, sz, ZIPLIST_TAIL);
        } else {
            node->zl = ziplistInsert(node->zl, next, value, sz);
        }
        node->count++;
        quicklistNodeUpdateSz(node);
        quicklistRecompressOnly(quicklist, node);
    } else if (!full && !after) {
        /* Node has room: insert right before the reference entry. */
        D("Not full, inserting before current position.");
        quicklistDecompressNodeForUse(node);
        node->zl = ziplistInsert(node->zl, entry->zi, value, sz);
        node->count++;
        quicklistNodeUpdateSz(node);
        quicklistRecompressOnly(quicklist, node);
    } else if (full && at_tail && node->next && !full_next && after) {
        /* If we are: at tail, next has free space, and inserting after:
         *   - insert entry at head of next node. */
        D("Full and tail, but next isn't full; inserting next node head");
        new_node = node->next;
        quicklistDecompressNodeForUse(new_node);
        new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_HEAD);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        quicklistRecompressOnly(quicklist, new_node);
    } else if (full && at_head && node->prev && !full_prev && !after) {
        /* If we are: at head, previous has free space, and inserting before:
         *   - insert entry at tail of previous node. */
        D("Full and head, but prev isn't full, inserting prev node tail");
        new_node = node->prev;
        quicklistDecompressNodeForUse(new_node);
        new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_TAIL);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        quicklistRecompressOnly(quicklist, new_node);
    } else if (full && ((at_tail && node->next && full_next && after) ||
                        (at_head && node->prev && full_prev && !after))) {
        /* If we are: full, and our prev/next is full, then:
         *   - create new node and attach to quicklist */
        D("\tprovisioning new node...");
        new_node = quicklistCreateNode();
        new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        __quicklistInsertNode(quicklist, node, new_node, after);
    } else if (full) {
        /* else, node is full we need to split it. */
        /* covers both after and !after cases */
        D("\tsplitting node...");
        quicklistDecompressNodeForUse(node);
        new_node = _quicklistSplitNode(node, entry->offset, after);
        /* The value goes at the edge of the new half that faces the split
         * point: its head when inserting after, its tail when before. */
        new_node->zl = ziplistPush(new_node->zl, value, sz,
                                   after ? ZIPLIST_HEAD : ZIPLIST_TAIL);
        new_node->count++;
        quicklistNodeUpdateSz(new_node);
        __quicklistInsertNode(quicklist, node, new_node, after);
        /* Splitting may leave small nodes behind; try to merge neighbors. */
        _quicklistMergeNodes(quicklist, node);
    }

    quicklist->count++;
}
/* Called after a split: where the nodes allow it, merge neighboring
 * ziplists back together, mainly to reduce memory fragmentation.
 *
 * Attempt to merge ziplists within two nodes on either side of 'center'.
 *
 * We attempt to merge:
 *   - (center->prev->prev, center->prev)
 *   - (center->next, center->next->next)
 *   - (center->prev, center)
 *   - (center, center->next)
 */
REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist,
                                       quicklistNode *center) {
    int fill = quicklist->fill;
    quicklistNode *prev, *prev_prev, *next, *next_next, *target;
    prev = prev_prev = next = next_next = target = NULL;

    /* Collect up to two neighbors on each side; missing ones stay NULL. */
    if (center->prev) {
        prev = center->prev;
        if (center->prev->prev)
            prev_prev = center->prev->prev;
    }

    if (center->next) {
        next = center->next;
        if (center->next->next)
            next_next = center->next->next;
    }

    /* Try to merge prev_prev and prev */
    if (_quicklistNodeAllowMerge(prev, prev_prev, fill)) {
        _quicklistZiplistMerge(quicklist, prev_prev, prev);
        prev_prev = prev = NULL; /* they could have moved, invalidate them. */
    }

    /* Try to merge next and next_next */
    if (_quicklistNodeAllowMerge(next, next_next, fill)) {
        _quicklistZiplistMerge(quicklist, next, next_next);
        next = next_next = NULL; /* they could have moved, invalidate them. */
    }

    /* Try to merge center node and previous node.
     * center->prev is re-read here because the merge above may have
     * replaced the node that used to sit there. */
    if (_quicklistNodeAllowMerge(center, center->prev, fill)) {
        /* NOTE(review): 'target' is dereferenced below, so this assumes
         * _quicklistZiplistMerge() returns a non-NULL node - confirm. */
        target = _quicklistZiplistMerge(quicklist, center->prev, center);
        center = NULL; /* center could have been deleted, invalidate it. */
    } else {
        /* else, we didn't merge here, but target needs to be valid below. */
        target = center;
    }

    /* Use result of center merge (or original) to merge with next node. */
    if (_quicklistNodeAllowMerge(target, target->next, fill)) {
        _quicklistZiplistMerge(quicklist, target, target->next);
    }
}
插入比较麻烦,主要是要考虑两种情况,而每种情况又有一些细节需要考虑:
首先,在头尾节点插入:如果插入后没有超过前面提到的配置所限定的quicklistNode大小,直接插入;否则要创建一个新的quicklistNode节点插入到双向链表中,同时创建相应的ziplist来存放新数据。
其次,任意节点插入,这个要复杂一些:
插入位置所在的ziplist大小允许插入(未超过配置文件中限制大小)时,直接插入到ziplist;
插入位置所在的ziplist大小超过限制,且插入位置位于ziplist头部或尾部,同时相邻的quicklist链表节点的ziplist大小允许插入,则插入到相邻的quicklist节点的ziplist中;
插入位置所在的ziplist大小超过限制,且插入的位置位于ziplist头部或尾部,同时相邻的quicklist链表节点的ziplist大小也超过限制,此时新创建一个quicklist链表节点插入;
至于其它情况(插入位置所在的ziplist大小超限),即对应于在ziplist中间插入数据的情况,要把当前ziplist分裂为两个节点,再向其中一个插入。
3、删除
/* Unlink 'node' from 'quicklist' and free it.
 *
 * This is ordinary doubly-linked-list removal (neighbors are re-linked and
 * head/tail are moved when the node sat at either end), plus a compression
 * pass over the list. The node's ziplist and the node itself are released
 * with zfree(). */
REDIS_STATIC void __quicklistDelNode(quicklist *quicklist,
                                     quicklistNode *node) {
    if (node->next)
        node->next->prev = node->prev;
    if (node->prev)
        node->prev->next = node->next;

    if (node == quicklist->tail) {
        quicklist->tail = node->prev;
    }

    if (node == quicklist->head) {
        quicklist->head = node->next;
    }

    /* Update the bookkeeping first: the node is already unlinked, so the
     * compression pass below should see the list's real length and count
     * rather than values that still include the removed node. */
    quicklist->len--;
    quicklist->count -= node->count;

    /* If we deleted a node within our compress depth, we
     * now have compressed nodes needing to be decompressed. */
    __quicklistCompress(quicklist, NULL);

    zfree(node->zl);
    zfree(node);
}
/* Remove the element described by 'entry' while iterating with 'iter'.
 *
 * 'entry' carries everything needed to find the element: the owning
 * quicklist, the node, and the position inside that node's ziplist.
 * The iterator is patched so that iteration can continue afterwards. */
void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry) {
    /* Remember the neighbors up front; the node itself may be deleted by
     * the call below. */
    quicklistNode *node_before = entry->node->prev;
    quicklistNode *node_after = entry->node->next;
    int node_removed = quicklistDelIndex((quicklist *)entry->quicklist,
                                         entry->node, &entry->zi);

    /* Whatever happened, the old ziplist position must not be reused. */
    iter->zi = NULL;

    if (!node_removed) {
        /* The node survived, so nothing else has to change: iter->zi was
         * reset above and iter->offset already designates the following
         * element, because:
         *   - [1, 2, 3] => delete offset 1 => [1, 3]: next element still offset 1
         *   - [1, 2, 3] => delete offset 0 => [2, 3]: next element still offset 0
         * If the last element at offset N was deleted and the ziplist now
         * has length N-1, the next call into quicklistNext() will jump to
         * the next node. */
        return;
    }

    /* The whole node is gone: move the iterator onto the neighbor that lies
     * in the direction of travel and restart at that node's near edge. */
    if (iter->direction == AL_START_HEAD) {
        iter->current = node_after;
        iter->offset = 0;
    } else if (iter->direction == AL_START_TAIL) {
        iter->current = node_before;
        iter->offset = -1;
    }
}
删除相对来说比较简单,同时Redis中还提供了一个范围删除,一个是从整体上删除,一个是从细节上删除,同样的目的,粒度不同罢了。
4、查找
/* Get next element in iterator.
 *
 * Note: You must NOT insert into the list while iterating over it.
 * You *may* delete from the list while iterating using the
 * quicklistDelEntry() function.
 * If you insert into the quicklist while iterating, you should
 * re-create the iterator after your addition.
 *
 * iter = quicklistGetIterator(quicklist,<direction>);
 * quicklistEntry entry;
 * while (quicklistNext(iter, &entry)) {
 *     if (entry.value)
 *          [[ use entry.value with entry.sz ]]
 *     else
 *          [[ use entry.longval ]]
 * }
 *
 * Populates 'entry' with values for this iteration.
 * Returns 0 when iteration is complete or if iteration not possible.
 * If return value is 0, the contents of 'entry' are not valid.
 */
int quicklistNext(quicklistIter *iter, quicklistEntry *entry) {
    initEntry(entry);

    if (!iter) {
        D("Returning because no iter!");
        return 0;
    }

    entry->quicklist = iter->quicklist;
    entry->node = iter->current;

    if (!iter->current) {
        D("Returning because current node is NULL");
        return 0;
    }

    unsigned char *(*nextFn)(unsigned char *, unsigned char *) = NULL;
    int offset_update = 0;

    if (!iter->zi) {
        /* If !zi, use current index. */
        quicklistDecompressNodeForUse(iter->current);
        iter->zi = ziplistIndex(iter->current->zl, iter->offset);
    } else {
        /* else, use existing iterator offset and get prev/next as necessary. */
        if (iter->direction == AL_START_HEAD) {
            nextFn = ziplistNext;
            offset_update = 1;
        } else if (iter->direction == AL_START_TAIL) {
            nextFn = ziplistPrev;
            offset_update = -1;
        }
        /* NOTE(review): nextFn stays NULL if 'direction' is neither of the
         * two values above; callers are presumably expected to never do
         * that - confirm. */
        iter->zi = nextFn(iter->current->zl, iter->zi);
        iter->offset += offset_update;
    }

    entry->zi = iter->zi;
    entry->offset = iter->offset;

    if (iter->zi) {
        /* Populate value from existing ziplist position */
        ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
        return 1;
    } else {
        /* We ran out of ziplist entries.
         * Pick next node, update offset, then re-run retrieval. */
        quicklistCompress(iter->quicklist, iter->current);
        if (iter->direction == AL_START_HEAD) {
            /* Forward traversal */
            D("Jumping to start of next node");
            iter->current = iter->current->next;
            iter->offset = 0;
        } else if (iter->direction == AL_START_TAIL) {
            /* Reverse traversal */
            D("Jumping to end of previous node");
            iter->current = iter->current->prev;
            iter->offset = -1;
        }
        /* Force the recursive call to look the position up from 'offset'. */
        iter->zi = NULL;
        return quicklistNext(iter, entry);
    }
}
/* Populate 'entry' with the element at the specified zero-based index
 * where 0 is the head, 1 is the element next to head
 * and so on. Negative integers are used in order to count
 * from the tail, -1 is the last element, -2 the penultimate
 * and so on. If the index is out of range 0 is returned.
 *
 * Returns 1 if element found
 * Returns 0 if element not found */
int quicklistIndex(const quicklist *quicklist, const long long idx,
                   quicklistEntry *entry) {
    quicklistNode *n;
    unsigned long long accum = 0; /* entries in the nodes skipped so far */
    unsigned long long index;     /* distance from the end we start at */
    int forward = idx < 0 ? 0 : 1; /* < 0 -> reverse, 0+ -> forward */

    initEntry(entry);
    entry->quicklist = quicklist;

    if (!forward) {
        /* Same value as (-idx) - 1, but adding before negating keeps the
         * arithmetic in range even when idx is the most negative value. */
        index = -(idx + 1);
        n = quicklist->tail;
    } else {
        index = idx;
        n = quicklist->head;
    }

    if (index >= quicklist->count)
        return 0;

    /* Walk node by node until the one containing 'index' is reached. */
    while (likely(n)) {
        if ((accum + n->count) > index) {
            break;
        } else {
            D("Skipping over (%p) %u at accum %llu", (void *)n, n->count,
              accum);
            accum += n->count;
            n = forward ? n->next : n->prev;
        }
    }

    if (!n)
        return 0;

    D("Found node: %p at accum %llu, idx %llu, sub+ %llu, sub- %llu", (void *)n,
      accum, index, index - accum, (-index) - 1 + accum);

    entry->node = n;
    /* index - accum is the position inside the found node and is smaller
     * than n->count, so it always fits in an int. */
    if (forward) {
        /* forward = normal head-to-tail offset. */
        entry->offset = (int)(index - accum);
    } else {
        /* reverse = need negative offset for tail-to-head, so undo
         * the result of the original if (index < 0) above. */
        entry->offset = -1 - (int)(index - accum);
    }

    quicklistDecompressNodeForUse(entry->node);
    entry->zi = ziplistIndex(entry->node->zl, entry->offset);
    ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
    /* The caller will use our result, so we don't re-compress here.
     * The caller can recompress or delete the node as needed. */
    return 1;
}
/* Returns an offset to use for iterating with ziplistNext. When the given
 * index is negative, the list is traversed back to front. When the list
 * doesn't contain an element at the provided index, NULL is returned.
 *
 * The lookup walks the ziplist entry by entry from the chosen end. */
unsigned char *ziplistIndex(unsigned char *zl, int index) {
    unsigned char *p;
    unsigned int prevlensize, prevlen = 0;
    if (index < 0) {
        /* Convert -1, -2, ... into 0, 1, ... steps back from the tail.
         * Adding before negating avoids overflow for the most negative
         * index value. */
        index = -(index + 1);
        p = ZIPLIST_ENTRY_TAIL(zl);
        if (p[0] != ZIP_END) {
            /* Step backwards using each entry's stored previous length. */
            ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
            while (prevlen > 0 && index--) {
                p -= prevlen;
                ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
            }
        }
    } else {
        p = ZIPLIST_ENTRY_HEAD(zl);
        /* Step forwards by each entry's raw length. */
        while (p[0] != ZIP_END && index--) {
            p += zipRawEntryLength(p);
        }
    }
    /* Steps still remaining (index > 0) or landing on the end marker means
     * the requested element does not exist. */
    return (p[0] == ZIP_END || index > 0) ? NULL : p;
}
查找其实就是遍历,Next再加上索引,就可以找到相关的位置。
三、总结
quicklist的主要功能其实就是普通list和ziplist的合二为一,查找、插入、删除等动作,都得先从整体考虑,再从细节处理。同时,在一些特殊情况下,还得考虑边界如何处理的问题,这在前面都有所说明,在此再次指出,希望引起重视。