一 序
上一篇整理了《quicklist》底层数据结构的实现原理。Redis 3.2版本对外开放的列表list结构就是采用quicklist作为底层实现,所以编码上就只有OBJ_ENCODING_QUICKLIST,而quicklist底层又是ziplist.所以从list的接口层面来看,是基于quicklist的接口进行封装。源码在t_list.c. 可以结合quicklist的源码quicklist.c来看。
二 命令
server.h有list的一些接口
/* List data type */
/* Prototypes of the list-type abstraction helpers and of the list command
 * helpers shared across the server. */
void listTypeTryConversion(robj *subject, robj *value);
/* Push 'value' at the head or the tail of 'subject', selected by 'where'. */
void listTypePush(robj *subject, robj *value, int where);
/* Pop one element from the head or the tail of 'subject'; returns NULL when
 * nothing was popped. */
robj *listTypePop(robj *subject, int where);
unsigned long listTypeLength(robj *subject);
/* Iterator life cycle: create at 'index', advance with listTypeNext(),
 * then release. */
listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
void listTypeReleaseIterator(listTypeIterator *li);
int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
robj *listTypeGet(listTypeEntry *entry);
/* Insert 'value' after (LIST_TAIL) or before (LIST_HEAD) the element
 * referenced by 'entry'. */
void listTypeInsert(listTypeEntry *entry, robj *value, int where);
int listTypeEqual(listTypeEntry *entry, robj *o);
void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
void listTypeConvert(robj *subject, int enc);
/* Blocking-operation support. */
void unblockClientWaitingData(client *c);
void handleClientsBlockedOnLists(void);
void popGenericCommand(client *c, int where);
void signalListAsReady(redisDb *db, robj *key);
具体的命令的实现
序号 | 命令及描述 |
1 | BLPOP key1 [key2 ] timeout:移出并获取列表的第一个元素, 如果列表没有元素会阻塞列表直到等待超时或发现可弹出元素为止。 |
2 | BRPOP key1 [key2 ] timeout:移出并获取列表的最后一个元素, 如果列表没有元素会阻塞列表直到等待超时或发现可弹出元素为止。 |
3 | BRPOPLPUSH source destination timeout:从列表中弹出一个值,将弹出的元素插入到另外一个列表中并返回它;如果列表没有元素会阻塞列表直到等待超时或发现可弹出元素为止。 |
4 | LINDEX key index:通过索引获取列表中的元素 |
5 | LINSERT key BEFORE|AFTER pivot value:在列表的元素前或者后插入元素 |
6 | LLEN key:获取列表长度 |
7 | LPOP key:移出并获取列表的第一个元素 |
8 | LPUSH key value1 [value2]:将一个或多个值插入到列表头部 |
9 | LPUSHX key value:将一个或多个值插入到已存在的列表头部 |
10 | LRANGE key start stop:获取列表指定范围内的元素 |
11 | LREM key count value:移除列表元素 |
12 | LSET key index value:通过索引设置列表元素的值 |
13 | LTRIM key start stop:对一个列表进行修剪(trim),就是说,让列表只保留指定区间内的元素,不在指定区间之内的元素都将被删除。 |
14 | RPOP key:移除并获取列表最后一个元素 |
15 | RPOPLPUSH source destination:移除列表的最后一个元素,并将该元素添加到另一个列表并返回 |
16 | RPUSH key value1 [value2]:在列表中添加一个或多个值 |
17 | RPUSHX key value:为已存在的列表添加值 |
2.1 迭代器
redis 为list定义了迭代器,源码在server.h
/* Structure to hold list iteration abstraction. */
typedef struct {
robj *subject; /* List object being iterated */
unsigned char encoding; /* Copy of subject->encoding taken when the iterator is created */
unsigned char direction; /* Iteration direction */
quicklistIter *iter; /* Underlying quicklist iterator */
} listTypeIterator;
/* Structure for an entry while iterating over a list. */
typedef struct {
listTypeIterator *li; /* Iterator that produced this entry (set by listTypeNext) */
quicklistEntry entry; /* Entry in quicklist */
} listTypeEntry;
迭代器包装在一起来进一步屏蔽着底层编码方式的区别。与迭代器相关的操作主要有以下几个:
- 初始化迭代器
/* Initialize an iterator at the specified index.
 *
 * Allocates a listTypeIterator for 'subject', records the list encoding and
 * the requested 'direction', and attaches a quicklist iterator positioned at
 * element 'index'. Panics if the list is not quicklist-encoded.
 *
 * Returns the newly allocated iterator. */
listTypeIterator *listTypeInitIterator(robj *subject, long index,
                                       unsigned char direction) {
    listTypeIterator *it = zmalloc(sizeof(listTypeIterator));

    it->subject = subject;
    it->encoding = subject->encoding;
    it->direction = direction;
    it->iter = NULL; /* No quicklist iterator attached yet. */

    /* LIST_HEAD means start at TAIL and move *towards* head.
     * LIST_TAIL means start at HEAD and move *towards* tail. */
    int ql_direction;
    if (direction == LIST_HEAD) {
        ql_direction = AL_START_TAIL;
    } else {
        ql_direction = AL_START_HEAD;
    }

    if (it->encoding == OBJ_ENCODING_QUICKLIST) {
        /* Bind a quicklist iterator positioned at 'index'. */
        it->iter = quicklistGetIteratorAtIdx(it->subject->ptr,
                                             ql_direction, index);
    } else {
        serverPanic("Unknown list encoding");
    }
    return it;
}
- 迭代器的迭代
/* Store the element the iterator currently points at into 'entry' and
 * advance the iterator.
 *
 * Returns the result of quicklistNext() for quicklist-encoded lists; panics
 * on any other encoding (with 0 as the nominal return value). */
int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
    /* Protect from converting when iterating: the list encoding must still
     * match the one recorded in the iterator. */
    serverAssert(li->subject->encoding == li->encoding);

    /* Remember which iterator produced this entry. */
    entry->li = li;

    if (li->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding");
        return 0;
    }
    /* Fill entry->entry and move the quicklist iterator forward. */
    return quicklistNext(li->iter, &entry->entry);
}
2.2 非阻塞命令
push
/* LPUSH entry point: delegates to pushGenericCommand() with LIST_HEAD. */
void lpushCommand(client *c) {
pushGenericCommand(c,LIST_HEAD);
}
/* RPUSH entry point: delegates to pushGenericCommand() with LIST_TAIL. */
void rpushCommand(client *c) {
pushGenericCommand(c,LIST_TAIL);
}
/* Shared implementation for pushing into an existing list.
 *
 * c      - client issuing the command; c->argv[1] is the list key.
 * refval - pivot element, or NULL. When non-NULL, 'val' is inserted next to
 *          the first element equal to 'refval' (scanning head to tail).
 * val    - value to insert.
 * where  - LIST_HEAD or LIST_TAIL; selects the end to push to (no pivot) or
 *          the side of the pivot to insert at.
 *
 * Replies with shared.czero (through lookupKeyWriteOrReply) when the key
 * lookup fails, with shared.cnegone when the pivot is not found, and with
 * the list length after a successful insertion. */
void pushxGenericCommand(client *c, robj *refval, robj *val, int where) {
    /* Look the key up for writing; stop on lookup failure or wrong type. */
    robj *subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
    if (subject == NULL) return;
    if (checkType(c,subject,OBJ_LIST)) return;

    if (refval == NULL) {
        /* No pivot: plain push to the head or the tail. */
        char *event = "rpush";
        if (where == LIST_HEAD) event = "lpush";

        listTypePush(subject,val,where);
        signalModifiedKey(c->db,c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
        server.dirty++;
    } else {
        listTypeEntry entry;
        int found = 0;

        /* Seek refval from head to tail */
        listTypeIterator *iter = listTypeInitIterator(subject,0,LIST_TAIL);
        while (!found && listTypeNext(iter,&entry)) {
            if (listTypeEqual(&entry,refval)) {
                /* Insert before or after the pivot, depending on 'where'. */
                listTypeInsert(&entry,val,where);
                found = 1;
            }
        }
        listTypeReleaseIterator(iter);

        if (!found) {
            /* Notify client of a failed insert */
            addReply(c,shared.cnegone);
            return;
        }
        signalModifiedKey(c->db,c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_LIST,"linsert",
                            c->argv[1],c->db->id);
        server.dirty++;
    }

    /* Reply with the number of elements now in the list. */
    addReplyLongLong(c,listTypeLength(subject));
}
/* Insert 'value' next to the element referenced by 'entry'.
 *
 * where == LIST_TAIL inserts after the element, where == LIST_HEAD inserts
 * before it; any other value of 'where' inserts nothing. Panics if the list
 * is not quicklist-encoded. */
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
    if (entry->li->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding");
        return;
    }

    /* Work on the decoded form of the object so its payload can be read as
     * an sds string. */
    robj *decoded = getDecodedObject(value);
    sds str = decoded->ptr;
    size_t len = sdslen(str);

    if (where == LIST_TAIL) {
        quicklistInsertAfter((quicklist *)entry->entry.quicklist,
                             &entry->entry, str, len);
    } else if (where == LIST_HEAD) {
        quicklistInsertBefore((quicklist *)entry->entry.quicklist,
                              &entry->entry, str, len);
    }

    /* Drop the reference obtained from getDecodedObject(). */
    decrRefCount(decoded);
}
/* Push 'value' onto the list 'subject'; this is the low-level primitive
 * behind the PUSH commands.
 *
 * where == LIST_HEAD pushes at the quicklist head, anything else at the
 * tail. Panics if the list is not quicklist-encoded. */
void listTypePush(robj *subject, robj *value, int where) {
    if (subject->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding");
        return;
    }

    /* Translate the list end into the quicklist constant. */
    int pos = QUICKLIST_TAIL;
    if (where == LIST_HEAD) pos = QUICKLIST_HEAD;

    /* Use the decoded form of the object so its payload is an sds string. */
    robj *decoded = getDecodedObject(value);
    size_t len = sdslen(decoded->ptr);
    quicklistPush(subject->ptr, decoded->ptr, len, pos);

    /* Drop the reference obtained from getDecodedObject(). */
    decrRefCount(decoded);
}
可以看到,push相关的底层操作是基于quicklist的push和insert实现的。
pop
再来看看pop
/* LPOP entry point: delegates to popGenericCommand() with LIST_HEAD. */
void lpopCommand(client *c) {
popGenericCommand(c,LIST_HEAD);
}
/* RPOP entry point: delegates to popGenericCommand() with LIST_TAIL. */
void rpopCommand(client *c) {
popGenericCommand(c,LIST_TAIL);
}
/* Shared implementation of LPOP and RPOP.
 *
 * Pops one element from the end of the list at c->argv[1] selected by
 * 'where' and sends it to the client. Replies with shared.nullbulk when the
 * key lookup fails or nothing could be popped. When the pop empties the
 * list, the key is removed from the database. */
void popGenericCommand(client *c, int where) {
    /* Look the key up for writing; stop on lookup failure or wrong type. */
    robj *list = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
    if (list == NULL) return;
    if (checkType(c,list,OBJ_LIST)) return;

    robj *popped = listTypePop(list,where);
    if (popped == NULL) {
        addReply(c,shared.nullbulk);
        return;
    }

    /* Name of the keyspace event to emit. */
    char *event = (where == LIST_HEAD) ? "lpop" : "rpop";

    /* Send the element to the client, then drop our reference to it. */
    addReplyBulk(c,popped);
    decrRefCount(popped);
    notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);

    /* The pop emptied the list: emit "del" and delete the key. */
    if (listTypeLength(list) == 0) {
        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
                            c->argv[1],c->db->id);
        dbDelete(c->db,c->argv[1]);
    }

    /* Signal that the key was modified and count the change. */
    signalModifiedKey(c->db,c->argv[1]);
    server.dirty++;
}
/* Pop one element from the head (where == LIST_HEAD) or the tail of the
 * list 'subject'.
 *
 * Returns the popped element as an object, or NULL when quicklistPopCustom()
 * reports that nothing was popped. Panics if the list is not
 * quicklist-encoded. */
robj *listTypePop(robj *subject, int where) {
    long long vlong;
    robj *value = NULL;
    int ql_where = QUICKLIST_TAIL;

    if (where == LIST_HEAD) ql_where = QUICKLIST_HEAD;

    if (subject->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding");
        return value;
    }

    /* The popped element comes back either as an object stored in 'value'
     * (through the listPopSaver callback) or as an integer in 'vlong'. */
    int popped = quicklistPopCustom(subject->ptr, ql_where,
                                    (unsigned char **)&value,
                                    NULL, &vlong, listPopSaver);
    if (popped && value == NULL) {
        /* No object was stored: build a string object from the integer. */
        value = createStringObjectFromLongLong(vlong);
    }
    return value;
}
2.3 阻塞命令
首先我们介绍阻塞命令的两种行为。
2.3.1 非阻塞行为
假如 BLPOP 或 BRPOP 命令被执行,在给定的所有key中,
至少有1个是非空列表,那么就会直接将结果和信息返回给调用者。
有多个非空列表,按照key的先后顺序,依次检查各个列表。
2.3.2 阻塞行为
如果所有给定的key都不存在,或者key对应的都是空列表,那么执行 BLPOP 或 BRPOP 命令的连接将会被阻塞,直到另一个client对这些key中的任意一个执行 [LR]PUSH 命令、使新数据出现在该key的列表中,才会解除调用 BLPOP 或 BRPOP 命令的client的阻塞状态。
2.3.3 阻塞命令的实现
其实阻塞命令的实现就是在非阻塞命令的基础上,再判断是否需要执行相应的阻塞操作即可。
/* BLPOP entry point: delegates to blockingPopGenericCommand() with LIST_HEAD. */
void blpopCommand(client *c) {
blockingPopGenericCommand(c,LIST_HEAD);
}
/* BRPOP entry point: delegates to blockingPopGenericCommand() with LIST_TAIL. */
void brpopCommand(client *c) {
blockingPopGenericCommand(c,LIST_TAIL);
}
/* Blocking RPOP/LPOP */
/* Shared implementation of BLPOP and BRPOP.
 *
 * c->argv[1 .. argc-2] are the keys to try in order and c->argv[argc-1] is
 * the timeout in seconds. The first key holding a non-empty list is served
 * immediately, exactly like a plain [LR]POP. When no key can be served the
 * client is blocked on all the keys, unless it is inside MULTI/EXEC, in
 * which case an empty reply is sent instead. */
void blockingPopGenericCommand(client *c, int where) {
    mstime_t timeout;

    /* Parse the timeout argument (unit: seconds). */
    if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout,UNIT_SECONDS)
        != C_OK) return;

    /* Non-blocking path: scan the keys in order and serve the first one
     * that holds a non-empty list. */
    for (int j = 1; j < c->argc-1; j++) {
        robj *o = lookupKeyWrite(c->db,c->argv[j]);

        if (o == NULL) continue; /* Key does not exist: try the next one. */
        if (o->type != OBJ_LIST) {
            addReply(c,shared.wrongtypeerr);
            return;
        }
        if (listTypeLength(o) == 0) continue; /* Empty list: try the next. */

        /* Non empty list, this is like a normal [LR]POP. */
        char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
        robj *value = listTypePop(o,where);
        serverAssert(value != NULL);

        /* Reply with a two-element multi bulk: the key and the value. */
        addReplyMultiBulkLen(c,2);
        addReplyBulk(c,c->argv[j]);
        addReplyBulk(c,value);
        decrRefCount(value);
        notifyKeyspaceEvent(NOTIFY_LIST,event,
                            c->argv[j],c->db->id);

        /* The pop emptied the list: delete the key and emit "del". */
        if (listTypeLength(o) == 0) {
            dbDelete(c->db,c->argv[j]);
            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
                                c->argv[j],c->db->id);
        }
        signalModifiedKey(c->db,c->argv[j]);
        server.dirty++;

        /* Replicate it as an [LR]POP instead of B[LR]POP. */
        rewriteClientCommandVector(c,2,
            (where == LIST_HEAD) ? shared.lpop : shared.rpop,
            c->argv[j]);
        return;
    }

    /* If we are inside a MULTI/EXEC and the list is empty the only thing
     * we can do is treating it as a timeout (even with timeout 0). */
    if (c->flags & CLIENT_MULTI) {
        addReply(c,shared.nullmultibulk);
        return;
    }

    /* If the list is empty or the key does not exists we must block */
    blockForKeys(c, c->argv + 1, c->argc - 2, timeout, NULL);
}
前面的能找到key,直接返回不会阻塞,很好理解,所以看阻塞的过程,也就是blockForKeys()函数。
/*-----------------------------------------------------------------------------
* Blocking POP operations
*----------------------------------------------------------------------------*/
/* This is how the current blocking POP works, we use BLPOP as example:
* - If the user calls BLPOP and the key exists and contains a non empty list
* then LPOP is called instead. So BLPOP is semantically the same as LPOP
* if blocking is not required.
* - If instead BLPOP is called and the key does not exists or the list is
* empty we need to block. In order to do so we remove the notification for
* new data to read in the client socket (so that we'll not serve new
* requests if the blocking request is not served). Also we put the client
* in a dictionary (db->blocking_keys) mapping keys to a list of clients
* blocking for this keys.
* - If a PUSH operation against a key with blocked clients waiting is
* performed, we mark this key as "ready", and after the current command,
* MULTI/EXEC block, or script, is executed, we serve all the clients waiting
* for this list, from the one that blocked first, to the last, accordingly
* to the number of elements we have in the ready list.
*/
/* Set a client in blocking mode for the specified key, with the specified
 * timeout */
/* c       - client to block.
 * keys    - array of 'numkeys' key objects the client waits on.
 * timeout - stored as-is in c->bpop.timeout.
 * target  - destination key stored in c->bpop.target (may be NULL).
 *
 * Every key is recorded both in c->bpop.keys and in c->db->blocking_keys,
 * then the client is put in the BLOCKED_LIST state. */
void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target) {
dictEntry *de;
list *l;
int j;
/* Record the timeout and the target in the client's blocking state. */
c->bpop.timeout = timeout;
c->bpop.target = target;
/* The blocking state holds its own reference to the target. */
if (target != NULL) incrRefCount(target);
/* Link the client with every key it is going to wait on. */
for (j = 0; j < numkeys; j++) {
/* If the key already exists in the dict ignore it. */
/* c->bpop.keys is used as a set (every value is NULL) holding the keys
 * this client is blocked on. A key is added only when not yet present;
 * a duplicate makes dictAdd() fail and the key is skipped. */
if (dictAdd(c->bpop.keys,keys[j],NULL) != DICT_OK) continue;
incrRefCount(keys[j]);
/* And in the other "side", to map keys -> clients */
/* c->db->blocking_keys maps each blocking key to the list of clients
 * blocked on it. The code below links this client to the key. */
de = dictFind(c->db->blocking_keys,keys[j]);
if (de == NULL) {
/* No client list for this key yet: create one and register it. */
int retval;
/* For every key we take a list of clients blocked for it */
l = listCreate();
retval = dictAdd(c->db->blocking_keys,keys[j],l);
incrRefCount(keys[j]);
serverAssertWithInfo(c,keys[j],retval == DICT_OK);
} else { /* A client list already exists for this key: reuse it. */
l = dictGetVal(de);
}
/* Append the client to the list of clients blocked on this key. */
listAddNodeTail(l,c);
}
blockClient(c,BLOCKED_LIST); /* Put the client in the blocked state. */
}
从上面的代码中,使用了client结构的一些成员分别是c->bpop.xxxx 和 c->db->blocking_keys。我们分别查看一下其定义:代码在server.h,
/* With multiplexing we need to take per-client state.
 * Clients are taken in a linked list. */
/* The blocking list operations in this file use 'db', 'flags', 'lastcmd',
 * 'btype' and 'bpop'. */
typedef struct client {
uint64_t id; /* Client incremental unique ID. */
int fd; /* Client socket. */
redisDb *db; /* Pointer to currently SELECTed DB. */
int dictid; /* ID of the currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */
sds querybuf; /* Buffer we use to accumulate client queries. */
size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
int argc; /* Num of arguments of current command. */
robj **argv; /* Arguments of current command. */
struct redisCommand *cmd, *lastcmd; /* Last command executed. */
int reqtype; /* Request protocol type: PROTO_REQ_* */
int multibulklen; /* Number of multi bulk arguments left to read. */
long bulklen; /* Length of bulk argument in multi bulk request. */
list *reply; /* List of reply objects to send to the client. */
unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
size_t sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
time_t ctime; /* Client creation time. */
time_t lastinteraction; /* Time of the last interaction, used for timeout */
/* NOTE(review): presumably the time the output buffer soft limit was
 * reached - confirm against the output buffer limit code. */
time_t obuf_soft_limit_reached_time;
int flags; /* Client flags: CLIENT_* macros. */
int authenticated; /* When requirepass is non-NULL. */
int replstate; /* Replication state if this is a slave. */
int repl_put_online_on_ack; /* Install slave write handler on ACK. */
int repldbfd; /* Replication DB file descriptor. */
off_t repldboff; /* Replication DB file offset. */
off_t repldbsize; /* Replication DB file size. */
sds replpreamble; /* Replication DB preamble. */
long long reploff; /* Replication offset if this is our master. */
long long repl_ack_off; /* Replication ack offset, if this is a slave. */
long long repl_ack_time;/* Replication ack time, if this is a slave. */
long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
copying this slave output buffer
should use. */
char replrunid[CONFIG_RUN_ID_SIZE+1]; /* Master run id if is a master. */
int slave_listening_port; /* As configured with: REPLCONF listening-port */
char slave_ip[NET_IP_STR_LEN]; /* Optionally given by REPLCONF ip-address */
int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
multiState mstate; /* MULTI/EXEC state */
int btype; /* Type of blocking op if CLIENT_BLOCKED. */
blockingState bpop; /* blocking state */
long long woff; /* Last write global replication offset. */
list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
sds peerid; /* Cached peer ID. */
/* Response buffer */
int bufpos;
char buf[PROTO_REPLY_CHUNK_BYTES];
} client;
属性还挺多的,先看这次阻塞相关的。blockingState,redisDb 。
/* Per-client state describing the blocking operation in progress. */
typedef struct blockingState {
/* Generic fields. */
/* When the blocking operation times out. */
mstime_t timeout; /* Blocking operation timeout. If UNIX current time
* is > timeout then the operation timed out. */
/* BLOCKED_LIST */
/* Keys the client is blocked on. */
dict *keys; /* The keys we are waiting to terminate a blocking
* operation such as BLPOP. Otherwise NULL. */
/* Used by BRPOPLPUSH: the destination key that receives the pushed
 * element. */
robj *target; /* The key that should receive the element,
* for BRPOPLPUSH. */
/* BLOCKED_WAIT */
int numreplicas; /* Number of replicas we are waiting for ACK. */
long long reploffset; /* Replication offset to reach. */
} blockingState;
/* Abridged view of redisDb: only the two fields used by blocking
 * operations are shown here. */
typedef struct redisDb {
/* Keys that currently have clients blocked on them. */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
/* Keys whose blocked clients can now be served. */
dict *ready_keys; /* Blocked keys that received a PUSH */
} redisDb;
blockForKeys大致流程: 首先将timeout和target分别保存到blockingState中。然后遍历所有传过来的key,这些key都是造成当前client阻塞的键,需要把它们记录下来,因此将这些键加入到 c->bpop.keys 中。从上面的blockingState结构可以看出,c->bpop.keys 这个成员是一个字典结构,它记录着所有造成客户端阻塞的键:字典的键为传入的各个key,而值则为NULL。当然,不光要记录所有造成客户端阻塞的键,还要在 c->db->blocking_keys 中为这些键和被阻塞的客户端建立对应的映射关系。
明白了阻塞的实现,那么对应的解阻塞的代码就很容易看懂。解阻塞就是从记录中删除对应key并且解除key和client的映射。
/* Unblock a client that's waiting in a blocking operation such as BLPOP.
* You should never call this function directly, but unblockClient() instead. */
void unblockClientWaitingData(redisClient *c) {
dictEntry *de;
dictIterator *di;
list *l;
redisAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0);
// 遍历所有 key ,将它们从客户端 db->blocking_keys 的链表中移除
di = dictGetIterator(c->bpop.keys);
/* The client may wait for multiple keys, so unblock it for every key. */
while((de = dictNext(di)) != NULL) {
robj *key = dictGetKey(de);
/* Remove this client from the list of clients waiting for this key. */
// 获取所有因为 key 而被阻塞的客户端的链表
l = dictFetchValue(c->db->blocking_keys,key);
redisAssertWithInfo(c,key,l != NULL);
// 将指定客户端从链表中删除
listDelNode(l,listSearchKey(l,c));
/* If the list is empty we need to remove it to avoid wasting memory */
// 如果已经没有其他客户端阻塞在这个 key 上,那么删除这个链表
if (listLength(l) == 0)
dictDelete(c->db->blocking_keys,key);
}
dictReleaseIterator(di);
/* Cleanup the client structure */
// 清空 bpop.keys 集合(字典)
dictEmpty(c->bpop.keys,NULL);
if (c->bpop.target) {
decrRefCount(c->bpop.target);
c->bpop.target = NULL;
}
}
关于解除阻塞的完整流程我没有整理完,待补充。晚安。
*******************补充解除阻塞流程*************************
List的阻塞解除过程如下:
(1)、 如果有其它客户端执行命令往该key(即List)添加新值,先在blocking_keys中检查是否有客户端因该key而被阻塞,如果有则调用signalListAsReady为该key创建一个readyList结构并放入server.ready_keys链表中,同时也将该key添加到db->ready_keys中。db->ready_keys是一个哈希表,它的value为NULL。这个server.ready_keys链表最后会由handleClientsBlockedOnLists函数处理。
注意这里有所优化:为什么要用一个链表和一个哈希表来存储同一个key?如果往一个key中添加了多个新值,Redis只需要在server.ready_keys中为该key保存一个readyList节点即可,这样可以避免在一个事务或脚本中将同一个key一次又一次地添加到server.ready_keys链表中。为了不重复添加,每次添加前需要进行一次“查重”操作,但是server.ready_keys是一个链表,在其中进行查找的时间复杂度为O(n),效率比较差。为解决这个问题,Redis引入了db->ready_keys哈希表来保存同一个key,哈希表的查找效率高,所以每次往server.ready_keys添加节点时,只要在db->ready_keys中检查一下,就知道server.ready_keys里有没有相同的节点了。这一点我们做开发的时候也可以借鉴,比如维护关联关系或者映射关系的时候,尤其适合数据初始化到内存中的场景。
下面我们来看看signalListAsReady函数涉及到的结构体:
readyList定义在server.h文件中(3.2之前的版本为redis.h):
/* The following structure represents a node in the server.ready_keys list,
 * where we accumulate all the keys that had clients blocked with a blocking
 * operation such as B[LR]POP, but received new data in the context of the
 * last executed command.
 *
 * After the execution of every command or script, we run this list to check
 * if as a result we should serve data to clients blocked, unblocking them.
 * Note that server.ready_keys will not have duplicates as there is a
 * dictionary also called ready_keys in every structure representing a Redis
 * database, where we make sure to remember if a given key was already added
 * in the server.ready_keys list. */
typedef struct readyList {
redisDb *db; /* Database the key belongs to */
robj *key; /* Key that clients are blocked on */
} readyList;
我作为初学者对这里颇有印象,因为找源码过程中对整体不熟悉,往往不好找。
readyList结构表示server.ready_keys链表中的一个节点,其中key字段表示阻塞的key,db指向该键所在的数据库。
db->ready_keys定义在redisDb结构体中,用于存放已经准备好数据的阻塞状态的key:
/* Redis database representation. There are multiple databases identified
 * by integers from 0 (the default database) up to the max configured
 * database. The database number is the 'id' field in the structure. */
typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
dict *ready_keys; /* Blocked keys that received a PUSH, i.e. keys that now have data ready for their blocked clients */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */
int id; /* Database ID */
long long avg_ttl; /* Average TTL, just for stats */
} redisDb;
signalListAsReady函数的源码如下,在t_list.c:
/* If the specified key has clients blocked waiting for list pushes, this
 * function will put the key reference into the server.ready_keys list.
 * Note that db->ready_keys is a hash table that allows us to avoid putting
 * the same key again and again in the list in case of multiple pushes
 * made by a script or in the context of MULTI/EXEC.
 *
 * The list will be finally processed by handleClientsBlockedOnLists() */
void signalListAsReady(redisDb *db, robj *key) {
    /* Nothing to queue when no client is blocked on this key, or when the
     * key was already signaled. */
    if (dictFind(db->blocking_keys,key) == NULL ||
        dictFind(db->ready_keys,key) != NULL) return;

    /* Ok, we need to queue this key into server.ready_keys: build a
     * readyList node holding the key and its database. */
    readyList *node = zmalloc(sizeof(*node));
    node->db = db;
    node->key = key;
    incrRefCount(key); /* Reference held by the readyList node. */
    listAddNodeTail(server.ready_keys,node);

    /* We also add the key in the db->ready_keys dictionary in order
     * to avoid adding it multiple times into a list with a simple O(1)
     * check. */
    incrRefCount(key); /* Reference held by the dictionary. */
    int added = dictAdd(db->ready_keys,key,NULL);
    serverAssert(added == DICT_OK);
}
到目前为止,Redis只是收集好了已经准备好数据的处于阻塞状态的key信息,接下来才是真正解除客户端阻塞状态的操作。
(2)、调用handleClientsBlockedOnLists函数,该函数将遍历server.ready_keys中已经准备好数据的key,同时遍历阻塞在该key上的所有客户端(直接从c->db->blocking_keys字典中获取客户端列表)。如果key对应的列表不为空,则从中弹出一个元素返回给客户端并解除该客户端的阻塞状态,如此反复,直到列表为空或者没有客户端因为该key而阻塞为止。
/* This function should be called by Redis every time a single command,
* a MULTI/EXEC block, or a Lua script, terminated its execution after
* being called by a client.
*这个函数会在 Redis 每次执行完单个命令、事务块或 Lua 脚本之后调用。
* All the keys with at least one client blocked that received at least
* one new element via some PUSH operation are accumulated into
* the server.ready_keys list. This function will run the list and will
* serve clients accordingly. Note that the function will iterate again and
* again as a result of serving BRPOPLPUSH we can have new blocking clients
* to serve because of the PUSH side of BRPOPLPUSH.
** 对所有被阻塞在某个客户端的 key 来说,只要这个 key 被执行了某种 PUSH 操作
* 那么这个 key 就会被放到 serve.ready_keys 去。
*
* 这个函数会遍历整个 serve.ready_keys 链表,
* 并将里面的 key 的元素弹出给被阻塞客户端,
* 从而解除客户端的阻塞状态。
*
* 函数会一次又一次地进行迭代,
* 因此它在执行 BRPOPLPUSH 命令的情况下也可以正常获取到正确的新被阻塞客户端。
/
void handleClientsBlockedOnLists(void) {
// 遍历整个 ready_keys 链表
while(listLength(server.ready_keys) != 0) {
list *l;
/* Point server.ready_keys to a fresh list and save the current one
* locally. This way as we run the old list we are free to call
* signalListAsReady() that may push new elements in server.ready_keys
* when handling clients blocked into BRPOPLPUSH. */
// 备份server.ready_keys,然后再给服务器创建一个新列表。接下来的操作都在备份server.ready_keys上进行
l = server.ready_keys;
server.ready_keys = listCreate();
while(listLength(l) != 0) {
listNode *ln = listFirst(l);// 取出 ready_keys 中的首个链表节点,不要看错了,是L不是1.
readyList *rl = ln->value;// 指向 readyList 结构
/* First of all remove this key from db->ready_keys so that
* we can safely call signalListAsReady() against this key.
* 从 db->ready_keys 中移除就绪的 key
*/
dictDelete(rl->db->ready_keys,rl->key);
/* If the key exists and it's a list, serve blocked clients
* with data. */
// 获取listType对象,这个对象应该是非空的,并且是列表
robj *o = lookupKeyWrite(rl->db,rl->key);
if (o != NULL && o->type == OBJ_LIST) {
dictEntry *de;
/* We serve clients in the same order they blocked for
* this key, from the first blocked to the last. */
// 取出所有被这个 key 阻塞的客户端
de = dictFind(rl->db->blocking_keys,rl->key);
if (de) {
list *clients = dictGetVal(de);
int numclients = listLength(clients);
while(numclients--) {
listNode *clientnode = listFirst(clients); // 取出一个客户端
client *receiver = clientnode->value;
// 设置pop出的目标对象(只在 BRPOPLPUSH 时使用)
robj *dstkey = receiver->bpop.target;
// 从列表中弹出元素
// 弹出的位置取决于是执行 BLPOP 还是 BRPOP 或者 BRPOPLPUSH
int where = (receiver->lastcmd &&
receiver->lastcmd->proc == blpopCommand) ?
LIST_HEAD : LIST_TAIL;
robj *value = listTypePop(o,where);
if (value) {// 如果listType还有元素(就是非null),返回给相应客户端
/* Protect receiver->bpop.target, that will be
* freed by the next unblockClient()
* call. */
if (dstkey) incrRefCount(dstkey);
// 取消客户端的阻塞状态
unblockClient(receiver);
// 将pop出来的值返回给相应的客户端receiver(就是把value推入receiver的key)
if (serveClientBlockedOnList(receiver,
rl->key,dstkey,rl->db,value,
where) == C_ERR)
{
/* If we failed serving the client we need
* to also undo the POP operation. */
// 如果操作失败,则回滚(插入原listType对象)
listTypePush(o,value,where);
}
if (dstkey) decrRefCount(dstkey);
decrRefCount(value);
} else { // 如果listType中没有元素了,没有元素可以返回剩余被阻塞客户端,
// 这些客户端要等待对键的下次 PUSH
break;
}
}
}
if (listTypeLength(o) == 0) {//如果列表元素已经为空,那么从数据库中将它删除
dbDelete(rl->db,rl->key);
}
/* We don't call signalModifiedKey() as it was already called
* when an element was pushed on the list. */
}
/* Free this item. */ // 资源释放
decrRefCount(rl->key);
zfree(rl);
listDelNode(l,ln);
}
listRelease(l); /* We have the new list on place at this point. */
}
}
上面的代码仍然没有和前面列出的unblockClientWaitingData对应起来,把两者连接起来的就是unblockClient。
这个函数在server.h中声明,真正的实现又找了好一会,在blocked.c中:
/* Unblock a client calling the right function depending on the kind
 * of operation the client is blocking for.
 *
 * After the type-specific cleanup, the blocked flag and the blocking type
 * are reset, the blocked clients counter is decremented, and the client is
 * queued in server.unblocked_clients unless it is already flagged as being
 * there. Panics on an unknown blocking type. */
void unblockClient(client *c) {
    int kind = c->btype;

    if (kind == BLOCKED_LIST) {
        unblockClientWaitingData(c);
    } else if (kind == BLOCKED_WAIT) {
        unblockClientWaitingReplicas(c);
    } else {
        serverPanic("Unknown btype in unblockClient().");
    }

    /* Clear the flags, and put the client in the unblocked list so that
     * we'll process new commands in its query buffer ASAP. */
    c->flags &= ~CLIENT_BLOCKED;
    c->btype = BLOCKED_NONE;
    server.bpop_blocked_clients--;

    /* The client may already be into the unblocked list because of a previous
     * blocking operation, don't add back it into the list multiple times. */
    if (c->flags & CLIENT_UNBLOCKED) return;

    c->flags |= CLIENT_UNBLOCKED;
    listAddNodeTail(server.unblocked_clients,c);
}
根据阻塞类型取消给定客户端的阻塞状态。怎么样,有没有豁然开朗的感觉。。。。大神请无视。每前进一步都是值得的。
参考: