networking的代码主要是针对client的命令进行处理,主要是实现三个功能:client连接的管理;解析client的请求;发送回复内容给client。
一、client连接的管理
进行连接的接收,并创建client结构体,完成内部属性的初始化,事件回调函数的设置。
/* Event callback for the listening TCP socket `fd`: accepts connections in a
 * loop and passes each accepted descriptor to acceptCommonHandler().
 * Parts of the body are elided ("……") in this excerpt. */
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
……
// `max` bounds how many accept attempts are made per invocation (its initial value is elided above)
while(max--) {
cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
if (cfd == ANET_ERR) {
……
// stop accepting for this invocation once anetTcpAccept reports an error
return;
}
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
// hand the accepted descriptor over for client creation; flags are passed as 0
acceptCommonHandler(cfd,0,cip);
}
}
/* Common post-accept step: wraps the accepted descriptor in a client, rejects
 * the connection when the client limit is exceeded, and otherwise counts the
 * connection and applies the caller-supplied flags.
 *   fd    - accepted socket descriptor
 *   flags - OR-ed into c->flags on success
 *   ip    - not referenced in the portion of the body shown here */
static void acceptCommonHandler(int fd, int flags, char *ip) {
client *c;// the client struct created for the newly established connection
if ((c = createClient(fd)) == NULL) {
close(fd); /* May be already closed, just ignore errors */
return;
}
// More clients than server.maxclients: the connection is rejected and the client freed
if (listLength(server.clients) > server.maxclients) {
……
server.stat_rejected_conn++;
freeClient(c);
return;
}
……
server.stat_numconnections++;
c->flags |= flags;
}
/* Allocates a client for `fd` and initialises it. When fd is not -1 the socket
 * is configured, readQueryFromClient is registered as its readable-event
 * callback and the client is appended to server.clients; when fd is -1 those
 * steps are skipped. Returns the client on the path shown.
 * NOTE(review): the AE_ERR branch is elided here; acceptCommonHandler checks
 * the result for NULL, so presumably that branch returns NULL - confirm. */
client *createClient(int fd) {
client *c = zmalloc(sizeof(client));
// Set socket options on fd and register readQueryFromClient as the readable-event callback
if (fd != -1) {
anetNonBlock(NULL,fd);
anetEnableTcpNoDelay(NULL,fd);
if (server.tcpkeepalive)
anetKeepAlive(NULL,fd,server.tcpkeepalive);
if (aeCreateFileEvent(server.el,fd,AE_READABLE,
readQueryFromClient, c) == AE_ERR)
{
……
}
}
selectDb(c,0);
……// initialise the client's internal fields
listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid);
listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
// only clients backed by a real descriptor are tracked in server.clients
if (fd != -1) listAddNodeTail(server.clients,c);
initClientMultiState(c);
return c;
}
二、解析client的请求
在统一的请求协议中, 所有发送至 Redis 服务器的参数都是二进制安全(binary safe)的。
协议的一般格式如下:
*<参数数量> CR LF
$<参数 1 的字节数量> CR LF
<参数 1 的数据> CR LF
...
$<参数 N 的字节数量> CR LF
<参数 N 的数据> CR LF
int processMultibulkBuffer(client *c);// parses commands sent in the unified request protocol
除了统一的请求协议,还有一种内联命令格式。内联命令没有了统一请求协议中的 "*" 项来声明参数的数量,所以在输入命令的时候, 必须使用空格来分割各个参数, 服务器在接收到数据之后,会按空格对用户的输入进行分析(parse), 并获取其中的命令参数。
int processInlineBuffer(client *c);// parses inline commands
client请求的解析和执行的过程
// Invoked as the callback when a read event fires on fd.
/* Reads request bytes from `fd` into c->querybuf, updates the interaction time
 * and input statistics, frees the client if the query buffer grows past
 * server.client_max_querybuf_len, and otherwise calls processInputBuffer().
 * Parts of the body (including how `c` and `readlen` are obtained and how the
 * result of read() is checked) are elided ("……") in this excerpt. */
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
……
qblen = sdslen(c->querybuf);
// track the largest query buffer length observed so far
if (c->querybuf_peak < qblen) c->querybuf_peak = qblen;
c->querybuf = sdsMakeRoomFor(c->querybuf, readlen);
nread = read(fd, c->querybuf+qblen, readlen);// read the client's request bytes into querybuf, after the existing content
……
sdsIncrLen(c->querybuf,nread);
c->lastinteraction = server.unixtime;// record the time of this interaction
if (c->flags & CLIENT_MASTER) c->reploff += nread;
server.stat_net_input_bytes += nread;
// querybuf is larger than the configured limit: free the client
if (sdslen(c->querybuf) > server.client_max_querybuf_len) {
sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty();
bytes = sdscatrepr(bytes,c->querybuf,64);
sdsfree(ci);
sdsfree(bytes);
freeClient(c);
return;
}// parse the buffered request(s) and execute them
processInputBuffer(c);
}
/* Parses and executes requests from c->querybuf for as long as the buffer is
 * non-empty. server.current_client is set to `c` for the duration of the call
 * and reset to NULL at the end. Part of the loop body is elided ("……"). */
void processInputBuffer(client *c) {
server.current_client = c;
/* Keep processing while there is something in the input buffer */
while(sdslen(c->querybuf)) {
……
// request type not determined yet: a leading '*' selects multibulk, anything else is inline
if (!c->reqtype) {
if (c->querybuf[0] == '*') {
c->reqtype = PROTO_REQ_MULTIBULK;
} else {
c->reqtype = PROTO_REQ_INLINE;
}
}// parse the request; a non-C_OK result from the parser ends the loop
if (c->reqtype == PROTO_REQ_INLINE) {
if (processInlineBuffer(c) != C_OK) break;
} else if (c->reqtype == PROTO_REQ_MULTIBULK) {
if (processMultibulkBuffer(c) != C_OK) break;
} else {
serverPanic("Unknown request type");
}
/* Multibulk processing could see a <= 0 length. */
if (c->argc == 0) {
resetClient(c);
} else {
// execute the command; the client is reset only when processCommand returns C_OK
if (processCommand(c) == C_OK)
resetClient(c);
}
}
server.current_client = NULL;
}
三、发送回复给client
Redis命令会返回多种不同类型的回复。redis通过发回数据的第一个字节,
来区分回复的类型:
1、状态回复(status reply)的第一个字节是 "+"
eg:"+OK\r\n"
2、错误回复(error reply)的第一个字节是 "-"
eg:"-ERR unknown command 'foobar'\r\n"
3、整数回复(integer reply)的第一个字节是 ":",用于返回整数
eg:":1000\r\n"
4、批量回复(bulk reply)的第一个字节是 "$",用于返回二进制安全的字符串,最大长度512M
eg:"$6\r\nfoobar\r\n"
5、多条批量回复(multi bulk reply)的第一个字节是 "*",用于回复返回多个值的命令
eg:*2\r\n
:1\r\n
$6\r\n
foobar\r\n
对于回复的返回,redis都是先将回复内容写到缓冲(client->buf和client->reply),然后由handleClientsWithPendingWrites将缓冲区中的回复发送给client。
写回复到缓冲的底层实现
/* Append `len` bytes from `s` to the client's fixed reply buffer c->buf.
 *
 * Returns C_OK when the bytes were copied, and also when the client is flagged
 * CLIENT_CLOSE_AFTER_REPLY (in which case nothing is copied).
 * Returns C_ERR when c->reply already holds nodes, or when the bytes do not
 * fit in the space remaining in c->buf. */
int _addReplyToBuffer(client *c, const char *s, size_t len) {
    /* A client flagged CLOSE_AFTER_REPLY takes no further output. */
    if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return C_OK;

    /* Once the reply list holds data, the static buffer is not used. */
    if (listLength(c->reply) > 0) return C_ERR;

    /* Space still free in the static buffer. */
    size_t room = sizeof(c->buf) - c->bufpos;
    if (len > room) return C_ERR;

    /* Copy the payload into the response buffer and advance the write position. */
    memcpy(c->buf + c->bufpos, s, len);
    c->bufpos += len;
    return C_OK;
}
/* Append the sds string `s` to the tail of the client's reply list c->reply.
 *
 * `s` is consumed on the paths shown: it is either stored as a new list node
 * or concatenated onto the current tail node and then freed. c->reply_bytes is
 * increased by the length of `s` in every case. Finally
 * asyncCloseClientOnOutputBufferLimitReached() is called so that a client
 * whose output buffer exceeds its limit is closed asynchronously.
 * The start of the body is elided ("……") in this excerpt. */
void _addReplySdsToList(client *c, sds s) {
    ……
    if (listLength(c->reply) == 0) {
        /* Empty list: s becomes the first node. */
        listAddNodeTail(c->reply,s);
        c->reply_bytes += sdslen(s);
    } else {
        listNode *ln = listLast(c->reply);
        sds tail = listNodeValue(ln);
        /* Nodes hold sds strings. One node may hold several replies, as long
         * as the combined length stays within PROTO_REPLY_CHUNK_BYTES. */
        if (tail && sdslen(tail)+sdslen(s) <= PROTO_REPLY_CHUNK_BYTES) {
            /* sdscatsds may return a different pointer, so store it back. */
            tail = sdscatsds(tail,s);
            listNodeValue(ln) = tail;
            c->reply_bytes += sdslen(s);
            sdsfree(s);
        } else {
            listAddNodeTail(c->reply,s);
            c->reply_bytes += sdslen(s);
        }
    }
    /* When the output buffer exceeds its limit, the client is released asynchronously. */
    asyncCloseClientOnOutputBufferLimitReached(c);
}
// Appends an robj to client->reply; same logic as _addReplySdsToList, plus one extra step converting the robj to an sds
void _addReplyObjectToList(client *c, robj *o);
// Appends the string s (len bytes) to client->reply; same logic as _addReplySdsToList. No robj is involved here; presumably the extra step is building an sds from the raw bytes - confirm
void _addReplyStringToList(client *c, const char *s, size_t len);
handleClientsWithPendingWrites发送回复时需要遍历server.clients_pending_write,因此需要先将client添加到clients_pending_write;此外,添加缓冲时需要在buf和reply之间做选择。redis对底层的缓冲操作进行了封装,并在封装函数中调用了prepareClientToWrite。
/* Queue the sds string `s` as reply data for client `c`.
 *
 * `s` is always consumed: it is freed here unless it is handed over to
 * _addReplySdsToList(). When prepareClientToWrite() does not return C_OK
 * nothing is queued. Otherwise the static buffer is tried first and the
 * reply list is used as the fallback. */
void addReplySds(client *c, sds s) {
    if (prepareClientToWrite(c) == C_OK) {
        if (_addReplyToBuffer(c,s,sdslen(s)) != C_OK) {
            /* Static buffer unavailable: the list takes over the string. */
            _addReplySdsToList(c,s);
            return;
        }
    }
    /* Either the client cannot be written to, or the bytes were copied into
     * the static buffer; in both cases the sds is no longer needed. */
    sdsfree(s);
}
void addReply(client *c, robj *obj);// adds an robj to the reply buffers
void addReplyString(client *c, const char *s, size_t len);// adds a string of len bytes to the reply buffers
/* Called before reply data is queued for `c` (addReplySds only queues data when
 * this returns C_OK). On the path shown it schedules the client for writing and
 * returns C_OK; the start of the body is elided ("……") in this excerpt. */
int prepareClientToWrite(client *c) {
……
// Schedule only when the client has no pending replies, is not already flagged
// CLIENT_PENDING_WRITE, and its replication state is either REPL_STATE_NONE or
// SLAVE_STATE_ONLINE with repl_put_online_on_ack unset.
if (!clientHasPendingReplies(c) &&
!(c->flags & CLIENT_PENDING_WRITE) &&
(c->replstate == REPL_STATE_NONE ||
(c->replstate == SLAVE_STATE_ONLINE && !c->repl_put_online_on_ack)))
{// update the client's flags and add the client to clients_pending_write
c->flags |= CLIENT_PENDING_WRITE;
listAddNodeHead(server.clients_pending_write,c);
}
return C_OK;
}
回复的组装函数汇总
void addReplyErrorLength(client *c, const char *s, size_t len);// formats an error reply and adds it to the reply buffers
void addReplyError(client *c, const char *err);// wrapper around addReplyErrorLength
void addReplyStatusLength(client *c, const char *s, size_t len);// formats a status reply and adds it to the reply buffers
void addReplyStatus(client *c, const char *status);// wrapper around addReplyStatusLength
void addReplyDouble(client *c, double d);// converts the double to a string and adds it to the reply buffers
void addReplyLongLongWithPrefix(client *c, long long ll, char prefix);
// wraps ll as "<prefix><ll>\r\n" (converted to a string when ll is too large) and adds it to the reply buffers
void addReplyLongLong(client *c, long long ll);
// formats an integer reply and adds it to the reply buffers; calls addReplyLongLongWithPrefix
void addReplyMultiBulkLen(client *c, long length);
// adds the element count of a multi bulk reply to the reply buffers; calls addReplyLongLongWithPrefix
void addReplyBulkLen(client *c, robj *obj);
// adds the length of obj (bulk reply) to the reply buffers; calls addReplyLongLongWithPrefix
void addReplyBulk(client *c, robj *obj);
// formats a bulk reply and adds it to the reply buffers; the content is obj
void addReplyBulkCBuffer(client *c, const void *p, size_t len);
// formats a bulk reply and adds it to the reply buffers; the content is a buffer of len bytes
void addReplyBulkSds(client *c, sds s);
// formats a bulk reply and adds it to the reply buffers; the content is an sds
void addReplyBulkCString(client *c, const char *s);
// formats a bulk reply and adds it to the reply buffers; the content is a C string
void addReplyBulkLongLong(client *c, long long ll);
// formats a bulk reply and adds it to the reply buffers; the content is a long long
发送缓冲中的回复给client
/* Flush buffered replies for every client queued in
 * server.clients_pending_write.
 *
 * Each client is unflagged and removed from the queue, then written to
 * directly with writeToClient(). If replies are still pending afterwards,
 * sendReplyToClient is installed as the writable-event callback on the
 * client's descriptor; if installing it fails, the client is freed
 * asynchronously.
 *
 * Returns the number of clients that were queued when the call started. */
int handleClientsWithPendingWrites(void) {
    int pending = listLength(server.clients_pending_write);
    listIter iter;
    listNode *node;

    /* Walk every client that has buffered replies. */
    listRewind(server.clients_pending_write,&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        client *c = listNodeValue(node);

        c->flags &= ~CLIENT_PENDING_WRITE;
        listDelNode(server.clients_pending_write,node);

        /* Try to send the buffered data to the client right away.
         * NOTE(review): presumably the client must not be touched after
         * C_ERR - confirm against writeToClient. */
        if (writeToClient(c->fd,c,0) == C_ERR) continue;

        /* Everything was sent: no writable handler needed. */
        if (!clientHasPendingReplies(c)) continue;

        /* Data remains: let sendReplyToClient finish once c->fd is writable. */
        if (aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
                              sendReplyToClient, c) == AE_ERR)
        {
            freeClientAsync(c);
        }
    }
    return pending;
}
//writeToClient的简单封装,用于写事件的回调
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask)