redis的复制第三版(异步复制之replica异步)
redis在2.2.0版本中引入了replica异步接收数据
replica连接上master,经过密码认证通过后(新加的密码认证过程,auth命令),replica发送SYNC命令,注册异步回调函数,不用阻塞等待。
- replica连接master
syncWithMaster() { ... int fd = anetTcpConnect(NULL,server.masterhost,server.masterport); ... }
- replica通过auth命令进行认证
syncWithMaster() { ... if(server.masterauth) {//配置了密码,master需要密码认证,则发送auth命令 //发送命令 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth); syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) ... //读取认证结果 syncReadLine(fd,buf,1024,3600) ... //处理认证结果 if (buf[0] != '+') { //认证失败 ... return REDIS_ERR; } } ... }
- replica发送sync命令
syncWithMaster() { ... syncWrite(fd,"SYNC \r\n",7,5) ... }
- 创建临时文件,注册异步回调函数
syncWithMaster() { ... //创建临时文件,后续接收的数据都将写入此文件 while(maxtries--) { snprintf(tmpfile,256, "temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid()); dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); if (dfd != -1) break; sleep(1); } ... //注册异步回调读函数 aeCreateFileEvent(server.el, fd, AE_READABLE, readSyncBulkPayload, NULL) ... server.replstate = REDIS_REPL_TRANSFER; server.repl_transfer_left = -1; //还需要接收多少字节 server.repl_transfer_s = fd; //通信fd server.repl_transfer_fd = dfd; //临时文件fd server.repl_transfer_lastio = time(NULL); //最后接收数据时间,用于判断是否超时 server.repl_transfer_tmpfile = zstrdup(tmpfile); //临时文件名,后续接收完后,将rename为rdb文件名 ... }
- 回调异步函数接收数据
void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[4096]; ssize_t nread, readlen; ... if (server.repl_transfer_left == -1) { //第一次回调,读取文件大小 syncReadLine(fd,buf,1024,3600) ... server.repl_transfer_left = strtol(buf+1,NULL,10); ... return; //直接退出了,等待下次的回调 } /* Read bulk data */ //读取数据 readlen = (server.repl_transfer_left < (signed)sizeof(buf)) ? server.repl_transfer_left : (signed)sizeof(buf); nread = read(fd,buf,readlen); ... //更新时间 server.repl_transfer_lastio = time(NULL); //将接收的数据写入本地临时文件 write(server.repl_transfer_fd,buf,nread) ... //更新剩余字节数 server.repl_transfer_left -= nread; /* Check if the transfer is now complete */ if (server.repl_transfer_left == 0) { //文件接收完成 //将临时文件切换成rdb文件 rename(server.repl_transfer_tmpfile,server.dbfilename) //清空数据库 emptyDb(); //删除可读事件 aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE); //加载rdb文件 rdbLoad(server.dbfilename) /* Final setup of the connected slave <- master link */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); server.master = createClient(server.repl_transfer_s); server.master->flags |= REDIS_MASTER; server.master->authenticated = 1; server.replstate = REDIS_REPL_CONNECTED; redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success"); } }
redis的复制第四版(异步复制之断点续传)
在redis2.8.0引入了新的命令psync, 根据偏移量进行断点续传。
为了能断点续传,则master需要进行命令的缓存,这样才能根据replica提供的偏移量进行正确的数据传输。但是master也不能无休止的缓存下去,master的内存有限,不可能为了复制而把内存耗尽,最终使用了循环缓冲区。
replica
- 主动连接master
- 发送PING命令, 为了探测master是否已经启动完成,可以提供服务
- 如果需要密码,则通过AUTH发送密码进行认证
- 如果配置了监听端口(可能是docker等端口映射了),则通过REPLCONF命令发送给master,用于INFO命令的显示
- 通过PSYNC命令进行同步请求,第一次没有偏移量等信息,而且不知道master的runid(唯一标识符), 发送 PSYNC ? -1 (PSYNC Runid offset)
- 收到回复+FULLRESYNC
- 注册异步回调函数,进行文件的接收
- 接收rdb文件内容,写入本地文件
- 接收完后,加载rdb文件到内存中,恢复数据
- 继续接收后续命令,重放,和master保持一致
- 当网络出现问题,连接断开, 将会缓存偏移量等信息
- 当网络恢复后,进行重连
- 连接初始化等交互完成后,发送PSYNC runid offset
- 接收到 +CONTINUE, 则继续接受命令进行重放以保持和master一致
master
- 接收到PING命令,当自身已经一切准备就绪,则回复+PONG
- 接收到AUTH命令,进行密码校验,通过,则发送+OK,否则 -Error xxx
- 接收REPLCONF命令,解析,正常则返回+OK
- 接收到PSYNC命令,解析命令,进行runid的判断,是否是请求本服务器的请求?判断offset是否能从缓存中获取,返回+FULLRESYNC
- 判断是否有子进程存在,在则等待,否则生成子进程开始产生rdb文件
- 子进程结束,父进程注册异步回调函数进行文件传输
- 回调函数发送文件内容给replica
- 文件发送完成后,进行发送新的写命令给replica
- 当连接断开后,关闭连接
- 网络恢复后,replica连接上,进行一些列的交换后,接受到PSYNC runid offset
- 进过判断,offset在循环队列中,可以直接从队列中读取发送,发送+CONTINUE
- 发送完成后,后续持续的发送新的命令
代码逻辑如下:
replica
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
...
//定时进行检测重连master以及传输失败, 每秒钟调用一次
run_with_period(1000) replicationCron();
...
}
void replicationCron(void) {
//连接阶段,或者PING命令的响应,超时了, 断开连接
/* Non blocking connection timeout? */
if (server.masterhost &&
(server.repl_state == REDIS_REPL_CONNECTING ||
server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
(time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");
undoConnectWithMaster();
}
//正在传输rdb文件内容超时了
/* Bulk transfer I/O timeout? */
if (server.masterhost && server.repl_state == REDIS_REPL_TRANSFER &&
(time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
redisLog(REDIS_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
replicationAbortSyncTransfer();
}
//以及同步完成,在后续的同步新的命令时超时
/* Timed out master when we are an already connected slave? */
if (server.masterhost && server.repl_state == REDIS_REPL_CONNECTED &&
(time(NULL)-server.master->lastinteraction) > server.repl_timeout)
{
redisLog(REDIS_WARNING,"MASTER timeout: no data nor PING received...");
//这里将缓存runid,offset等信息
freeClient(server.master);
}
//开始连接master
/* Check if we should connect to a MASTER */
if (server.repl_state == REDIS_REPL_CONNECT) {
redisLog(REDIS_NOTICE,"Connecting to MASTER %s:%d",
server.masterhost, server.masterport);
//此函数是主要函数
if (connectWithMaster() == REDIS_OK) {
redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync started");
}
}
//发送ACK,并且将offst也一并发给master
/* Send ACK to master from time to time. */
if (server.masterhost && server.master)
replicationSendAck();
...
}
int connectWithMaster(void) {
int fd;
//主动连接master
fd = anetTcpNonBlockConnect(NULL,server.masterhost,server.masterport);
if (fd == -1) {
redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
strerror(errno));
return REDIS_ERR;
}
//注册连接成功后的回调函数
if (aeCreateFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) ==
AE_ERR)
{
close(fd);
redisLog(REDIS_WARNING,"Can't create readable event for SYNC");
return REDIS_ERR;
}
server.repl_transfer_lastio = server.unixtime;
server.repl_transfer_s = fd;
server.repl_state = REDIS_REPL_CONNECTING;
return REDIS_OK;
}
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
char tmpfile[256], *err;
int dfd, maxtries = 5;
int sockerr = 0, psync_result;
...
if (server.repl_state == REDIS_REPL_CONNECTING) {
...
server.repl_state = REDIS_REPL_RECEIVE_PONG;
/* Send the PING, don't check for errors at all, we have the timeout
* that will take care about this. */
syncWrite(fd,"PING\r\n",6,100);
return;
}
//读取PING的响应
/* Receive the PONG command. */
if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
char buf[1024];
...
/* Read the reply with explicit timeout. */
buf[0] = '\0';
if (syncReadLine(fd,buf,sizeof(buf),
server.repl_syncio_timeout*1000) == -1)
{
redisLog(REDIS_WARNING,
"I/O error reading PING reply from master: %s",
strerror(errno));
goto error;
}
/* We accept only two replies as valid, a positive +PONG reply
* (we just check for "+") or an authentication error.
* Note that older versions of Redis replied with "operation not
* permitted" instead of using a proper error code, so we test
* both. */
if (buf[0] != '+' &&
strncmp(buf,"-NOAUTH",7) != 0 &&
strncmp(buf,"-ERR operation not permitted",28) != 0)
{
redisLog(REDIS_WARNING,"Error reply to PING from master: '%s'",buf);
goto error;
} else {
redisLog(REDIS_NOTICE,
"Master replied to PING, replication can continue...");
}
}
//如果配置了密码要求,则发送AUTH命令
/* AUTH with the master if required. */
if(server.masterauth) {
err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL);
if (err[0] == '-') {
redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
goto error;
}
sdsfree(err);
}
/* Set the slave port, so that Master's INFO command can list the
* slave listening port correctly. */
{
sds port = sdsfromlonglong(server.port);
err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port,
NULL);
sdsfree(port);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err[0] == '-') {
redisLog(REDIS_NOTICE,"(Non critical) Master does not understand REPLCONF listening-port: %s", err);
}
sdsfree(err);
}
//发送PSYNC请求
/* Try a partial resynchonization. If we don't have a cached master
* slaveTryPartialResynchronization() will at least try to use PSYNC
* to start a full resynchronization so that we get the master run id
* and the global offset, to try a partial resync at the next
* reconnection attempt. */
psync_result = slaveTryPartialResynchronization(fd);
if (psync_result == PSYNC_CONTINUE) {
redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Master accepted a Partial Resynchronization.");
return;
}
//不支持PSYNC命令,将降级到SYNC命令
/* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
* and the server.repl_master_runid and repl_master_initial_offset are
* already populated. */
if (psync_result == PSYNC_NOT_SUPPORTED) {
redisLog(REDIS_NOTICE,"Retrying with SYNC...");
if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
goto error;
}
}
//准备一个临时文件
/* Prepare a suitable temp file for bulk transfer */
while(maxtries--) {
snprintf(tmpfile,256,
"temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
if (dfd != -1) break;
sleep(1);
}
if (dfd == -1) {
redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
goto error;
}
//注册异步回调函数,进行rdb文件的接受
/* Setup the non blocking download of the bulk file. */
if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
== AE_ERR)
{
redisLog(REDIS_WARNING,
"Can't create readable event for SYNC: %s (fd=%d)",
strerror(errno),fd);
goto error;
}
server.repl_state = REDIS_REPL_TRANSFER;
server.repl_transfer_size = -1;
server.repl_transfer_read = 0;
server.repl_transfer_last_fsync_off = 0;
server.repl_transfer_fd = dfd;
server.repl_transfer_lastio = server.unixtime;
server.repl_transfer_tmpfile = zstrdup(tmpfile);
return;
error:
close(fd);
server.repl_transfer_s = -1;
server.repl_state = REDIS_REPL_CONNECT;
return;
}
int slaveTryPartialResynchronization(int fd) {
char *psync_runid;
char psync_offset[32];
sds reply;
...
server.repl_master_initial_offset = -1;
if (server.cached_master) { //重连时,从缓存中获取offset和runid
psync_runid = server.cached_master->replrunid;
snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
...
} else { //否则将发送 ? -1 ,进行全量同步
...
psync_runid = "?";
memcpy(psync_offset,"-1",3);
}
/* Issue the PSYNC command */
reply = sendSynchronousCommand(fd,"PSYNC",psync_runid,psync_offset,NULL);
//处理响应
if (!strncmp(reply,"+FULLRESYNC",11)) { //需要全量同步
...
//提前runid
runid = strchr(reply,' ');
if (runid) {
runid++;
offset = strchr(runid,' ');
if (offset) offset++;
}
...
memcpy(server.repl_master_runid, runid, offset-runid-1);
server.repl_master_runid[REDIS_RUN_ID_SIZE] = '\0';
server.repl_master_initial_offset = strtoll(offset,NULL,10);
...
/* We are going to full resync, discard the cached master structure. */
replicationDiscardCachedMaster();
sdsfree(reply);
return PSYNC_FULLRESYNC;
}
if (!strncmp(reply,"+CONTINUE",9)) { //可以继续断点续传
...
sdsfree(reply);
replicationResurrectCachedMaster(fd); //从缓存中恢复各种状态
return PSYNC_CONTINUE;
}
//错误判断
...
sdsfree(reply);
replicationDiscardCachedMaster();
return PSYNC_NOT_SUPPORTED;
}
master
void syncCommand(redisClient *c) {
/* ignore SYNC if already slave or in monitor mode */
if (c->flags & REDIS_SLAVE) return;
/* Refuse SYNC requests if we are a slave but the link with our master
* is not ok... */
if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {
addReplyError(c,"Can't SYNC while not connected with my master");
return;
}
/* SYNC can't be issued when the server has pending data to send to
* the client about already issued commands. We need a fresh reply
* buffer registering the differences between the BGSAVE and the current
* dataset, so that we can copy to other slaves if needed. */
if (listLength(c->reply) != 0 || c->bufpos != 0) {
addReplyError(c,"SYNC and PSYNC are invalid with pending output");
return;
}
redisLog(REDIS_NOTICE,"Slave asks for synchronization");
/* Try a partial resynchronization if this is a PSYNC command.
* If it fails, we continue with usual full resynchronization, however
* when this happens masterTryPartialResynchronization() already
* replied with:
*
* +FULLRESYNC <runid> <offset>
*
* So the slave knows the new runid and offset to try a PSYNC later
* if the connection with the master is lost. */
if (!strcasecmp(c->argv[0]->ptr,"psync")) {
//判断是否能断点续传
if (masterTryPartialResynchronization(c) == REDIS_OK) {
server.stat_sync_partial_ok++;
return; /* No full resync needed, return. */
} else {
char *master_runid = c->argv[1]->ptr;
/* Increment stats for failed PSYNCs, but only if the
* runid is not "?", as this is used by slaves to force a full
* resync on purpose when they are not albe to partially
* resync. */
if (master_runid[0] != '?') server.stat_sync_partial_err++;
}
} else {
/* If a slave uses SYNC, we are dealing with an old implementation
* of the replication protocol (like redis-cli --slave). Flag the client
* so that we don't expect to receive REPLCONF ACK feedbacks. */
c->flags |= REDIS_PRE_PSYNC_SLAVE;
}
/* Full resynchronization. */
server.stat_sync_full++;
//需要全量同步,将产生子进程进行处理
/* Here we need to check if there is a background saving operation
* in progress, or if it is required to start one */
if (server.rdb_child_pid != -1) { //有子进程,需要等待
/* Ok a background save is in progress. Let's check if it is a good
* one for replication, i.e. if there is another slave that is
* registering differences since the server forked to save */
redisClient *slave;
listNode *ln;
listIter li;
//查询是否有正在等待的replica
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
}
if (ln) { //有相同状态的replica,将它的缓存拷贝,并且可以使用同一份rdb文件,不用再产生子进程
/* Perfect, the server is already registering differences for
* another slave. Set the right state, and copy the buffer. */
copyClientOutputBuffer(c,slave);
c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else { //需要等待子进程结束,然后生成子进程进行rdb
/* No way, we need to wait for the next BGSAVE in order to
* register differences */
c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
}
} else {
/* Ok we don't have a BGSAVE in progress, let's start one */
redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
//没有子进程,现在可以立刻产生子进程进行rdb
if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
addReplyError(c,"Unable to perform background save");
return;
}
c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
/* Flush the script cache for the new slave. */
replicationScriptCacheFlush();
}
if (server.repl_disable_tcp_nodelay)
anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
c->repldbfd = -1;
c->flags |= REDIS_SLAVE;
server.slaveseldb = -1; /* Force to re-emit the SELECT command. */
//将当前replica加入到replica队列中
listAddNodeTail(server.slaves,c);
//当第一个replica到达,并且缓存队列为空,则创建缓存队列
if (listLength(server.slaves) == 1 && server.repl_backlog == NULL)
createReplicationBacklog();
return;
}
int masterTryPartialResynchronization(redisClient *c) {
long long psync_offset, psync_len;
char *master_runid = c->argv[1]->ptr;
char buf[128];
int buflen;
//判断是否是请求当前服务器的,(可能是master挂了,此replica切换为master),不是则需要全量同步
/* Is the runid of this master the same advertised by the wannabe slave
* via PSYNC? If runid changed this master is a different instance and
* there is no way to continue. */
if (strcasecmp(master_runid, server.runid)) {
/* Run id "?" is used by slaves that want to force a full resync. */
if (master_runid[0] != '?') {
redisLog(REDIS_NOTICE,"Partial resynchronization not accepted: "
"Runid mismatch (Client asked for '%s', I'm '%s')",
master_runid, server.runid);
} else {
redisLog(REDIS_NOTICE,"Full resync requested by slave.");
}
goto need_full_resync;
}
//获取请求的offset
/* We still have the data our slave is asking for? */
if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) !=
REDIS_OK) goto need_full_resync;
//判断offset是否在缓存区范围内,不在则需要全量同步
if (!server.repl_backlog ||
psync_offset < server.repl_backlog_off ||
psync_offset > (server.repl_backlog_off + server.repl_backlog_histlen))
{
redisLog(REDIS_NOTICE,
"Unable to partial resync with the slave for lack of backlog (Slave request was: %lld).", psync_offset);
if (psync_offset > server.master_repl_offset) {
redisLog(REDIS_WARNING,
"Warning: slave tried to PSYNC with an offset that is greater than the master replication offset.");
}
goto need_full_resync;
}
/* If we reached this point, we are able to perform a partial resync:
* 1) Set client state to make it a slave.
* 2) Inform the client we can continue with +CONTINUE
* 3) Send the backlog data (from the offset to the end) to the slave. */
c->flags |= REDIS_SLAVE;
c->replstate = REDIS_REPL_ONLINE;
c->repl_ack_time = server.unixtime;
listAddNodeTail(server.slaves,c);
/* We can't use the connection buffers since they are used to accumulate
* new commands at this stage. But we are sure the socket send buffer is
* emtpy so this write will never fail actually. */
buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n"); //发送continue
if (write(c->fd,buf,buflen) != buflen) {
freeClientAsync(c);
return REDIS_OK;
}
//发送offset到最新的数据
psync_len = addReplyReplicationBacklog(c,psync_offset);
redisLog(REDIS_NOTICE,
"Partial resynchronization request accepted. Sending %lld bytes of backlog starting from offset %lld.", psync_len, psync_offset);
/* Note that we don't need to set the selected DB at server.slaveseldb
* to -1 to force the master to emit SELECT, since the slave already
* has this state from the previous connection with the master. */
refreshGoodSlavesCount();
return REDIS_OK; /* The caller can return, no full resync needed. */
need_full_resync:
/* We need a full resync for some reason... notify the client. */
psync_offset = server.master_repl_offset;
/* Add 1 to psync_offset if it the replication backlog does not exists
* as when it will be created later we'll increment the offset by one. */
if (server.repl_backlog == NULL) psync_offset++;
/* Again, we can't use the connection buffers (see above). */
buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
server.runid,psync_offset);
if (write(c->fd,buf,buflen) != buflen) {
freeClientAsync(c);
return REDIS_OK;
}
return REDIS_ERR;
}
//子进程结束后的处理函数
void backgroundSaveDoneHandler(int exitcode, int bysignal) {
...
updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
...
}
void updateSlavesWaitingBgsave(int bgsaveerr) {
listNode *ln;
int startbgsave = 0;
listIter li;
//遍历所有的replica
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
redisClient *slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) { //在等待上一次rdb结束的
startbgsave = 1; //可以开始rdb生成了
slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
} else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
struct redis_stat buf;
if (bgsaveerr != REDIS_OK) {
freeClient(slave);
redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
continue;
}
//打开生成的rdb二五年间
if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
redis_fstat(slave->repldbfd,&buf) == -1) {
freeClient(slave);
redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
continue;
}
slave->repldboff = 0;
slave->repldbsize = buf.st_size;
slave->replstate = REDIS_REPL_SEND_BULK;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
//注册异步回调函数,将文件发送给replica
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
freeClient(slave);
continue;
}
}
}
//需要开始子进程生成rdb
if (startbgsave) {
/* Since we are starting a new background save for one or more slaves,
* we flush the Replication Script Cache to use EVAL to propagate every
* new EVALSHA for the first time, since all the new slaves don't know
* about previous scripts. */
replicationScriptCacheFlush();
//开启子进程进行rdb生成
if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
...
}
}
}
}
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *slave = privdata;
...
if (slave->repldboff == 0) {
//发送文件大小
bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
slave->repldbsize);
if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
{
sdsfree(bulkcount);
freeClient(slave);
return;
}
sdsfree(bulkcount);
}
lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN); //读取内容
if (buflen <= 0) {
redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
(buflen == 0) ? "premature EOF" : strerror(errno));
freeClient(slave);
return;
}
if ((nwritten = write(fd,buf,buflen)) == -1) { //发送内容
redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
strerror(errno));
freeClient(slave);
return;
}
slave->repldboff += nwritten;
if (slave->repldboff == slave->repldbsize) { //发送完成
close(slave->repldbfd);
slave->repldbfd = -1;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
slave->replstate = REDIS_REPL_ONLINE;
slave->repl_ack_time = server.unixtime;
//注册异步回调函数,将后续的命令发送给replica
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
sendReplyToClient, slave) == AE_ERR) {
freeClient(slave);
return;
}
refreshGoodSlavesCount();
redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
}
}