文章目录
复制定时函数中,准备连接主
在replicationCron中,调用函数connectWithMaster()
/* Check if we should connect to a MASTER */
if (server.repl_state == REPL_STATE_CONNECT) {
serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
server.masterhost, server.masterport);
connectWithMaster();
}
与主连接,调用syncWithMaster
在connectWithMaster(),调用syncWithMaster(),初始化并调整参数
int connectWithMaster(void) {
server.repl_transfer_s = connCreate(connTypeOfReplication());
if (connConnect(server.repl_transfer_s, server.masterhost, server.masterport,
server.bind_source_addr, syncWithMaster) == C_ERR) {
serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
connGetLastError(server.repl_transfer_s));
connClose(server.repl_transfer_s);
server.repl_transfer_s = NULL;
return C_ERR;
}
server.repl_transfer_lastio = server.unixtime;
server.repl_state = REPL_STATE_CONNECTING;
serverLog(LL_NOTICE,"MASTER <-> REPLICA sync started");
return C_OK;
}
然后开始本文最重要的内容 :解析syncWithMaster
syncWithMaster函数的逻辑如下
验证状态是不是空
空的话 return ASAP
if (server.repl_state == REPL_STATE_NONE) {
connClose(conn);
return;
}
验证状态是否为正在准备连接
/* Check for errors in the socket: after a non blocking connect() we
* may find that the socket is in error state. */
if (connGetState(conn) != CONN_STATE_CONNECTED) {
serverLog(LL_WARNING,"Error condition on socket for SYNC: %s",
connGetLastError(conn));
goto error;
}
如果状态不为 CONN_STATE_CONNECTED,日志处理后跳转至error处理
验证从的复制状态是不是REPL_STATE_CONNECTING
/* Send a PING to check the master is able to reply without errors. */
if (server.repl_state == REPL_STATE_CONNECTING) {
serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
/* Delete the writable event so that the readable event remains
* registered and we can wait for the PONG reply. */
connSetReadHandler(conn, syncWithMaster);
connSetWriteHandler(conn, NULL);
server.repl_state = REPL_STATE_RECEIVE_PING_REPLY;
/* Send the PING, don't check for errors at all, we have the timeout
* that will take care about this. */
err = sendCommand(conn,"PING",NULL);
if (err) goto write_error;
return;
}
设置读的回调函数为syncWithMaster,保持读的注册,写的回调为空,调整复制状态为等待ping,并发送ping命令给主。不用担心错误,因为在其他地方有超时处理。
验证是不是等待ping状态
/* Receive the PONG command. */
if (server.repl_state == REPL_STATE_RECEIVE_PING_REPLY) {
err = receiveSynchronousResponse(conn);
/* The master did not reply */
if (err == NULL) goto no_response_error;
/* We accept only two replies as valid, a positive +PONG reply
* (we just check for "+") or an authentication error.
* Note that older versions of Redis replied with "operation not
* permitted" instead of using a proper error code, so we test both. */
if (err[0] != '+' &&
strncmp(err,"-NOAUTH",7) != 0 &&
strncmp(err,"-NOPERM",7) != 0 &&
strncmp(err,"-ERR operation not permitted",28) != 0)
{
serverLog(LL_WARNING,"Error reply to PING from master: '%s'",err);
sdsfree(err);
goto error;
} else {
serverLog(LL_NOTICE,
"Master replied to PING, replication can continue...");
}
sdsfree(err);
err = NULL;
server.repl_state = REPL_STATE_SEND_HANDSHAKE;
}
是的话处理同步回复,同步回复为空,主没有回复,就开始进行没有回复的处理。
主回复了:
1.+PONG -NOAUTH -NOPERM -ERR operation not permitted
不是四种中的任意一种,说明得到的回复并不是ping的回复
2.是的话,接收到了ping的回复,释放回复,状态设为REPL_STATE_SEND_HANDSHAKE
验证是不是REPL_STATE_SEND_HANDSHAKE
需要密码验证
if (server.repl_state == REPL_STATE_SEND_HANDSHAKE) {
/* AUTH with the master if required. */
if (server.masterauth) {
char *args[3] = {"AUTH",NULL,NULL};
size_t lens[3] = {4,0,0};
int argc = 1;
if (server.masteruser) {
args[argc] = server.masteruser;
lens[argc] = strlen(server.masteruser);
argc++;
}
args[argc] = server.masterauth;
lens[argc] = sdslen(server.masterauth);
argc++;
err = sendCommandArgv(conn, argc, args, lens);
if (err) goto write_error;
}
将参数填好后,使用sendCommandArgv多批量生成指令。用于验证用户和密码
设置从端口
pps:没有条件 直接进入了模块 {}包起来 我也是第一次见这种
/* Set the slave port, so that Master's INFO command can list the
* slave listening port correctly. */
{
int port;
if (server.slave_announce_port)
port = server.slave_announce_port;
else if (server.tls_replication && server.tls_port)
port = server.tls_port;
else
port = server.port;
sds portstr = sdsfromlonglong(port);
err = sendCommand(conn,"REPLCONF",
"listening-port",portstr, NULL);
sdsfree(portstr);
if (err) goto write_error;
}
设置从的IP
/* Set the slave ip, so that Master's INFO command can list the
* slave IP address port correctly in case of port forwarding or NAT.
* Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
if (server.slave_announce_ip) {
err = sendCommand(conn,"REPLCONF",
"ip-address",server.slave_announce_ip, NULL);
if (err) goto write_error;
}
告知主,从服务器支持的功能
/* Inform the master of our (slave) capabilities.
*
* EOF: supports EOF-style RDB transfer for diskless replication.
* PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
*
* The master will ignore capabilities it does not understand. */
err = sendCommand(conn,"REPLCONF",
"capa","eof","capa","psync2",NULL);
if (err) goto write_error;
server.repl_state = REPL_STATE_RECEIVE_AUTH_REPLY;
return;
}
给主发送各种握手信息并成功拿到返回后,状态变更为等待验证回复
接受验证回复
/* Receive AUTH reply. */
if (server.repl_state == REPL_STATE_RECEIVE_AUTH_REPLY) {
err = receiveSynchronousResponse(conn);
if (err == NULL) goto no_response_error;
if (err[0] == '-') {
serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
goto error;
}
sdsfree(err);
err = NULL;
server.repl_state = REPL_STATE_RECEIVE_PORT_REPLY;
return;
}
状态为等待变更回复时,没有返回值或者返回以-开头。(-开头表示错误信息),都进行错误处理,否则将状态变更为REPL_STATE_RECEIVE_PORT_REPLY,等待REPLCONF的回复。
等待REPLCONF回复
/* Receive REPLCONF listening-port reply. */
if (server.repl_state == REPL_STATE_RECEIVE_PORT_REPLY) {
err = receiveSynchronousResponse(conn);
if (err == NULL) goto no_response_error;
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err[0] == '-') {
serverLog(LL_NOTICE,"(Non critical) Master does not understand "
"REPLCONF listening-port: %s", err);
}
sdsfree(err);
server.repl_state = REPL_STATE_RECEIVE_IP_REPLY;
return;
}
发送复制请求后,等待回复,出错则打印到日志:主不懂复制的监听端口
等待复制回复
变更能力
if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY && !server.slave_announce_ip)
server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
如果状态为等待复制回复且没有从的ip,那么改变状态为REPL_STATE_RECEIVE_CAPA_REPLY,也是等待复制回复类别。
没有announce_ip
/* Receive REPLCONF ip-address reply. */
if (server.repl_state == REPL_STATE_RECEIVE_IP_REPLY) {
err = receiveSynchronousResponse(conn);
if (err == NULL) goto no_response_error;
/* Ignore the error if any, not all the Redis versions support
* REPLCONF ip-address. */
if (err[0] == '-') {
serverLog(LL_NOTICE,"(Non critical) Master does not understand "
"REPLCONF ip-address: %s", err);
}
sdsfree(err);
server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
return;
}
收到ip-address的回复,状态变更为REPL_STATE_RECEIVE_CAPA_REPLY
有announce_ip或者已经收到ip-address的回复
/* Receive CAPA reply. */
if (server.repl_state == REPL_STATE_RECEIVE_CAPA_REPLY) {
err = receiveSynchronousResponse(conn);
if (err == NULL) goto no_response_error;
/* Ignore the error if any, not all the Redis versions support
* REPLCONF capa. */
if (err[0] == '-') {
serverLog(LL_NOTICE,"(Non critical) Master does not understand "
"REPLCONF capa: %s", err);
}
sdsfree(err);
err = NULL;
server.repl_state = REPL_STATE_SEND_PSYNC;
}
获得能力的回复,状态变更为REPL_STATE_SEND_PSYNC,发送psyn指令
发送PSYNC,尝试增量同步(部分同步)
/* Try a partial resynchronization. If we don't have a cached master
* slaveTryPartialResynchronization() will at least try to use PSYNC
* to start a full resynchronization so that we get the master replid
* and the global offset, to try a partial resync at the next
* reconnection attempt. */
if (server.repl_state == REPL_STATE_SEND_PSYNC) {
if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) {
err = sdsnew("Write error sending the PSYNC command.");
abortFailover("Write error to failover target");
goto write_error;
}
server.repl_state = REPL_STATE_RECEIVE_PSYNC_REPLY;
return;
}
如果主没有缓存,函数会至少使用psync来进行全同步,所以可以获得到主的replid和偏移量,以便下次尝试部分增量同步。
状态变为等待psync回复。
等待获取psync回复
首先,能到这个位置一定为REPL_STATE_RECEIVE_PSYNC_REPLY
所以serverlog其实是不可达的 但是为了容错和完备 添加了这段代码,然后开始尝试部分同步
/* If reached this point, we should be in REPL_STATE_RECEIVE_PSYNC_REPLY. */
if (server.repl_state != REPL_STATE_RECEIVE_PSYNC_REPLY) {
serverLog(LL_WARNING,"syncWithMaster(): state machine error, "
"state should be RECEIVE_PSYNC but is %d",
server.repl_state);
goto error;
}
psync_result = slaveTryPartialResynchronization(conn,1);
尝试部分同步
psync_result = slaveTryPartialResynchronization(conn,1);
if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
刚开始 read_reply为0,最开始的offset被设为-1,说明当前的复制和偏移量无效
先判断server的用于psync的master是不是存在,不存在则部分同步不可能
int slaveTryPartialResynchronization(connection *conn, int read_reply) {
char *psync_replid;
char psync_offset[32];
sds reply;
/* Writing half */
if (!read_reply) {
/* Initially set master_initial_offset to -1 to mark the current
* master replid and offset as not valid. Later if we'll be able to do
* a FULL resync using the PSYNC command we'll set the offset at the
* right value, so that this information will be propagated to the
* client structure representing the master into server.master. */
server.master_initial_offset = -1;
if (server.cached_master) {
psync_replid = server.cached_master->replid;
snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
} else {
serverLog(LL_NOTICE,"Partial resynchronization not possible (no cached master)");
psync_replid = "?";
memcpy(psync_offset,"-1",3);
}
如果server处于failover状态,发送当前的replid和offset给master
/* Issue the PSYNC command, if this is a master with a failover in
* progress then send the failover argument to the replica to cause it
* to become a master */
if (server.failover_state == FAILOVER_IN_PROGRESS) {
reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,"FAILOVER",NULL);
} else {
reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
}
如果回复为空,那么发送失败,重新设置读的句柄,然后返回
if (reply != NULL) {
serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
sdsfree(reply);
connSetReadHandler(conn, NULL);
return PSYNC_WRITE_ERROR;
}
return PSYNC_WAIT_REPLY;
}
然后获取同步的回复,解析回复
回复为空,返回等下尝试的状态
/* Reading half */
reply = receiveSynchronousResponse(conn);
/* Master did not reply to PSYNC */
if (reply == NULL) {
connSetReadHandler(conn, NULL);
serverLog(LL_WARNING, "Master did not reply to PSYNC, will try later");
return PSYNC_TRY_LATER;
}
回复不为空,但是master回复了空行,保持当前连接
if (sdslen(reply) == 0) {
/* The master may send empty newlines after it receives PSYNC
* and before to reply, just to keep the connection alive. */
sdsfree(reply);
return PSYNC_WAIT_REPLY;
}
如果回复是 +FULLRESYNC,解析出replid和复制偏移量
connSetReadHandler(conn, NULL);
if (!strncmp(reply,"+FULLRESYNC",11)) {
char *replid = NULL, *offset = NULL;
/* FULL RESYNC, parse the reply in order to extract the replid
* and the replication offset. */
replid = strchr(reply,' ');
if (replid) {
replid++;
offset = strchr(replid,' ');
if (offset) offset++;
}
if (!replid || !offset || (offset-replid-1) != CONFIG_RUN_ID_SIZE) {
serverLog(LL_WARNING,
"Master replied with wrong +FULLRESYNC syntax.");
/* This is an unexpected condition, actually the +FULLRESYNC
* reply means that the master supports PSYNC, but the reply
* format seems wrong. To stay safe we blank the master
* replid to make sure next PSYNCs will fail. */
memset(server.master_replid,0,CONFIG_RUN_ID_SIZE+1);
} else {
memcpy(server.master_replid, replid, offset-replid-1);
server.master_replid[CONFIG_RUN_ID_SIZE] = '\0';
server.master_initial_offset = strtoll(offset,NULL,10);
serverLog(LL_NOTICE,"Full resync from master: %s:%lld",
server.master_replid,
server.master_initial_offset);
}
sdsfree(reply);
return PSYNC_FULLRESYNC;
}
如果主回复 +CONTINUE 说明可以进行部分偏移复制
if (!strncmp(reply,"+CONTINUE",9)) {
/* Partial resync was accepted. */
serverLog(LL_NOTICE,
"Successful partial resynchronization with master.");
//if还没结束
//检查master发布的新的复制ID 如果变化了 更新ID secondary ID设为旧的ID 知道当前偏移量
//这样的话sub-slave断开了还能进行PSYNC
/* Check the new replication ID advertised by the master. If it
* changed, we need to set the new ID as primary ID, and set
* secondary ID as the old master ID up to the current offset, so
* that our sub-slaves will be able to PSYNC with us after a
* disconnection. */
char *start = reply+10;
char *end = reply+9;
while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++;
if (end-start == CONFIG_RUN_ID_SIZE) {
char new[CONFIG_RUN_ID_SIZE+1];
memcpy(new,start,CONFIG_RUN_ID_SIZE);
new[CONFIG_RUN_ID_SIZE] = '\0';
if (strcmp(new,server.cached_master->replid)) {
/* Master ID changed. */
serverLog(LL_WARNING,"Master replication ID changed to %s",new);
/* Set the old ID as our ID2, up to the current offset+1. */
memcpy(server.replid2,server.cached_master->replid,
sizeof(server.replid2));
server.second_replid_offset = server.master_repl_offset+1;
/* Update the cached master ID and our own primary ID to the
* new one. */
memcpy(server.replid,new,sizeof(server.replid));
memcpy(server.cached_master->replid,new,sizeof(server.replid));
/* Disconnect all the sub-slaves: they need to be notified. */
disconnectSlaves();
}
}
/* Setup the replication to continue. */
sdsfree(reply);
replicationResurrectCachedMaster(conn);
//将缓存的主节点转换为当前主节点,使用作为参数传递的文件描述符作为新主节点的套接字
//此函数在成功设置部分重新同步时被调用,因此我们将接收的数据流将从主节点离开的位置开始。* /
//如果这个实例被重新启动,并且我们将元数据从持久性文件读取到PSYNC,那么我们的复制backlog可能仍然没有初始化。创建它。* /
/* If this instance was restarted and we read the metadata to
* PSYNC from the persistence file, our replication backlog could
* be still not initialized. Create it. */
if (server.repl_backlog == NULL) createReplicationBacklog();
return PSYNC_CONTINUE;
}
如果我们到达这一点,我们会收到一个错误(因为主服务器不理解PSYNC,或者因为它处于一个特殊的状态,不能为我们的请求服务),
或者来自主服务器的意外回复。
如果我们不理解错误,则返回PSYNC_NOT_SUPPORTED,
否则如果我们认为这是一个短暂错误,则返回PSYNC_TRY_LATER
/* If we reach this point we received either an error (since the master does
* not understand PSYNC or because it is in a special state and cannot
* serve our request), or an unexpected reply from the master.
*
* Return PSYNC_NOT_SUPPORTED on errors we don't understand, otherwise
* return PSYNC_TRY_LATER if we believe this is a transient error. */
if (!strncmp(reply,"-NOMASTERLINK",13) ||
!strncmp(reply,"-LOADING",8))
{
serverLog(LL_NOTICE,
"Master is currently unable to PSYNC "
"but should be in the future: %s", reply);
sdsfree(reply);
return PSYNC_TRY_LATER;
}
if (strncmp(reply,"-ERR",4)) {
/* If it's not an error, log the unexpected event. */
serverLog(LL_WARNING,
"Unexpected reply to PSYNC from master: %s", reply);
} else {
serverLog(LL_NOTICE,
"Master does not support PSYNC or is in "
"error state (reply: %s)", reply);
}
sdsfree(reply);
return PSYNC_NOT_SUPPORTED;
}
验证是不是正在故障转移
检查计划的故障切换状态。
我们期望PSYNC_CONTINUE,但是完全重新同步在技术上没有问题,这可能发生在边缘情况下。
/* Check the status of the planned failover. We expect PSYNC_CONTINUE,
* but there is nothing technically wrong with a full resync which
* could happen in edge cases. */
if (server.failover_state == FAILOVER_IN_PROGRESS) {
if (psync_result == PSYNC_CONTINUE || psync_result == PSYNC_FULLRESYNC) {
clearFailoverState();
//清除failover的状态,重置server的参数
} else {
//中止failover,如果正在故障转移,取消,并将实例设为主;再进行清楚failover状态
abortFailover("Failover target rejected psync request");
return;
}
}
验证是不是稍后进行
加载数据库或者断连时,稍后进行
/* If the master is in an transient error, we should try to PSYNC
* from scratch later, so go to the error path. This happens when
* the server is loading the dataset or is not connected with its
* master and so forth. */
if (psync_result == PSYNC_TRY_LATER) goto error;
验证是不是继续psync状态
如果是PSYNC_CONTINUE且开始部分同步
/* Note: if PSYNC does not return WAIT_REPLY, it will take care of
* uninstalling the read handler from the file descriptor. */
if (psync_result == PSYNC_CONTINUE) {
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Master accepted a Partial Resynchronization.");
if (server.supervised_mode == SUPERVISED_SYSTEMD) {
redisCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Partial Resynchronization accepted. Ready to accept connections in read-write mode.\n");
}
return;
}
发送一个通知消息,返回正表明通知成功
/* Send a notify message to systemd. Returns sd_notify return code which is
* a positive number on success. */
int redisCommunicateSystemd(const char *sd_notify_msg) {
#ifdef HAVE_LIBSYSTEMD
int ret = sd_notify(0, sd_notify_msg);
if (ret == 0)
serverLog(LL_WARNING, "systemd supervision error: NOTIFY_SOCKET not found!");
else if (ret < 0)
serverLog(LL_WARNING, "systemd supervision error: sd_notify: %d", ret);
return ret;
#else
UNUSED(sd_notify_msg);
return 0;
#endif
}
验证是不是PSYNC_NOT_SUPPORTED
如果不支持PSYNC,只能全同步
/* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
* and the server.master_replid and master_initial_offset are
* already populated. */
if (psync_result == PSYNC_NOT_SUPPORTED) {
serverLog(LL_NOTICE,"Retrying with SYNC...");
if (connSyncWrite(conn,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
serverLog(LL_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
goto error;
}
}
准备一个合适的临时文件批量传输
如果不是 无存储加载,获取时间和pid,打开存储文件,开始复制文件和改变文件描述符
if (!useDisklessLoad()) {
while(maxtries--) {
snprintf(tmpfile,256,
"temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
if (dfd != -1) break;
sleep(1);
}
if (dfd == -1) {
serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno));
goto error;
}
server.repl_transfer_tmpfile = zstrdup(tmpfile);
server.repl_transfer_fd = dfd;
}
设置批量文件的非阻塞下载
/* Setup the non blocking download of the bulk file. */
if (connSetReadHandler(conn, readSyncBulkPayload) == C_ERR)
{
char conninfo[CONN_INFO_LEN];
serverLog(LL_WARNING,
"Can't create readable event for SYNC: %s (%s)",
strerror(errno), connGetInfo(conn, conninfo, sizeof(conninfo)));
goto error;
}
server.repl_state = REPL_STATE_TRANSFER;
server.repl_transfer_size = -1;
server.repl_transfer_read = 0;
server.repl_transfer_last_fsync_off = 0;
server.repl_transfer_lastio = server.unixtime;
return;