Redis 6.0 源码阅读笔记(11) -- 主从复制 Slave 节点流程分析

1. 主从复制中 Slave 节点的处理
Slave 节点主从复制的处理从建立从属关系开始,以下为大致的处理步骤:

Slave 节点接收 slaveof 、replicaof 命令,进行从属关系的处理
根据命令参数处理命令,因为也有可能是移除从属关系的操作
如果是建立从属关系的命令,经过初步校验,保存下主节点的相关信息,并将当前节点的同步状态设置为 REPL_STATE_CONNECT
定时任务检查到当前节点的同步状态为 REPL_STATE_CONNECT,则需要与主节点建立连接,连接成功后开始进行数据同步
同步数据前 Slave 节点需要与 Master 节点交互,约定数据同步的方式以及数据传输的形式,确定后等待 Master 节点传输数据
从节点清空 db 数据,接收主节点传输过来的 rdb 数据,并将其加载到 db 中

2. 处理流程分析

2.1 主从连接的建立
当 Redis 节点收到 slaveof 或者 replicaof 命令时会触发从属关系处理的函数 replication.c#replicaofCommand(),该函数只是个入口,处理逻辑较为简单:

首先是常规的状态检查,需要注意在 Redis 集群模式中 slaveof、replicaof 命令都是不支持的
检查命令参数是否是去除主从关系,例如 slaveof no one,这种命令由 replicationUnsetMaster() 函数处理,其内部逻辑很简单,不做分析
如果不是去除主从关系的操作,相关校验通过后,调用 replicationSetMaster() 函数进行处理

/* REPLICAOF / SLAVEOF command implementation.
 *
 * "REPLICAOF NO ONE" turns the instance back into a master (only if it
 * currently has a master configured). "REPLICAOF <host> <port>" validates
 * the arguments and calls replicationSetMaster() so that this instance
 * starts replicating from the given address.
 *
 * Replies with an error when cluster mode is enabled, when the caller is
 * itself a replica client, or when the port argument is not a valid port.
 * In every other case the reply is +OK. */
void replicaofCommand(client *c) {
 /* SLAVEOF is not allowed in cluster mode as replication is automatically
  * configured using the current address of the master node. */
 if (server.cluster_enabled) {
     addReplyError(c,"REPLICAOF not allowed in cluster mode.");
     return;
 }

 /* The special host/port combination "NO" "ONE" turns the instance
  * into a master. Otherwise the new master address is set. */
 if (!strcasecmp(c->argv[1]->ptr,"no") &&
     !strcasecmp(c->argv[2]->ptr,"one")) {
     if (server.masterhost) {
         replicationUnsetMaster();
         sds client = catClientInfoString(sdsempty(),c);
         serverLog(LL_NOTICE,"MASTER MODE enabled (user request from '%s')",
             client);
         sdsfree(client);
     }
 } else {
     long port;

     if (c->flags & CLIENT_SLAVE)
     {
         /* If a client is already a replica they cannot run this command,
          * because it involves flushing all replicas (including this
          * client) */
         addReplyError(c, "Command is not valid when client is a replica.");
         return;
     }

     if ((getLongFromObjectOrReply(c, c->argv[2], &port, NULL) != C_OK))
         return;

     /* The port is handed to replicationSetMaster() as an int, so a value
      * outside the TCP port range would be silently truncated there.
      * Reject it here instead of trying to connect to a bogus port. */
     if (port < 0 || port > 65535) {
         addReplyError(c,"Invalid master port");
         return;
     }

     /* Check if we are already attached to the specified master */
     if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr)
         && server.masterport == port) {
         serverLog(LL_NOTICE,"REPLICAOF would result into synchronization "
                             "with the master we are already connected "
                             "with. No operation performed.");
         addReplySds(c,sdsnew("+OK Already connected to specified "
                              "master\r\n"));
         return;
     }
     /* There was no previous master or the user specified a different one,
      * we can continue. */
     replicationSetMaster(c->argv[1]->ptr, port);
     sds client = catClientInfoString(sdsempty(),c);
     serverLog(LL_NOTICE,"REPLICAOF %s:%d enabled (user request from '%s')",
         server.masterhost, server.masterport, client);
     sdsfree(client);
 }
 addReply(c,shared.ok);
}

replication.c#replicationSetMaster() 函数逻辑很简单,主要是进行相关变量的设置,如 server.masterhost、server.masterport,并且将当前节点的所有从节点连接都断开,因为当前节点变成了从节点,其数据集可能会变化,当前节点的从节点需要重新与其同步。最后的动作很关键,将节点的 server.repl_state同步状态设置为REPL_STATE_CONNECT
 

/* Configure this instance as a replica of the master at ip:port.
 *
 * Stores the new master address, drops the current master link, the
 * blocked clients and the attached replicas, aborts any handshake in
 * progress, fires the module events and finally sets the replication
 * state to REPL_STATE_CONNECT. */
void replicationSetMaster(char *ip, int port) {
 /* Record the previous role before masterhost gets overwritten. */
 int had_no_master = (server.masterhost == NULL);

 /* Replace the stored master address. */
 sdsfree(server.masterhost);
 server.masterhost = sdsnew(ip);
 server.masterport = port;

 /* Drop the link with the previous master, if there is one. */
 if (server.master != NULL) freeClient(server.master);

 /* Clients blocked while we were a master must be released now that
  * we are a replica. */
 disconnectAllBlockedClients();

 /* Our own replicas have to resync with us. A partial resync may still
  * be possible for them, but they must learn about the replid change. */
 disconnectSlaves();
 cancelReplicationHandshake();

 /* Coming from the master role: build a cached master out of our own
  * parameters so that a PSYNC with the new master can be attempted. */
 if (had_no_master) {
     replicationDiscardCachedMaster();
     replicationCacheMasterUsingMyself();
 }

 /* Notify modules about the role change. */
 moduleFireServerEvent(REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED,
                       REDISMODULE_EVENT_REPLROLECHANGED_NOW_REPLICA,
                       NULL);

 /* Notify modules that the master link went down, when it was up. */
 if (server.repl_state == REPL_STATE_CONNECTED) {
     moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                           REDISMODULE_SUBEVENT_MASTER_LINK_DOWN,
                           NULL);
 }

 /* From now on the instance must connect to the new master. */
 server.repl_state = REPL_STATE_CONNECT;
}

当节点的server.repl_state状态为REPL_STATE_CONNECT时,定时任务会识别出来并做对应的处理,这部分操作在 replication.c#replicationCron() 函数中。源码较长,不过重要的处理可以归纳如下:

首先是对超时的处理,如果当前节点与主节点上一次通信的时间距离现在超过了配置值 server.repl_timeout(默认 60s),则进行对应的处理
当节点的server.repl_state状态为REPL_STATE_CONNECT时,调用 connectWithMaster() 函数与主节点建立连接
如果当前节点已经与主节点建立了连接(server.master 不为空),并且主节点支持 PSYNC(未设置 CLIENT_PRE_PSYNC 标志),则调用 replicationSendAck() 给主节点发送 ACK,防止自身被主节点识别为超时
如果当前节点也挂了从节点,则需要定期向从节点发送 PING,防止从节点误判超时

/* Replication cron function, called 1 time per second.
 *
 * In order, the function:
 *  - aborts a connect/handshake or a bulk transfer that exceeded
 *    server.repl_timeout, and frees a connected master that was silent
 *    for longer than server.repl_timeout;
 *  - calls connectWithMaster() when the state is REPL_STATE_CONNECT;
 *  - sends a replication ACK to a master that supports PSYNC;
 *  - PINGs attached replicas every server.repl_ping_slave_period loops and
 *    writes a newline to the replicas still waiting for the RDB;
 *  - disconnects online replicas whose last ACK is older than
 *    server.repl_timeout;
 *  - frees the backlog of a master that had no replicas for longer than
 *    server.repl_backlog_time_limit, and flushes the script cache when
 *    it is no longer needed;
 *  - starts a BGSAVE for the replicas in WAIT_BGSAVE_START state;
 *  - removes the RDB used for syncing and refreshes the good replicas
 *    count. */
void replicationCron(void) {
 static long long replication_cron_loops = 0;

 /* Non blocking connection timeout? */
 if (server.masterhost &&
     (server.repl_state == REPL_STATE_CONNECTING ||
      slaveIsInHandshakeState()) &&
      (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
 {
     serverLog(LL_WARNING,"Timeout connecting to the MASTER...");
     cancelReplicationHandshake();
 }

 /* Bulk transfer I/O timeout? */
 if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER &&
     (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
 {
     serverLog(LL_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
     cancelReplicationHandshake();
 }

 /* Timed out master when we are an already connected slave? */
 if (server.masterhost && server.repl_state == REPL_STATE_CONNECTED &&
     (time(NULL)-server.master->lastinteraction) > server.repl_timeout)
 {
     serverLog(LL_WARNING,"MASTER timeout: no data nor PING received...");
     freeClient(server.master);
 }

 /* Check if we should connect to a MASTER */
 if (server.repl_state == REPL_STATE_CONNECT) {
     serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
         server.masterhost, server.masterport);
     if (connectWithMaster() == C_OK) {
         serverLog(LL_NOTICE,"MASTER <-> REPLICA sync started");
     }
 }

 /* Send ACK to master from time to time.
  * Note that we do not send periodic acks to masters that don't
  * support PSYNC and replication offsets. */
 if (server.masterhost && server.master &&
     !(server.master->flags & CLIENT_PRE_PSYNC))
     replicationSendAck();

 /* If we have attached slaves, PING them from time to time.
  * So slaves can implement an explicit timeout to masters, and will
  * be able to detect a link disconnection even if the TCP connection
  * will not actually go down. */
 listIter li;
 listNode *ln;
 robj *ping_argv[1];

 /* First, send PING according to ping_slave_period. */
 if ((replication_cron_loops % server.repl_ping_slave_period) == 0 &&
     listLength(server.slaves))
 {
     /* Note that we don't send the PING if the clients are paused during
      * a Redis Cluster manual failover: the PING we send will otherwise
      * alter the replication offsets of master and slave, and will no longer
      * match the one stored into 'mf_master_offset' state. */
     int manual_failover_in_progress =
         server.cluster_enabled &&
         server.cluster->mf_end &&
         clientsArePaused();

     if (!manual_failover_in_progress) {
         ping_argv[0] = createStringObject("PING",4);
         replicationFeedSlaves(server.slaves, server.slaveseldb,
             ping_argv, 1);
         decrRefCount(ping_argv[0]);
     }
 }

 /* Second, send a newline to all the slaves in pre-synchronization
  * stage, that is, slaves waiting for the master to create the RDB file.
  *
  * Also send the a newline to all the chained slaves we have, if we lost
  * connection from our master, to keep the slaves aware that their
  * master is online. This is needed since sub-slaves only receive proxied
  * data from top-level masters, so there is no explicit pinging in order
  * to avoid altering the replication offsets. This special out of band
  * pings (newlines) can be sent, they will have no effect in the offset.
  *
  * The newline will be ignored by the slave but will refresh the
  * last interaction timer preventing a timeout. In this case we ignore the
  * ping period and refresh the connection once per second since certain
  * timeouts are set at a few seconds (example: PSYNC response). */
 listRewind(server.slaves,&li);
 while((ln = listNext(&li))) {
     client *slave = ln->value;

     /* A replica counts as "pre-sync" while the BGSAVE has not started
      * yet, or while it waits for a BGSAVE that is not of the socket
      * (diskless) type to finish. */
     int is_presync =
         (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START ||
         (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
          server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));

     if (is_presync) {
         /* The result of the write is intentionally not checked. */
         connWrite(slave->conn, "\n", 1);
     }
 }

 /* Disconnect timedout slaves. */
 if (listLength(server.slaves)) {
     /* NOTE(review): these declarations shadow the function-scope
      * 'li' and 'ln' declared above. */
     listIter li;
     listNode *ln;

     listRewind(server.slaves,&li);
     while((ln = listNext(&li))) {
         client *slave = ln->value;

         /* Only online replicas that send ACKs (i.e. not pre-PSYNC ones)
          * are subject to the ACK timeout. */
         if (slave->replstate != SLAVE_STATE_ONLINE) continue;
         if (slave->flags & CLIENT_PRE_PSYNC) continue;
         if ((server.unixtime - slave->repl_ack_time) > server.repl_timeout)
         {
             serverLog(LL_WARNING, "Disconnecting timedout replica: %s",
                 replicationGetSlaveName(slave));
             freeClient(slave);
         }
     }
 }

 /* If this is a master without attached slaves and there is a replication
  * backlog active, in order to reclaim memory we can free it after some
  * (configured) time. Note that this cannot be done for slaves: slaves
  * without sub-slaves attached should still accumulate data into the
  * backlog, in order to reply to PSYNC queries if they are turned into
  * masters after a failover. */
 if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit &&
     server.repl_backlog && server.masterhost == NULL)
 {
     time_t idle = server.unixtime - server.repl_no_slaves_since;

     if (idle > server.repl_backlog_time_limit) {
         /* When we free the backlog, we always use a new
          * replication ID and clear the ID2. This is needed
          * because when there is no backlog, the master_repl_offset
          * is not updated, but we would still retain our replication
          * ID, leading to the following problem:
          *
          * 1. We are a master instance.
          * 2. Our slave is promoted to master. It's repl-id-2 will
          *    be the same as our repl-id.
          * 3. We, yet as master, receive some updates, that will not
          *    increment the master_repl_offset.
          * 4. Later we are turned into a slave, connect to the new
          *    master that will accept our PSYNC request by second
          *    replication ID, but there will be data inconsistency
          *    because we received writes. */
         changeReplicationId();
         clearReplicationId2();
         freeReplicationBacklog();
         serverLog(LL_NOTICE,
             "Replication backlog freed after %d seconds "
             "without connected replicas.",
             (int) server.repl_backlog_time_limit);
     }
 }

 /* If AOF is disabled and we no longer have attached slaves, we can
  * free our Replication Script Cache as there is no need to propagate
  * EVALSHA at all. */
 if (listLength(server.slaves) == 0 &&
     server.aof_state == AOF_OFF &&
     listLength(server.repl_scriptcache_fifo) != 0)
 {
     replicationScriptCacheFlush();
 }

 /* Start a BGSAVE good for replication if we have slaves in
  * WAIT_BGSAVE_START state.
  *
  * In case of diskless replication, we make sure to wait the specified
  * number of seconds (according to configuration) so that other slaves
  * have the time to arrive before we start streaming. */
 if (!hasActiveChildProcess()) {
     time_t idle, max_idle = 0;
     int slaves_waiting = 0;
     int mincapa = -1;
     listNode *ln;
     listIter li;

     listRewind(server.slaves,&li);
     while((ln = listNext(&li))) {
         client *slave = ln->value;
         if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
             idle = server.unixtime - slave->lastinteraction;
             if (idle > max_idle) max_idle = idle;
             slaves_waiting++;
             /* mincapa ends up as the bitwise AND of the capabilities of
              * all the waiting replicas (-1 means "no replica seen yet"). */
             mincapa = (mincapa == -1) ? slave->slave_capa :
                                         (mincapa & slave->slave_capa);
         }
     }

     if (slaves_waiting &&
         (!server.repl_diskless_sync ||
          max_idle > server.repl_diskless_sync_delay))
     {
         /* Start the BGSAVE. The called function may start a
          * BGSAVE with socket target or disk target depending on the
          * configuration and slaves capabilities. */
         startBgsaveForReplication(mincapa);
     }
 }

 /* Remove the RDB file used for replication if Redis is not running
  * with any persistence. */
 removeRDBUsedToSyncReplicas();

 /* Refresh the number of slaves with lag <= min-slaves-max-lag. */
 refreshGoodSlavesCount();
 replication_cron_loops++; /* Incremented with frequency 1 HZ. */
}

replication.c#connectWithMaster() 函数比较简单,只是调用 connConnect()函数连接主节点,并且设置该连接上的处理函数为 syncWithMaster(),此时当前节点的同步状态为 REPL_STATE_CONNECTING

/* Start a non blocking connection to the configured master.
 *
 * Creates a TLS or plain socket connection depending on
 * server.tls_replication and asks connConnect() to call syncWithMaster()
 * on it. Returns C_OK and moves the replication state to
 * REPL_STATE_CONNECTING when the connect could be started, otherwise logs
 * the error, releases the connection and returns C_ERR. */
int connectWithMaster(void) {
 if (server.tls_replication)
     server.repl_transfer_s = connCreateTLS();
 else
     server.repl_transfer_s = connCreateSocket();

 if (connConnect(server.repl_transfer_s, server.masterhost, server.masterport,
             NET_FIRST_BIND_ADDR, syncWithMaster) != C_ERR)
 {
     /* Connect started: the timestamp is what the cron compares against
      * server.repl_timeout to detect a stuck connect. */
     server.repl_transfer_lastio = server.unixtime;
     server.repl_state = REPL_STATE_CONNECTING;
     return C_OK;
 }

 /* Connect failed right away: report it and drop the connection. */
 serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
         connGetLastError(server.repl_transfer_s));
 connClose(server.repl_transfer_s);
 server.repl_transfer_s = NULL;
 return C_ERR;
}

replication.c#syncWithMaster() 函数主要负责和主节点交互,最终确定同步数据的方式(全量/部分)和数据的传输形式(直接 socket 传输/rdb 文件传输),经过这个步骤后节点同步状态为 REPL_STATE_TRANSFER。该函数源码较长,比较关键的是节点同步状态的流转:

当节点处于 REPL_STATE_SEND_CAPA 状态时,会将自己支持的能力发送给主节点,其中 eof 代表支持无盘复制(diskless)时以 EOF 标记结尾的 RDB 传输格式,psync2 代表支持 PSYNC v2,即能够理解 +CONTINUE <new repl ID> 形式的回复
当节点处于 REPL_STATE_SEND_PSYNC 状态时,会调用函数 slaveTryPartialResynchronization() 与主节点确定同步数据的方式(全量/部分)
如果主节点返回不支持 PSYNC 命令,则需要重新发送 SYNC 命令给主节点进行全量复制
如果最后确定同步数据的方式为全量复制且不能直接通过 socket 传输数据,则当前节点需要创建一个缓存文件用于接收主节点发送过来的 rdb 文件
最后调用 connSetReadHandler() 在连接上注册 readSyncBulkPayload() 函数用于处理主节点传输过来的数据
 

/* Replica-side replication states, stored in server.repl_state. */
#define REPL_STATE_NONE           0  /* No active replication */
#define REPL_STATE_CONNECT        1  /* Must connect to master */
#define REPL_STATE_CONNECTING     2  /* Connecting to master */
/* --- Handshake states, must be ordered --- */
#define REPL_STATE_RECEIVE_PONG   3  /* Wait for PING reply */
#define REPL_STATE_SEND_AUTH      4  /* Send AUTH to master */
#define REPL_STATE_RECEIVE_AUTH   5  /* Wait for AUTH reply */
#define REPL_STATE_SEND_PORT      6  /* Send REPLCONF listening-port */
#define REPL_STATE_RECEIVE_PORT   7  /* Wait for REPLCONF reply */
#define REPL_STATE_SEND_IP        8  /* Send REPLCONF ip-address */
#define REPL_STATE_RECEIVE_IP     9  /* Wait for REPLCONF reply */
#define REPL_STATE_SEND_CAPA      10 /* Send REPLCONF capa */
#define REPL_STATE_RECEIVE_CAPA   11 /* Wait for REPLCONF reply */
#define REPL_STATE_SEND_PSYNC     12 /* Send PSYNC */
#define REPL_STATE_RECEIVE_PSYNC  13 /* Wait for PSYNC reply */
/* --- End of handshake states --- */
#define REPL_STATE_TRANSFER       14 /* Receiving .rdb from master */
#define REPL_STATE_CONNECTED      15 /* Connected to master */
/* Connection handler driving the replica side of the replication handshake.
 *
 * connectWithMaster() passes this function to connConnect(), and the
 * function re-installs itself as read handler while it waits for replies.
 * Every call advances server.repl_state by one or more steps:
 *
 *   CONNECTING -> RECEIVE_PONG -> SEND_AUTH -> RECEIVE_AUTH -> SEND_PORT ->
 *   RECEIVE_PORT -> SEND_IP -> RECEIVE_IP -> SEND_CAPA -> RECEIVE_CAPA ->
 *   SEND_PSYNC -> RECEIVE_PSYNC
 *
 * Each SEND_* step writes a command and returns, so the reply is handled by
 * a later call. After the PSYNC reply the function either returns (partial
 * resync accepted), or prepares a full resync: falls back to SYNC when PSYNC
 * is not supported, creates the temp file used to store the RDB unless
 * diskless load is in use, installs readSyncBulkPayload() as read handler
 * and sets the state to REPL_STATE_TRANSFER.
 *
 * On any error the connection and the transfer temp file descriptor are
 * released and the state goes back to REPL_STATE_CONNECT. */
void syncWithMaster(connection *conn) {
 char tmpfile[256], *err = NULL;
 int dfd = -1, maxtries = 5;
 int psync_result;

 /* If this event fired after the user turned the instance into a master
  * with SLAVEOF NO ONE we must just return ASAP. */
 if (server.repl_state == REPL_STATE_NONE) {
     connClose(conn);
     return;
 }

 /* Check for errors in the socket: after a non blocking connect() we
  * may find that the socket is in error state. */
 if (connGetState(conn) != CONN_STATE_CONNECTED) {
     serverLog(LL_WARNING,"Error condition on socket for SYNC: %s",
             connGetLastError(conn));
     goto error;
 }

 /* Send a PING to check the master is able to reply without errors. */
 if (server.repl_state == REPL_STATE_CONNECTING) {
     serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
     /* Delete the writable event so that the readable event remains
      * registered and we can wait for the PONG reply. */
     connSetReadHandler(conn, syncWithMaster);
     connSetWriteHandler(conn, NULL);
     server.repl_state = REPL_STATE_RECEIVE_PONG;
     /* Send the PING. A write failure aborts the handshake through the
      * write_error path. */
     err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"PING",NULL);
     if (err) goto write_error;
     return;
 }

 /* Receive the PONG command. */
 if (server.repl_state == REPL_STATE_RECEIVE_PONG) {
     err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);

     /* We accept only two replies as valid, a positive +PONG reply
      * (we just check for "+") or an authentication error.
      * Note that older versions of Redis replied with "operation not
      * permitted" instead of using a proper error code, so we test
      * both. */
     if (err[0] != '+' &&
         strncmp(err,"-NOAUTH",7) != 0 &&
         strncmp(err,"-ERR operation not permitted",28) != 0)
     {
         serverLog(LL_WARNING,"Error reply to PING from master: '%s'",err);
         sdsfree(err);
         goto error;
     } else {
         serverLog(LL_NOTICE,
             "Master replied to PING, replication can continue...");
     }
     sdsfree(err);
     server.repl_state = REPL_STATE_SEND_AUTH;
 }

 /* AUTH with the master if required. */
 if (server.repl_state == REPL_STATE_SEND_AUTH) {
     if (server.masteruser && server.masterauth) {
         err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"AUTH",
                                      server.masteruser,server.masterauth,NULL);
         if (err) goto write_error;
         server.repl_state = REPL_STATE_RECEIVE_AUTH;
         return;
     } else if (server.masterauth) {
         err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"AUTH",server.masterauth,NULL);
         if (err) goto write_error;
         server.repl_state = REPL_STATE_RECEIVE_AUTH;
         return;
     } else {
         server.repl_state = REPL_STATE_SEND_PORT;
     }
 }

 /* Receive AUTH reply. */
 if (server.repl_state == REPL_STATE_RECEIVE_AUTH) {
     err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
     if (err[0] == '-') {
         serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
         sdsfree(err);
         goto error;
     }
     sdsfree(err);
     server.repl_state = REPL_STATE_SEND_PORT;
 }

 /* Set the slave port, so that Master's INFO command can list the
  * slave listening port correctly. */
 if (server.repl_state == REPL_STATE_SEND_PORT) {
     int port;
     if (server.slave_announce_port) port = server.slave_announce_port;
     else if (server.tls_replication && server.tls_port) port = server.tls_port;
     else port = server.port;
     sds portstr = sdsfromlonglong(port);
     err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
             "listening-port",portstr, NULL);
     sdsfree(portstr);
     if (err) goto write_error;
     server.repl_state = REPL_STATE_RECEIVE_PORT;
     return;
 }

 /* Receive REPLCONF listening-port reply. */
 if (server.repl_state == REPL_STATE_RECEIVE_PORT) {
     err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
     /* Ignore the error if any, not all the Redis versions support
      * REPLCONF listening-port. */
     if (err[0] == '-') {
         serverLog(LL_NOTICE,"(Non critical) Master does not understand "
                             "REPLCONF listening-port: %s", err);
     }
     sdsfree(err);
     server.repl_state = REPL_STATE_SEND_IP;
 }

 /* Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
 if (server.repl_state == REPL_STATE_SEND_IP &&
     server.slave_announce_ip == NULL)
 {
         server.repl_state = REPL_STATE_SEND_CAPA;
 }

 /* Set the slave ip, so that Master's INFO command can list the
  * slave IP address port correctly in case of port forwarding or NAT. */
 if (server.repl_state == REPL_STATE_SEND_IP) {
     err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
             "ip-address",server.slave_announce_ip, NULL);
     if (err) goto write_error;
     server.repl_state = REPL_STATE_RECEIVE_IP;
     return;
 }

 /* Receive REPLCONF ip-address reply. */
 if (server.repl_state == REPL_STATE_RECEIVE_IP) {
     err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
     /* Ignore the error if any, not all the Redis versions support
      * REPLCONF ip-address. */
     if (err[0] == '-') {
         serverLog(LL_NOTICE,"(Non critical) Master does not understand "
                             "REPLCONF ip-address: %s", err);
     }
     sdsfree(err);
     server.repl_state = REPL_STATE_SEND_CAPA;
 }

 /* Inform the master of our (slave) capabilities.
  *
  * EOF: supports EOF-style RDB transfer for diskless replication.
  * PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
  *
  * The master will ignore capabilities it does not understand. */
 if (server.repl_state == REPL_STATE_SEND_CAPA) {
     err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
             "capa","eof","capa","psync2",NULL);
     if (err) goto write_error;
     server.repl_state = REPL_STATE_RECEIVE_CAPA;
     return;
 }

 /* Receive CAPA reply. */
 if (server.repl_state == REPL_STATE_RECEIVE_CAPA) {
     err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
     /* Ignore the error if any, not all the Redis versions support
      * REPLCONF capa. */
     if (err[0] == '-') {
         serverLog(LL_NOTICE,"(Non critical) Master does not understand "
                               "REPLCONF capa: %s", err);
     }
     sdsfree(err);
     server.repl_state = REPL_STATE_SEND_PSYNC;
 }

 /* Try a partial resynchonization. If we don't have a cached master
  * slaveTryPartialResynchronization() will at least try to use PSYNC
  * to start a full resynchronization so that we get the master run id
  * and the global offset, to try a partial resync at the next
  * reconnection attempt. */
 if (server.repl_state == REPL_STATE_SEND_PSYNC) {
     if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) {
         err = sdsnew("Write error sending the PSYNC command.");
         goto write_error;
     }
     server.repl_state = REPL_STATE_RECEIVE_PSYNC;
     return;
 }

 /* If reached this point, we should be in REPL_STATE_RECEIVE_PSYNC. */
 if (server.repl_state != REPL_STATE_RECEIVE_PSYNC) {
     serverLog(LL_WARNING,"syncWithMaster(): state machine error, "
                          "state should be RECEIVE_PSYNC but is %d",
                          server.repl_state);
     goto error;
 }

 psync_result = slaveTryPartialResynchronization(conn,1);
 if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */

 /* If the master is in an transient error, we should try to PSYNC
  * from scratch later, so go to the error path. This happens when
  * the server is loading the dataset or is not connected with its
  * master and so forth. */
 if (psync_result == PSYNC_TRY_LATER) goto error;

 /* Note: if PSYNC does not return WAIT_REPLY, it will take care of
  * uninstalling the read handler from the file descriptor. */

 if (psync_result == PSYNC_CONTINUE) {
     serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Master accepted a Partial Resynchronization.");
     if (server.supervised_mode == SUPERVISED_SYSTEMD) {
         redisCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Partial Resynchronization accepted. Ready to accept connections.\n");
         redisCommunicateSystemd("READY=1\n");
     }
     return;
 }

 /* PSYNC failed or is not supported: we want our slaves to resync with us
  * as well, if we have any sub-slaves. The master may transfer us an
  * entirely different data set and we have no way to incrementally feed
  * our slaves after that. */
 disconnectSlaves(); /* Force our slaves to resync with us as well. */
 freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */

 /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
  * and the server.master_replid and master_initial_offset are
  * already populated. */
 if (psync_result == PSYNC_NOT_SUPPORTED) {
     serverLog(LL_NOTICE,"Retrying with SYNC...");
     if (connSyncWrite(conn,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
         serverLog(LL_WARNING,"I/O error writing to MASTER: %s",
             strerror(errno));
         goto error;
     }
 }

 /* Prepare a suitable temp file for bulk transfer */
 if (!useDisklessLoad()) {
     while(maxtries--) {
         snprintf(tmpfile,sizeof(tmpfile),
             "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
         dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
         if (dfd != -1) break;
         sleep(1);
     }
     if (dfd == -1) {
         serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno));
         goto error;
     }
     server.repl_transfer_tmpfile = zstrdup(tmpfile);
     server.repl_transfer_fd = dfd;
     /* Ownership of the descriptor moved to server.repl_transfer_fd:
      * forget the local copy, otherwise the error path below would
      * close the same descriptor twice. */
     dfd = -1;
 }

 /* Setup the non blocking download of the bulk file. */
 if (connSetReadHandler(conn, readSyncBulkPayload)
         == C_ERR)
 {
     char conninfo[CONN_INFO_LEN];
     serverLog(LL_WARNING,
         "Can't create readable event for SYNC: %s (%s)",
         strerror(errno), connGetInfo(conn, conninfo, sizeof(conninfo)));
     goto error;
 }

 server.repl_state = REPL_STATE_TRANSFER;
 server.repl_transfer_size = -1;
 server.repl_transfer_read = 0;
 server.repl_transfer_last_fsync_off = 0;
 server.repl_transfer_lastio = server.unixtime;
 return;

error:
 /* Release everything acquired so far and schedule a new connection
  * attempt by going back to REPL_STATE_CONNECT. */
 if (dfd != -1) close(dfd);
 connClose(conn);
 server.repl_transfer_s = NULL;
 if (server.repl_transfer_fd != -1)
     close(server.repl_transfer_fd);
 if (server.repl_transfer_tmpfile)
     zfree(server.repl_transfer_tmpfile);
 server.repl_transfer_tmpfile = NULL;
 server.repl_transfer_fd = -1;
 server.repl_state = REPL_STATE_CONNECT;
 return;

write_error: /* Handle sendSynchronousCommand(SYNC_CMD_WRITE) errors. */
 serverLog(LL_WARNING,"Sending command to master in replication handshake: %s", err);
 sdsfree(err);
 goto error;
}

replication.c#slaveTryPartialResynchronization() 函数主要用于与主节点确定同步数据的方式,源码比较长,不过处理比较好理解

从节点发送 PSYNC 命令给主节点,同步处理主节点返回的结果
主节点返回的结果只有 4 种:
+FULLRESYNC: 主节点判断必须进行全量复制
+CONTINUE: 主节点判断可以进行部分复制
-NOMASTERLINK、-LOADING:主节点忙于其他事务,建议稍后重试
-ERR: 主节点不支持 PSYNC 命令或者发生了一些异常
 

/* Negotiate with the master, over 'conn', how the data set will be synced.
 *
 * The function works in two halves selected by 'read_reply':
 *
 * read_reply == 0 (writing half): sends "PSYNC <replid> <offset>", using
 * the cached master replication ID and offset+1 when a cached master
 * exists, or "PSYNC ? -1" otherwise. Returns PSYNC_WRITE_ERROR if the
 * command could not be sent, PSYNC_WAIT_REPLY otherwise.
 *
 * read_reply != 0 (reading half): reads one reply line and returns:
 *
 * PSYNC_WAIT_REPLY:    the reply was empty, call again later.
 * PSYNC_FULLRESYNC:    the master replied +FULLRESYNC; master_replid and
 *                      master_initial_offset are set when the reply could
 *                      be parsed, and the cached master is discarded.
 * PSYNC_CONTINUE:      the master replied +CONTINUE; the cached master is
 *                      resurrected on 'conn'.
 * PSYNC_TRY_LATER:     the master replied -NOMASTERLINK or -LOADING.
 * PSYNC_NOT_SUPPORTED: any other reply; the cached master is discarded.
 *
 * Except for PSYNC_WAIT_REPLY, the reading half removes the read handler
 * from 'conn' before returning. */
int slaveTryPartialResynchronization(connection *conn, int read_reply) {
 char *psync_replid;
 char psync_offset[32];
 sds reply;

 /* Writing half */
 if (!read_reply) {
     /* Initially set master_initial_offset to -1 to mark the current
      * master run_id and offset as not valid. Later if we'll be able to do
      * a FULL resync using the PSYNC command we'll set the offset at the
      * right value, so that this information will be propagated to the
      * client structure representing the master into server.master. */
     server.master_initial_offset = -1;

     if (server.cached_master) {
         psync_replid = server.cached_master->replid;
         snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
         serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
     } else {
         serverLog(LL_NOTICE,"Partial resynchronization not possible (no cached master)");
         psync_replid = "?";
         /* Three bytes: the two characters of "-1" plus the terminator. */
         memcpy(psync_offset,"-1",3);
     }

     /* Issue the PSYNC command */
     reply = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"PSYNC",psync_replid,psync_offset,NULL);
     if (reply != NULL) {
         serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
         sdsfree(reply);
         connSetReadHandler(conn, NULL);
         return PSYNC_WRITE_ERROR;
     }
     return PSYNC_WAIT_REPLY;
 }

 /* Reading half */
 reply = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
 if (sdslen(reply) == 0) {
     /* The master may send empty newlines after it receives PSYNC
      * and before to reply, just to keep the connection alive. */
     sdsfree(reply);
     return PSYNC_WAIT_REPLY;
 }

 /* A real reply arrived: stop being called for readable events. */
 connSetReadHandler(conn, NULL);

 if (!strncmp(reply,"+FULLRESYNC",11)) {
     char *replid = NULL, *offset = NULL;

     /* FULL RESYNC, parse the reply in order to extract the run id
      * and the replication offset. */
     replid = strchr(reply,' ');
     if (replid) {
         replid++;
         offset = strchr(replid,' ');
         if (offset) offset++;
     }
     /* offset-replid-1 is the length of the ID field, without the space
      * that separates it from the offset. */
     if (!replid || !offset || (offset-replid-1) != CONFIG_RUN_ID_SIZE) {
         serverLog(LL_WARNING,
             "Master replied with wrong +FULLRESYNC syntax.");
         /* This is an unexpected condition, actually the +FULLRESYNC
          * reply means that the master supports PSYNC, but the reply
          * format seems wrong. To stay safe we blank the master
          * replid to make sure next PSYNCs will fail. */
         memset(server.master_replid,0,CONFIG_RUN_ID_SIZE+1);
     } else {
         memcpy(server.master_replid, replid, offset-replid-1);
         server.master_replid[CONFIG_RUN_ID_SIZE] = '\0';
         server.master_initial_offset = strtoll(offset,NULL,10);
         serverLog(LL_NOTICE,"Full resync from master: %s:%lld",
             server.master_replid,
             server.master_initial_offset);
     }
     /* We are going to full resync, discard the cached master structure. */
     replicationDiscardCachedMaster();
     sdsfree(reply);
     return PSYNC_FULLRESYNC;
 }

 if (!strncmp(reply,"+CONTINUE",9)) {
     /* Partial resync was accepted. */
     serverLog(LL_NOTICE,
         "Successful partial resynchronization with master.");

     /* Check the new replication ID advertised by the master. If it
      * changed, we need to set the new ID as primary ID, and set or
      * secondary ID as the old master ID up to the current offset, so
      * that our sub-slaves will be able to PSYNC with us after a
      * disconnection. */
     /* 'start' is the position right after "+CONTINUE " (10 characters),
      * 'end' is moved from the end of "+CONTINUE" up to the first CR, LF
      * or terminator, so end-start is the length of the optional ID. */
     char *start = reply+10;
     char *end = reply+9;
     while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++;
     if (end-start == CONFIG_RUN_ID_SIZE) {
         char new[CONFIG_RUN_ID_SIZE+1];
         memcpy(new,start,CONFIG_RUN_ID_SIZE);
         new[CONFIG_RUN_ID_SIZE] = '\0';

         /* NOTE(review): server.cached_master is dereferenced here without
          * a NULL check; presumably +CONTINUE is only expected when a
          * cached master was offered in the writing half - confirm. */
         if (strcmp(new,server.cached_master->replid)) {
             /* Master ID changed. */
             serverLog(LL_WARNING,"Master replication ID changed to %s",new);

             /* Set the old ID as our ID2, up to the current offset+1. */
             memcpy(server.replid2,server.cached_master->replid,
                 sizeof(server.replid2));
             server.second_replid_offset = server.master_repl_offset+1;

             /* Update the cached master ID and our own primary ID to the
              * new one. */
             memcpy(server.replid,new,sizeof(server.replid));
             memcpy(server.cached_master->replid,new,sizeof(server.replid));

             /* Disconnect all the sub-slaves: they need to be notified. */
             disconnectSlaves();
         }
     }

     /* Setup the replication to continue. */
     sdsfree(reply);
     replicationResurrectCachedMaster(conn);

     /* If this instance was restarted and we read the metadata to
      * PSYNC from the persistence file, our replication backlog could
      * be still not initialized. Create it. */
     if (server.repl_backlog == NULL) createReplicationBacklog();
     return PSYNC_CONTINUE;
 }

 /* If we reach this point we received either an error (since the master does
  * not understand PSYNC or because it is in a special state and cannot
  * serve our request), or an unexpected reply from the master.
  *
  * Return PSYNC_NOT_SUPPORTED on errors we don't understand, otherwise
  * return PSYNC_TRY_LATER if we believe this is a transient error. */

 if (!strncmp(reply,"-NOMASTERLINK",13) ||
     !strncmp(reply,"-LOADING",8))
 {
     serverLog(LL_NOTICE,
         "Master is currently unable to PSYNC "
         "but should be in the future: %s", reply);
     sdsfree(reply);
     return PSYNC_TRY_LATER;
 }

 /* strncmp() is non-zero when the reply does NOT start with "-ERR". */
 if (strncmp(reply,"-ERR",4)) {
     /* If it's not an error, log the unexpected event. */
     serverLog(LL_WARNING,
         "Unexpected reply to PSYNC from master: %s", reply);
 } else {
     serverLog(LL_NOTICE,
         "Master does not support PSYNC or is in "
         "error state (reply: %s)", reply);
 }
 sdsfree(reply);
 replicationDiscardCachedMaster();
 return PSYNC_NOT_SUPPORTED;
}

2.2 rdb 数据的接收
上一节已经分析完了主从连接的建立,本节从 replication.c#readSyncBulkPayload() 函数分析从节点处理主节点传输过来的数据的过程。该函数非常长,大致逻辑如下:

首先从与主节点的连接中读取数据,当 rdb 数据的传输方式为传输 rdb 文件时,将从连接上读取到的数据写入到之前创建的临时文件中
如果当前节点开启了 AOF,调用 stopAppendOnly() 函数将其关闭
调用 emptyDb() 函数清空当前节点的 db 数据
如果主节点是直接通过 socket 传输 rdb 数据,则从节点调用 rdbLoadRio() 函数从 socket 中读取 rdb 数据并加载进 db
如果主节点传输的是 rdb 文件,则将临时文件重命名为 rdb 文件名称,并调用 rdbLoad() 函数完成加载 rdb 文件到内存
最后如果当前节点配置开启了 AOF,则调用 restartAOFAfterSYNC() 函数重新启动 AOF

/* Read handler installed on the connection with the master while a full
 * resynchronization is in progress: it consumes the bulk RDB payload.
 *
 * The handler is called again every time it returns before the transfer is
 * complete, and works in three phases:
 *
 * 1. While server.repl_transfer_size is -1, read a single line holding
 *    either "$<count>" or "$EOF:<delimiter>", record the transfer format
 *    and return.
 * 2. Unless diskless loading is in use, append every chunk read from the
 *    socket to server.repl_transfer_fd and return until the whole payload
 *    was received.
 * 3. Stop the AOF, flush the old dataset, load the RDB (from the socket or
 *    from the renamed temp file), then create the master client, mark the
 *    link as REPL_STATE_CONNECTED and restart the AOF if it is enabled.
 *
 * Every failure aborts the sync by calling cancelReplicationHandshake(). */
void readSyncBulkPayload(connection *conn) {
char buf[PROTO_IOBUF_LEN];
ssize_t nread, readlen, nwritten;
int use_diskless_load = useDisklessLoad();
redisDb *diskless_load_backup = NULL;
int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC :
                                                    EMPTYDB_NO_FLAGS;
off_t left;

/* Static vars used to hold the EOF mark, and the last bytes received
 * from the server: when they match, we reached the end of the transfer. */
static char eofmark[CONFIG_RUN_ID_SIZE];
static char lastbytes[CONFIG_RUN_ID_SIZE];
static int usemark = 0;

/* If repl_transfer_size == -1 we still have to read the bulk length
 * from the master reply. */
if (server.repl_transfer_size == -1) {
    if (connSyncReadLine(conn,buf,1024,server.repl_syncio_timeout*1000) == -1) {
        serverLog(LL_WARNING,
            "I/O error reading bulk count from MASTER: %s",
            strerror(errno));
        goto error;
    }

    if (buf[0] == '-') {
        serverLog(LL_WARNING,
            "MASTER aborted replication with an error: %s",
            buf+1);
        goto error;
    } else if (buf[0] == '\0') {
        /* At this stage just a newline works as a PING in order to keep
         * the connection alive. So we refresh our last interaction
         * timestamp. */
        server.repl_transfer_lastio = server.unixtime;
        return;
    } else if (buf[0] != '$') {
        serverLog(LL_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
        goto error;
    }

    /* There are two possible forms for the bulk payload. One is the
     * usual $<count> bulk format. The other is used for diskless transfers
     * when the master does not know beforehand the size of the file to
     * transfer. In the latter case, the following format is used:
     *
     * $EOF:<40 bytes delimiter>
     *
     * At the end of the file the announced delimiter is transmitted. The
     * delimiter is long and random enough that the probability of a
     * collision with the actual file content can be ignored. */
    if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= CONFIG_RUN_ID_SIZE) {
        usemark = 1;
        memcpy(eofmark,buf+5,CONFIG_RUN_ID_SIZE);
        memset(lastbytes,0,CONFIG_RUN_ID_SIZE);
        /* Set any repl_transfer_size to avoid entering this code path
         * at the next call. */
        server.repl_transfer_size = 0;
        serverLog(LL_NOTICE,
            "MASTER <-> REPLICA sync: receiving streamed RDB from master with EOF %s",
            use_diskless_load? "to parser":"to disk");
    } else {
        usemark = 0;
        server.repl_transfer_size = strtol(buf+1,NULL,10);
        serverLog(LL_NOTICE,
            "MASTER <-> REPLICA sync: receiving %lld bytes from master %s",
            (long long) server.repl_transfer_size,
            use_diskless_load? "to parser":"to disk");
    }
    return;
}

if (!use_diskless_load) {
    /* Read the data from the socket, store it to a file and search
     * for the EOF. */
    if (usemark) {
        readlen = sizeof(buf);
    } else {
        /* Never read past the announced payload size. */
        left = server.repl_transfer_size - server.repl_transfer_read;
        readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
    }

    nread = connRead(conn,buf,readlen);
    if (nread <= 0) {
        if (connGetState(conn) == CONN_STATE_CONNECTED) {
            /* equivalent to EAGAIN */
            return;
        }
        serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s",
            (nread == -1) ? strerror(errno) : "connection lost");
        cancelReplicationHandshake();
        return;
    }
    server.stat_net_input_bytes += nread;

    /* When a mark is used, we want to detect EOF asap in order to avoid
     * writing the EOF mark into the file... */
    int eof_reached = 0;

    if (usemark) {
        /* Update the last bytes array, and check if it matches our
         * delimiter. */
        if (nread >= CONFIG_RUN_ID_SIZE) {
            memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,
                   CONFIG_RUN_ID_SIZE);
        } else {
            /* Short read: slide the window left and append the new bytes. */
            int rem = CONFIG_RUN_ID_SIZE-nread;
            memmove(lastbytes,lastbytes+nread,rem);
            memcpy(lastbytes+rem,buf,nread);
        }
        if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0)
            eof_reached = 1;
    }

    /* Update the last I/O time for the replication transfer (used in
     * order to detect timeouts during replication), and write what we
     * got from the socket to the dump file on disk. */
    server.repl_transfer_lastio = server.unixtime;
    if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) {
        serverLog(LL_WARNING,
            "Write error or short write writing to the DB dump file "
            "needed for MASTER <-> REPLICA synchronization: %s",
            (nwritten == -1) ? strerror(errno) : "short write");
        goto error;
    }
    server.repl_transfer_read += nread;

    /* Delete the last 40 bytes from the file if we reached EOF. */
    if (usemark && eof_reached) {
        if (ftruncate(server.repl_transfer_fd,
            server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1)
        {
            serverLog(LL_WARNING,
                "Error truncating the RDB file received from the master "
                "for SYNC: %s", strerror(errno));
            goto error;
        }
    }

    /* Sync data on disk from time to time, otherwise at the end of the
     * transfer we may suffer a big delay as the memory buffers are copied
     * into the actual disk. */
    if (server.repl_transfer_read >=
        server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC)
    {
        off_t sync_size = server.repl_transfer_read -
                          server.repl_transfer_last_fsync_off;
        rdb_fsync_range(server.repl_transfer_fd,
            server.repl_transfer_last_fsync_off, sync_size);
        server.repl_transfer_last_fsync_off += sync_size;
    }

    /* Check if the transfer is now complete */
    if (!usemark) {
        if (server.repl_transfer_read == server.repl_transfer_size)
            eof_reached = 1;
    }

    /* If the transfer is yet not complete, we need to read more, so
     * return ASAP and wait for the handler to be called again. */
    if (!eof_reached) return;
}

/* We reach this point in one of the following cases:
 *
 * 1. The replica is using diskless replication, that is, it reads data
 *    directly from the socket to the Redis memory, without using
 *    a temporary RDB file on disk. In that case we just block and
 *    read everything from the socket.
 *
 * 2. Or when we are done reading from the socket to the RDB file, in
 *    such case we want just to read the RDB file in memory. */
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data");

/* We need to stop any AOF rewriting child before flushing and parsing
 * the RDB, otherwise we'll create a copy-on-write disaster. */
if (server.aof_state != AOF_OFF) stopAppendOnly();

/* When diskless RDB loading is used by replicas, it may be configured
 * in order to save the current DB instead of throwing it away,
 * so that we can restore it in case of failed transfer. */
if (use_diskless_load &&
    server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB)
{
    /* Create a backup of server.db[] and initialize to empty
     * dictionaries */
    diskless_load_backup = disklessLoadMakeBackups();
}
/* We call to emptyDb even in case of REPL_DISKLESS_LOAD_SWAPDB
 * (Where disklessLoadMakeBackups left server.db empty) because we
 * want to execute all the auxiliary logic of emptyDb (Namely,
 * fire module events) */
emptyDb(-1,empty_db_flags,replicationEmptyDbCallback);

/* Before loading the DB into memory we need to delete the readable
 * handler, otherwise it will get called recursively since
 * rdbLoad() will call the event loop to process events from time to
 * time for non blocking loading. */
connSetReadHandler(conn, NULL);
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory");
rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
if (use_diskless_load) {
    rio rdb;
    rioInitWithConn(&rdb,conn,server.repl_transfer_size);

    /* Put the socket in blocking mode to simplify RDB transfer.
     * We'll restore it when the RDB is received. */
    connBlock(conn);
    connRecvTimeout(conn, server.repl_timeout*1000);
    startLoading(server.repl_transfer_size, RDBFLAGS_REPLICATION);

    if (rdbLoadRio(&rdb,RDBFLAGS_REPLICATION,&rsi) != C_OK) {
        /* RDB loading failed. */
        stopLoading(0);
        serverLog(LL_WARNING,
            "Failed trying to load the MASTER synchronization DB "
            "from socket");
        cancelReplicationHandshake();
        rioFreeConn(&rdb, NULL);
        if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
            /* Restore the backed up databases. */
            disklessLoadRestoreBackups(diskless_load_backup,1,
                                       empty_db_flags);
        } else {
            /* Remove the half-loaded data in case we started with
             * an empty replica. */
            emptyDb(-1,empty_db_flags,replicationEmptyDbCallback);
        }

        /* Note that there's no point in restarting the AOF on SYNC
         * failure, it'll be restarted when sync succeeds or the replica
         * gets promoted. */
        return;
    }
    stopLoading(1);

    /* RDB loading succeeded if we reach this point. */
    if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
        /* Delete the backup databases we created before starting to load
         * the new RDB. Now the RDB was loaded with success so the old
         * data is useless. */
        disklessLoadRestoreBackups(diskless_load_backup,0,empty_db_flags);
    }

    /* Verify the end mark is correct. */
    if (usemark) {
        if (!rioRead(&rdb,buf,CONFIG_RUN_ID_SIZE) ||
            memcmp(buf,eofmark,CONFIG_RUN_ID_SIZE) != 0)
        {
            serverLog(LL_WARNING,"Replication stream EOF marker is broken");
            cancelReplicationHandshake();
            rioFreeConn(&rdb, NULL);
            return;
        }
    }

    /* Cleanup and restore the socket to the original state to continue
     * with the normal replication. */
    rioFreeConn(&rdb, NULL);
    connNonBlock(conn);
    connRecvTimeout(conn,0);
} else {
    /* Ensure background save doesn't overwrite synced data */
    if (server.rdb_child_pid != -1) {
        serverLog(LL_NOTICE,
            "Replica is about to load the RDB file received from the "
            "master, but there is a pending RDB child running. "
            "Killing process %ld and removing its temp file to avoid "
            "any race",
                (long) server.rdb_child_pid);
        killRDBChild();
    }

    /* Make sure the new file (also used for persistence) is fully synced:
     * the periodic rdb_fsync_range() calls above only cover the data up to
     * repl_transfer_last_fsync_off, so the tail of the file may still be
     * unsynced when we rename it over the old RDB file. */
    if (fsync(server.repl_transfer_fd) == -1) {
        serverLog(LL_WARNING,
            "Failed trying to sync the temp DB to disk in "
            "MASTER <-> REPLICA synchronization: %s",
            strerror(errno));
        cancelReplicationHandshake();
        return;
    }

    /* Rename rdb like renaming rewrite aof asynchronously. */
    int old_rdb_fd = open(server.rdb_filename,O_RDONLY|O_NONBLOCK);
    if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) {
        serverLog(LL_WARNING,
            "Failed trying to rename the temp DB into %s in "
            "MASTER <-> REPLICA synchronization: %s",
            server.rdb_filename, strerror(errno));
        cancelReplicationHandshake();
        if (old_rdb_fd != -1) close(old_rdb_fd);
        return;
    }
    /* Close old rdb asynchronously. */
    if (old_rdb_fd != -1) bioCreateBackgroundJob(BIO_CLOSE_FILE,(void*)(long)old_rdb_fd,NULL,NULL);

    if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != C_OK) {
        serverLog(LL_WARNING,
            "Failed trying to load the MASTER synchronization "
            "DB from disk");
        cancelReplicationHandshake();
        if (server.rdb_del_sync_files && allPersistenceDisabled()) {
            serverLog(LL_NOTICE,"Removing the RDB file obtained from "
                                "the master. This replica has persistence "
                                "disabled");
            bg_unlink(server.rdb_filename);
        }
        /* Note that there's no point in restarting the AOF on sync failure,
           it'll be restarted when sync succeeds or replica promoted. */
        return;
    }

    /* Cleanup. */
    if (server.rdb_del_sync_files && allPersistenceDisabled()) {
        serverLog(LL_NOTICE,"Removing the RDB file obtained from "
                            "the master. This replica has persistence "
                            "disabled");
        bg_unlink(server.rdb_filename);
    }

    zfree(server.repl_transfer_tmpfile);
    close(server.repl_transfer_fd);
    server.repl_transfer_fd = -1;
    server.repl_transfer_tmpfile = NULL;
}

/* Final setup of the connected slave <- master link */
replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
server.repl_state = REPL_STATE_CONNECTED;
server.repl_down_since = 0;

/* Fire the master link modules event. */
moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                      REDISMODULE_SUBEVENT_MASTER_LINK_UP,
                      NULL);

/* After a full resynchronization we use the replication ID and
 * offset of the master. The secondary ID / offset are cleared since
 * we are starting a new history. */
memcpy(server.replid,server.master->replid,sizeof(server.replid));
server.master_repl_offset = server.master->reploff;
clearReplicationId2();

/* Let's create the replication backlog if needed. Slaves need to
 * accumulate the backlog regardless of the fact they have sub-slaves
 * or not, in order to behave correctly if they are promoted to
 * masters after a failover. */
if (server.repl_backlog == NULL) createReplicationBacklog();
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success");

if (server.supervised_mode == SUPERVISED_SYSTEMD) {
    redisCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Finished with success. Ready to accept connections.\n");
    redisCommunicateSystemd("READY=1\n");
}

/* Restart the AOF subsystem now that we finished the sync. This
 * will trigger an AOF rewrite, and when done will start appending
 * to the new file. */
if (server.aof_enabled) restartAOFAfterSYNC();
return;

error:
cancelReplicationHandshake();
return;
}

rdb.c#rdbLoadRio() 函数内部主要是解析 rdb 数据,并将其中的 key-value 加载进节点的 db 中。源码较长,本文不做具体分析,读者如有兴趣可以自行阅读分析

/* Load an RDB payload from the rio stream 'rdb' into the server databases.
 *
 * rdb      - stream to read from; its checksum callback and processing
 *            chunk size are configured here.
 * rdbflags - RDBFLAGS_* bits; RDBFLAGS_AOF_PREAMBLE disables the expiry
 *            filtering below and RDBFLAGS_ALLOW_DUP lets a loaded key
 *            replace an existing key with the same name.
 * rsi      - optional (may be NULL); when given it receives the
 *            replication stream db, replication ID and offset found in
 *            the AUX fields.
 *
 * Returns C_OK on success. Returns C_ERR with errno set to EINVAL on a bad
 * signature or unsupported version, and C_ERR after calling
 * rdbReportReadError() on a short read. Several unrecoverable conditions
 * (too many databases, module AUX data that cannot be loaded) exit the
 * process directly. */
int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
 uint64_t dbid;
 int type, rdbver;
 redisDb *db = server.db+0;
 char buf[1024];

 rdb->update_cksum = rdbLoadProgressCallback;
 rdb->max_processing_chunk = server.loading_process_events_interval_bytes;
 /* The header is the 5 bytes "REDIS" followed by a 4 digit version. */
 if (rioRead(rdb,buf,9) == 0) goto eoferr;
 buf[9] = '\0';
 if (memcmp(buf,"REDIS",5) != 0) {
     serverLog(LL_WARNING,"Wrong signature trying to load DB from file");
     errno = EINVAL;
     return C_ERR;
 }
 rdbver = atoi(buf+5);
 if (rdbver < 1 || rdbver > RDB_VERSION) {
     serverLog(LL_WARNING,"Can't handle RDB format version %d",rdbver);
     errno = EINVAL;
     return C_ERR;
 }

 /* Key-specific attributes, set by opcodes before the key type. */
 long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
 long long lru_clock = LRU_CLOCK();

 while(1) {
     sds key;
     robj *val;

     /* Read type. */
     if ((type = rdbLoadType(rdb)) == -1) goto eoferr;

     /* Handle special types. */
     if (type == RDB_OPCODE_EXPIRETIME) {
         /* EXPIRETIME: load an expire associated with the next key
          * to load. Note that after loading an expire we need to
          * load the actual type, and continue. */
         expiretime = rdbLoadTime(rdb);
         expiretime *= 1000;
         if (rioGetReadError(rdb)) goto eoferr;
         continue; /* Read next opcode. */
     } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
         /* EXPIRETIME_MS: milliseconds precision expire times introduced
          * with RDB v3. Like EXPIRETIME but with more precision. */
         expiretime = rdbLoadMillisecondTime(rdb,rdbver);
         if (rioGetReadError(rdb)) goto eoferr;
         continue; /* Read next opcode. */
     } else if (type == RDB_OPCODE_FREQ) {
         /* FREQ: LFU frequency. */
         uint8_t byte;
         if (rioRead(rdb,&byte,1) == 0) goto eoferr;
         lfu_freq = byte;
         continue; /* Read next opcode. */
     } else if (type == RDB_OPCODE_IDLE) {
         /* IDLE: LRU idle time. */
         uint64_t qword;
         if ((qword = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
         lru_idle = qword;
         continue; /* Read next opcode. */
     } else if (type == RDB_OPCODE_EOF) {
         /* EOF: End of file, exit the main loop. */
         break;
     } else if (type == RDB_OPCODE_SELECTDB) {
         /* SELECTDB: Select the specified database. */
         if ((dbid = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
         if (dbid >= (unsigned)server.dbnum) {
             serverLog(LL_WARNING,
                 "FATAL: Data file was created with a Redis "
                 "server configured to handle more than %d "
                 "databases. Exiting\n", server.dbnum);
             exit(1);
         }
         db = server.db+dbid;
         continue; /* Read next opcode. */
     } else if (type == RDB_OPCODE_RESIZEDB) {
         /* RESIZEDB: Hint about the size of the keys in the currently
          * selected data base, in order to avoid useless rehashing. */
         uint64_t db_size, expires_size;
         if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
             goto eoferr;
         if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
             goto eoferr;
         dictExpand(db->dict,db_size);
         dictExpand(db->expires,expires_size);
         continue; /* Read next opcode. */
     } else if (type == RDB_OPCODE_AUX) {
         /* AUX: generic string-string fields. Use to add state to RDB
          * which is backward compatible. Implementations of RDB loading
          * are required to skip AUX fields they don't understand.
          *
          * An AUX field is composed of two strings: key and value. */
         robj *auxkey, *auxval;
         if ((auxkey = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
         if ((auxval = rdbLoadStringObject(rdb)) == NULL) {
             /* Release the key we already loaded: the eoferr path may
              * return to the caller instead of aborting, so it would
              * otherwise be leaked. */
             decrRefCount(auxkey);
             goto eoferr;
         }

         if (((char*)auxkey->ptr)[0] == '%') {
             /* All the fields with a name starting with '%' are considered
              * information fields and are logged at startup with a log
              * level of NOTICE. */
             serverLog(LL_NOTICE,"RDB '%s': %s",
                 (char*)auxkey->ptr,
                 (char*)auxval->ptr);
         } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) {
             if (rsi) rsi->repl_stream_db = atoi(auxval->ptr);
         } else if (!strcasecmp(auxkey->ptr,"repl-id")) {
             if (rsi && sdslen(auxval->ptr) == CONFIG_RUN_ID_SIZE) {
                 /* +1 copies the terminator along with the ID. */
                 memcpy(rsi->repl_id,auxval->ptr,CONFIG_RUN_ID_SIZE+1);
                 rsi->repl_id_is_set = 1;
             }
         } else if (!strcasecmp(auxkey->ptr,"repl-offset")) {
             if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10);
         } else if (!strcasecmp(auxkey->ptr,"lua")) {
             /* Load the script back in memory. */
             if (luaCreateFunction(NULL,server.lua,auxval) == NULL) {
                 rdbExitReportCorruptRDB(
                     "Can't load Lua script from RDB file! "
                     "BODY: %s", auxval->ptr);
             }
         } else if (!strcasecmp(auxkey->ptr,"redis-ver")) {
             serverLog(LL_NOTICE,"Loading RDB produced by version %s",
                 (char*)auxval->ptr);
         } else if (!strcasecmp(auxkey->ptr,"ctime")) {
             time_t age = time(NULL)-strtol(auxval->ptr,NULL,10);
             if (age < 0) age = 0;
             /* Cast to long so that the argument matches the %ld format. */
             serverLog(LL_NOTICE,"RDB age %ld seconds",
                 (long) age);
         } else if (!strcasecmp(auxkey->ptr,"used-mem")) {
             long long usedmem = strtoll(auxval->ptr,NULL,10);
             serverLog(LL_NOTICE,"RDB memory usage when created %.2f Mb",
                 (double) usedmem / (1024*1024));
         } else if (!strcasecmp(auxkey->ptr,"aof-preamble")) {
             long long haspreamble = strtoll(auxval->ptr,NULL,10);
             if (haspreamble) serverLog(LL_NOTICE,"RDB has an AOF tail");
         } else if (!strcasecmp(auxkey->ptr,"redis-bits")) {
             /* Just ignored. */
         } else {
             /* We ignore fields we don't understand, as by AUX field
              * contract. */
             serverLog(LL_DEBUG,"Unrecognized RDB AUX field: '%s'",
                 (char*)auxkey->ptr);
         }

         decrRefCount(auxkey);
         decrRefCount(auxval);
         continue; /* Read type again. */
     } else if (type == RDB_OPCODE_MODULE_AUX) {
         /* Load module data that is not related to the Redis key space.
          * Such data can be potentially be stored both before and after the
          * RDB keys-values section. */
         uint64_t moduleid = rdbLoadLen(rdb,NULL);
         int when_opcode = rdbLoadLen(rdb,NULL);
         int when = rdbLoadLen(rdb,NULL);
         if (rioGetReadError(rdb)) goto eoferr;
         if (when_opcode != RDB_MODULE_OPCODE_UINT)
             rdbReportReadError("bad when_opcode");
         moduleType *mt = moduleTypeLookupModuleByID(moduleid);
         char name[10];
         moduleTypeNameByID(name,moduleid);

         if (!rdbCheckMode && mt == NULL) {
             /* Unknown module. */
             serverLog(LL_WARNING,"The RDB file contains AUX module data I can't load: no matching module '%s'", name);
             exit(1);
         } else if (!rdbCheckMode && mt != NULL) {
             if (!mt->aux_load) {
                 /* Module doesn't support AUX. */
                 serverLog(LL_WARNING,"The RDB file contains module AUX data, but the module '%s' doesn't seem to support it.", name);
                 exit(1);
             }

             RedisModuleIO io;
             moduleInitIOContext(io,mt,rdb,NULL);
             io.ver = 2;
             /* Call the rdb_load method of the module providing the 10 bit
              * encoding version in the lower 10 bits of the module ID. */
             if (mt->aux_load(&io,moduleid&1023, when) != REDISMODULE_OK || io.error) {
                 moduleTypeNameByID(name,moduleid);
                 serverLog(LL_WARNING,"The RDB file contains module AUX data for the module type '%s', that the responsible module is not able to load. Check for modules log above for additional clues.", name);
                 exit(1);
             }
             if (io.ctx) {
                 moduleFreeContext(io.ctx);
                 zfree(io.ctx);
             }
             uint64_t eof = rdbLoadLen(rdb,NULL);
             if (eof != RDB_MODULE_OPCODE_EOF) {
                 serverLog(LL_WARNING,"The RDB file contains module AUX data for the module '%s' that is not terminated by the proper module value EOF marker", name);
                 exit(1);
             }
             continue;
         } else {
             /* RDB check mode. */
             robj *aux = rdbLoadCheckModuleValue(rdb,name);
             decrRefCount(aux);
             continue; /* Read next opcode. */
         }
     }

     /* Read key */
     if ((key = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL)
         goto eoferr;
     /* Read value */
     if ((val = rdbLoadObject(type,rdb,key)) == NULL) {
         sdsfree(key);
         goto eoferr;
     }

     /* Check if the key already expired. This function is used when loading
      * an RDB file from disk, either at startup, or when an RDB was
      * received from the master. In the latter case, the master is
      * responsible for key expiry. If we would expire keys here, the
      * snapshot taken by the master may not be reflected on the slave.
      * Similarly if the RDB is the preamble of an AOF file, we want to
      * load all the keys as they are, since the log of operations later
      * assume to work in an exact keyspace state. */
     if (iAmMaster() &&
         !(rdbflags&RDBFLAGS_AOF_PREAMBLE) &&
         expiretime != -1 && expiretime < now)
     {
         sdsfree(key);
         decrRefCount(val);
     } else {
         robj keyobj;

         /* Add the new object in the hash table */
         int added = dbAddRDBLoad(db,key,val);
         if (!added) {
             if (rdbflags & RDBFLAGS_ALLOW_DUP) {
                 /* This flag is useful for DEBUG RELOAD special modes.
                  * When it's set we allow new keys to replace the current
                  * keys with the same name. */
                 initStaticStringObject(keyobj,key);
                 dbSyncDelete(db,&keyobj);
                 dbAddRDBLoad(db,key,val);
             } else {
                 serverLog(LL_WARNING,
                     "RDB has duplicated key '%s' in DB %d",key,db->id);
                 serverPanic("Duplicated key found in RDB file");
             }
         }

         /* Set the expire time if needed */
         if (expiretime != -1) {
             initStaticStringObject(keyobj,key);
             setExpire(NULL,db,&keyobj,expiretime);
         }

         /* Set usage information (for eviction). */
         objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock,1000);
     }

     /* Loading the database more slowly is useful in order to test
      * certain edge cases. */
     if (server.key_load_delay) usleep(server.key_load_delay);

     /* Reset the state that is key-specified and is populated by
      * opcodes before the key, so that we start from scratch again. */
     expiretime = -1;
     lfu_freq = -1;
     lru_idle = -1;
 }
 /* Verify the checksum if RDB version is >= 5 */
 if (rdbver >= 5) {
     /* Capture the running checksum before the stored one is read. */
     uint64_t cksum, expected = rdb->cksum;

     if (rioRead(rdb,&cksum,8) == 0) goto eoferr;
     if (server.rdb_checksum) {
         memrev64ifbe(&cksum);
         if (cksum == 0) {
             serverLog(LL_WARNING,"RDB file was saved with checksum disabled: no check performed.");
         } else if (cksum != expected) {
             serverLog(LL_WARNING,"Wrong RDB checksum expected: (%llx) but "
                 "got (%llx). Aborting now.",
                     (unsigned long long)expected,
                     (unsigned long long)cksum);
             rdbExitReportCorruptRDB("RDB CRC error");
         }
     }
 }
 return C_OK;

 /* Unexpected end of file is handled here calling rdbReportReadError():
  * this will in turn either abort Redis in most cases, or if we are loading
  * the RDB file from a socket during initial SYNC (diskless replica mode),
  * we'll report the error to the caller, so that we can retry. */
 eoferr:
 serverLog(LL_WARNING,
     "Short read or OOM loading DB. Unrecoverable error, aborting now.");
 rdbReportReadError("Unexpected EOF reading RDB file");
 return C_ERR;
}

 

 

 

 

 

 

 

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值