redis之复制之谜(二)

redis的复制第三版(异步复制之replica异步)

redis在2.2.0版本中引入了replica异步接收数据

replica连接上master,经过密码认证通过后(新加的密码认证过程,auth命令),replica发送SYNC命令,注册异步回调函数,不用阻塞等待。

  1. replica连接master
    syncWithMaster()
    {
    ...
     int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
    ...
    }
  2. replica通过auth命令进行认证
    syncWithMaster()
    {
    ...
        if(server.masterauth) {//配置了密码,master需要密码认证,则发送auth命令
            //发送命令
        	snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
        	syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) 
            ...
    
            //读取认证结果
            syncReadLine(fd,buf,1024,3600) 
            ...
    
        
            //处理认证结果
            if (buf[0] != '+') { //认证失败
                ...
                return REDIS_ERR;
            }
        }
    ...
    }
  3. replica发送sync命令
    syncWithMaster()
    {
    ...
        syncWrite(fd,"SYNC \r\n",7,5)
    ...
    }
  4. 创建临时文件,注册异步回调函数
    syncWithMaster()
    {
    ...
        //创建临时文件,后续接收的数据都将写入此文件
        while(maxtries--) {
            snprintf(tmpfile,256,
                "temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid());
            dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
            if (dfd != -1) break;
            sleep(1);
        }
    
        ...
    
        //注册异步回调读函数
        aeCreateFileEvent(server.el, fd, AE_READABLE, readSyncBulkPayload, NULL)
        ...
        
        server.replstate = REDIS_REPL_TRANSFER; 
        server.repl_transfer_left = -1; //还需要接收多少字节
        server.repl_transfer_s = fd; //通信fd
        server.repl_transfer_fd = dfd; //临时文件fd
        server.repl_transfer_lastio = time(NULL); //最后接收数据时间,用于判断是否超时
        server.repl_transfer_tmpfile = zstrdup(tmpfile); //临时文件名,后续接收完后,将rename为rdb文件名
        
    ...
    }
  5. 异步回调函数接收数据
    void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
        char buf[4096];
        ssize_t nread, readlen;
        ...
    
        
        if (server.repl_transfer_left == -1) { //第一次回调,读取文件大小
            syncReadLine(fd,buf,1024,3600)
            ... 
    
           
            server.repl_transfer_left = strtol(buf+1,NULL,10);
            ...
            return; //直接退出了,等待下次的回调
        }
    
    
        /* Read bulk data */
        //读取数据
        readlen = (server.repl_transfer_left < (signed)sizeof(buf)) ?
            server.repl_transfer_left : (signed)sizeof(buf);
        nread = read(fd,buf,readlen);
        ...
    
        //更新时间
        server.repl_transfer_lastio = time(NULL);
    
        //将接收的数据写入本地临时文件
        write(server.repl_transfer_fd,buf,nread)
        ...
    
        //更新剩余字节数
        server.repl_transfer_left -= nread;
    
    
    
        /* Check if the transfer is now complete */
        if (server.repl_transfer_left == 0) { //文件接收完成
            
            //将临时文件切换成rdb文件
            rename(server.repl_transfer_tmpfile,server.dbfilename) 
    
            //清空数据库
            emptyDb();
            
            //删除可读事件
            aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
    
            //加载rdb文件
            rdbLoad(server.dbfilename) 
    
            /* Final setup of the connected slave <- master link */
            zfree(server.repl_transfer_tmpfile);
            close(server.repl_transfer_fd);
            server.master = createClient(server.repl_transfer_s);
            server.master->flags |= REDIS_MASTER;
            server.master->authenticated = 1;
            server.replstate = REDIS_REPL_CONNECTED;
            redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success");
        }
    }
    

redis的复制第四版(异步复制之断点续传)

在redis2.8.0引入了新的命令psync, 根据偏移量进行断点续传。

为了能断点续传,master需要缓存最近传播的命令,这样才能根据replica提供的偏移量正确地补发数据。但是master不能无休止地缓存下去:内存有限,不可能为了复制而把内存耗尽,因此最终使用了固定大小的循环缓冲区(replication backlog)。

replica

  1.  主动连接master
  2.  发送PING命令, 为了探测master是否已经启动完成,可以提供服务
  3.  如果需要密码,则通过AUTH发送密码进行认证
  4.  如果配置了监听端口(可能是docker等端口映射了),则通过REPLCONF命令发送给master,用于INFO命令的显示
  5.  通过PSYNC命令进行同步请求,第一次没有偏移量等信息,而且不知道master的runid(唯一标识符), 发送 PSYNC ? -1 (PSYNC Runid offset)
  6.  收到回复+FULLRESYNC
  7.  注册异步回调函数,进行文件的接收
  8.  接收rdb文件内容,写入本地文件
  9.  接收完后,加载rdb文件到内存中,恢复数据
  10.  继续接收后续命令,重放,和master保持一致

  1. 当网络出现问题,连接断开时,replica会缓存master的runid和偏移量等信息
  2. 当网络恢复后,进行重连
  3. 连接初始化等交互完成后,发送PSYNC runid offset
  4. 接收到 +CONTINUE, 则继续接收命令进行重放以保持和master一致

master

  1.  接收到PING命令,当自身已经一切准备就绪,则回复+PONG
  2.  接收到AUTH命令,进行密码校验,通过,则发送+OK,否则 -Error xxx
  3.  接收REPLCONF命令,解析,正常则返回+OK
  4.  接收到PSYNC命令,解析命令,判断runid是否与本服务器的runid一致,并判断offset是否还在缓存区范围内;任一条件不满足(例如第一次同步发送的 ? -1),则返回+FULLRESYNC,进行全量同步
  5.  判断是否已有rdb子进程存在,存在则等待,否则生成子进程开始产生rdb文件
  6.  子进程结束,父进程注册异步回调函数进行文件传输
  7.  回调函数发送文件内容给replica
  8.  文件发送完成后,继续把新的写命令发送给replica
  9.  当连接断开后,关闭连接
  10.  网络恢复后,replica重新连接上,进行一系列的交互后,接收到PSYNC runid offset
  11.  经过判断,offset在循环队列中,可以直接从队列中读取发送,发送+CONTINUE
  12. 发送完成后,后续持续的发送新的命令

代码逻辑如下:

replica

int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
...

 //定时进行检测重连master以及传输失败, 每秒钟调用一次
 run_with_period(1000) replicationCron();

...
}

void replicationCron(void) {

    //连接阶段,或者PING命令的响应,超时了, 断开连接
    /* Non blocking connection timeout? */
    if (server.masterhost &&
        (server.repl_state == REDIS_REPL_CONNECTING ||
         server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
        (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
    {
        redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");
        undoConnectWithMaster();
    }


    //正在传输rdb文件内容超时了
    /* Bulk transfer I/O timeout? */
    if (server.masterhost && server.repl_state == REDIS_REPL_TRANSFER &&
        (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
    {
        redisLog(REDIS_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
        replicationAbortSyncTransfer();
    }


    //已经同步完成,在后续同步新的命令时超时
    /* Timed out master when we are an already connected slave? */
    if (server.masterhost && server.repl_state == REDIS_REPL_CONNECTED &&
        (time(NULL)-server.master->lastinteraction) > server.repl_timeout)
    {
        redisLog(REDIS_WARNING,"MASTER timeout: no data nor PING received...");
       //这里将缓存runid,offset等信息
        freeClient(server.master); 
    }

   
    //开始连接master
    /* Check if we should connect to a MASTER */
    if (server.repl_state == REDIS_REPL_CONNECT) {
        redisLog(REDIS_NOTICE,"Connecting to MASTER %s:%d",
            server.masterhost, server.masterport);

         //此函数是主要函数
        if (connectWithMaster() == REDIS_OK) {
            redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync started");
        }
    }

    //发送ACK,并且将offset也一并发给master
    /* Send ACK to master from time to time. */
    if (server.masterhost && server.master)
        replicationSendAck();

    ...

}

/* Begin a non-blocking connection to the configured master.
 *
 * On success the socket is registered with the event loop so that
 * syncWithMaster() is invoked when it becomes readable or writable, the
 * replication state is moved to REDIS_REPL_CONNECTING, and REDIS_OK is
 * returned. If the connect call or the event registration fails, a warning
 * is logged and REDIS_ERR is returned (the socket is closed in the latter
 * case). */
int connectWithMaster(void) {
    int sock = anetTcpNonBlockConnect(NULL,server.masterhost,server.masterport);

    if (sock == -1) {
        redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
            strerror(errno));
        return REDIS_ERR;
    }

    /* syncWithMaster() drives the rest of the handshake from the event loop. */
    int registered = aeCreateFileEvent(server.el,sock,
                                       AE_READABLE|AE_WRITABLE,
                                       syncWithMaster,NULL);
    if (registered == AE_ERR) {
        close(sock);
        redisLog(REDIS_WARNING,"Can't create readable event for SYNC");
        return REDIS_ERR;
    }

    /* Record the connection attempt: state, socket, and the timestamp that
     * replicationCron() compares against repl_timeout. */
    server.repl_state = REDIS_REPL_CONNECTING;
    server.repl_transfer_s = sock;
    server.repl_transfer_lastio = server.unixtime;
    return REDIS_OK;
}

void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
    char tmpfile[256], *err;
    int dfd, maxtries = 5;
    int sockerr = 0, psync_result;
   
    ...

    if (server.repl_state == REDIS_REPL_CONNECTING) {
        ...

        server.repl_state = REDIS_REPL_RECEIVE_PONG;

        /* Send the PING, don't check for errors at all, we have the timeout
         * that will take care about this. */
        syncWrite(fd,"PING\r\n",6,100);
        return;
    }

    //读取PING的响应
    /* Receive the PONG command. */
    if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
        char buf[1024];
        ...

        /* Read the reply with explicit timeout. */
        buf[0] = '\0';
        if (syncReadLine(fd,buf,sizeof(buf),
            server.repl_syncio_timeout*1000) == -1)
        {
            redisLog(REDIS_WARNING,
                "I/O error reading PING reply from master: %s",
                strerror(errno));
            goto error;
        }

        /* We accept only two replies as valid, a positive +PONG reply
         * (we just check for "+") or an authentication error.
         * Note that older versions of Redis replied with "operation not
         * permitted" instead of using a proper error code, so we test
         * both. */
        if (buf[0] != '+' &&
            strncmp(buf,"-NOAUTH",7) != 0 &&
            strncmp(buf,"-ERR operation not permitted",28) != 0)
        {
            redisLog(REDIS_WARNING,"Error reply to PING from master: '%s'",buf);
            goto error;
        } else {
            redisLog(REDIS_NOTICE,
                "Master replied to PING, replication can continue...");
        }
    }


    //如果配置了密码要求,则发送AUTH命令
    /* AUTH with the master if required. */
    if(server.masterauth) {
        err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL);
        if (err[0] == '-') {
            redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err);
            sdsfree(err);
            goto error;
        }
        sdsfree(err);
    }

    /* Set the slave port, so that Master's INFO command can list the
     * slave listening port correctly. */
    {
        sds port = sdsfromlonglong(server.port);
        err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port,
                                         NULL);
        sdsfree(port);
        /* Ignore the error if any, not all the Redis versions support
         * REPLCONF listening-port. */
        if (err[0] == '-') {
            redisLog(REDIS_NOTICE,"(Non critical) Master does not understand REPLCONF listening-port: %s", err);
        }
        sdsfree(err);
    }

     //发送PSYNC请求
    /* Try a partial resynchonization. If we don't have a cached master
     * slaveTryPartialResynchronization() will at least try to use PSYNC
     * to start a full resynchronization so that we get the master run id
     * and the global offset, to try a partial resync at the next
     * reconnection attempt. */
    psync_result = slaveTryPartialResynchronization(fd);
    if (psync_result == PSYNC_CONTINUE) {
        redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Master accepted a Partial Resynchronization.");
        return;
    }

    //不支持PSYNC命令,将降级到SYNC命令
    /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
     * and the server.repl_master_runid and repl_master_initial_offset are
     * already populated. */
    if (psync_result == PSYNC_NOT_SUPPORTED) {
        redisLog(REDIS_NOTICE,"Retrying with SYNC...");
        if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
            redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
                strerror(errno));
            goto error;
        }
    }

    //准备一个临时文件
    /* Prepare a suitable temp file for bulk transfer */
    while(maxtries--) {
        snprintf(tmpfile,256,
            "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
        dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
        if (dfd != -1) break;
        sleep(1);
    }
    if (dfd == -1) {
        redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
        goto error;
    }

    //注册异步回调函数,进行rdb文件的接收
    /* Setup the non blocking download of the bulk file. */
    if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
            == AE_ERR)
    {
        redisLog(REDIS_WARNING,
            "Can't create readable event for SYNC: %s (fd=%d)",
            strerror(errno),fd);
        goto error;
    }

    server.repl_state = REDIS_REPL_TRANSFER;
    server.repl_transfer_size = -1;
    server.repl_transfer_read = 0;
    server.repl_transfer_last_fsync_off = 0;
    server.repl_transfer_fd = dfd;
    server.repl_transfer_lastio = server.unixtime;
    server.repl_transfer_tmpfile = zstrdup(tmpfile);
    return;

error:
    close(fd);
    server.repl_transfer_s = -1;
    server.repl_state = REDIS_REPL_CONNECT;
    return;
}


int slaveTryPartialResynchronization(int fd) {
    char *psync_runid;
    char psync_offset[32];
    sds reply;

    ...

    server.repl_master_initial_offset = -1;

    if (server.cached_master) { //重连时,从缓存中获取offset和runid
        psync_runid = server.cached_master->replrunid;
        snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
        ...
    } else { //否则将发送 ? -1 ,进行全量同步
        ...
        psync_runid = "?";
        memcpy(psync_offset,"-1",3);
    }

    /* Issue the PSYNC command */
    reply = sendSynchronousCommand(fd,"PSYNC",psync_runid,psync_offset,NULL);

    //处理响应
    if (!strncmp(reply,"+FULLRESYNC",11)) { //需要全量同步
        ...
        //提取runid
        runid = strchr(reply,' '); 
        if (runid) {
            runid++;
            offset = strchr(runid,' ');
            if (offset) offset++;
        }
       ...
            memcpy(server.repl_master_runid, runid, offset-runid-1);
            server.repl_master_runid[REDIS_RUN_ID_SIZE] = '\0';
            server.repl_master_initial_offset = strtoll(offset,NULL,10);
           
      ...
        /* We are going to full resync, discard the cached master structure. */
        replicationDiscardCachedMaster();
        sdsfree(reply);
        return PSYNC_FULLRESYNC;
    }

    if (!strncmp(reply,"+CONTINUE",9)) { //可以继续断点续传
      ...
        sdsfree(reply);
        replicationResurrectCachedMaster(fd); //从缓存中恢复各种状态
        return PSYNC_CONTINUE;
    }

    //错误判断
  ...
    sdsfree(reply);
    replicationDiscardCachedMaster();
    return PSYNC_NOT_SUPPORTED;
}

master

/* Handler for the SYNC and PSYNC commands issued by a client that wants to
 * replicate from this server.
 *
 * For PSYNC a partial resynchronization is attempted first; if it succeeds
 * the function returns immediately. Otherwise (or for plain SYNC) a full
 * resynchronization is arranged: depending on whether an RDB child is
 * already running, the client either shares the in-progress BGSAVE, waits
 * for the next one, or triggers a new one right away. The client is then
 * flagged as a slave and appended to server.slaves, and the replication
 * backlog is created if this is the only slave and no backlog exists. */
void syncCommand(redisClient *c) {
    /* ignore SYNC if already slave or in monitor mode */
    if (c->flags & REDIS_SLAVE) return;

    /* Refuse SYNC requests if we are a slave but the link with our master
     * is not ok... */
    if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {
        addReplyError(c,"Can't SYNC while not connected with my master");
        return;
    }

    /* SYNC can't be issued when the server has pending data to send to
     * the client about already issued commands. We need a fresh reply
     * buffer registering the differences between the BGSAVE and the current
     * dataset, so that we can copy to other slaves if needed. */
    if (listLength(c->reply) != 0 || c->bufpos != 0) {
        addReplyError(c,"SYNC and PSYNC are invalid with pending output");
        return;
    }

    redisLog(REDIS_NOTICE,"Slave asks for synchronization");

    /* Try a partial resynchronization if this is a PSYNC command.
     * If it fails, we continue with usual full resynchronization, however
     * when this happens masterTryPartialResynchronization() already
     * replied with:
     *
     * +FULLRESYNC <runid> <offset>
     *
     * So the slave knows the new runid and offset to try a PSYNC later
     * if the connection with the master is lost. */
    if (!strcasecmp(c->argv[0]->ptr,"psync")) {
        /* Check whether the transfer can be resumed from the requested offset. */
        if (masterTryPartialResynchronization(c) == REDIS_OK) {
            server.stat_sync_partial_ok++;
            return; /* No full resync needed, return. */
        } else {
            char *master_runid = c->argv[1]->ptr;

            /* Increment stats for failed PSYNCs, but only if the
             * runid is not "?", as this is used by slaves to force a full
             * resync on purpose when they are not able to partially
             * resync. */
            if (master_runid[0] != '?') server.stat_sync_partial_err++;
        }
    } else {
        /* If a slave uses SYNC, we are dealing with an old implementation
         * of the replication protocol (like redis-cli --slave). Flag the client
         * so that we don't expect to receive REPLCONF ACK feedbacks. */
        c->flags |= REDIS_PRE_PSYNC_SLAVE;
    }

    /* Full resynchronization. */
    server.stat_sync_full++;

    /* A full resync is needed; the RDB file is produced by a child process. */

    /* Here we need to check if there is a background saving operation
     * in progress, or if it is required to start one */
    if (server.rdb_child_pid != -1) { /* A child already exists: we have to wait. */
        /* Ok a background save is in progress. Let's check if it is a good
         * one for replication, i.e. if there is another slave that is
         * registering differences since the server forked to save */
        redisClient *slave;
        listNode *ln;
        listIter li;

        /* Look for a slave that is already waiting for this BGSAVE to end. */
        listRewind(server.slaves,&li);
        while((ln = listNext(&li))) {
            slave = ln->value;
            if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
        }
        if (ln) { /* Found one: copy its output buffer and share the same RDB, no new child needed. */
            /* Perfect, the server is already registering differences for
             * another slave. Set the right state, and copy the buffer. */
            copyClientOutputBuffer(c,slave);
            c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
            redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
        } else { /* None found: wait for the current child to finish, then a new BGSAVE is started. */
            /* No way, we need to wait for the next BGSAVE in order to
             * register differences */
            c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
            redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
        }
    } else {
        /* Ok we don't have a BGSAVE in progress, let's start one */
        redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
        /* No child is running, so the background RDB save can start right now. */
        if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
            redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
            addReplyError(c,"Unable to perform background save");
            return;
        }
        c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
        /* Flush the script cache for the new slave. */
        replicationScriptCacheFlush();
    }

    if (server.repl_disable_tcp_nodelay)
        anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
    c->repldbfd = -1;
    c->flags |= REDIS_SLAVE;
    server.slaveseldb = -1; /* Force to re-emit the SELECT command. */

    /* Append this client to the list of slaves. */
    listAddNodeTail(server.slaves,c);

    /* When this is the only slave and no backlog exists yet, create the
     * replication backlog. */
    if (listLength(server.slaves) == 1 && server.repl_backlog == NULL)
        createReplicationBacklog();
    return;
}

/* Master-side handling of "PSYNC <runid> <offset>".
 *
 * Returns REDIS_OK when the caller must not proceed with a full resync:
 * either the partial resync was accepted (+CONTINUE was written and the
 * backlog from the requested offset was queued), or writing the reply to
 * the socket failed and the client was scheduled to be freed.
 *
 * Returns REDIS_ERR when a full resync is required; in that case
 * "+FULLRESYNC <runid> <offset>" has already been written to the client. */
int masterTryPartialResynchronization(redisClient *c) {
    long long psync_offset, psync_len;
    char *master_runid = c->argv[1]->ptr;
    char buf[128];
    int buflen;

    /* Check that the request is addressed to this very server instance
     * (the author's note: the old master may have died and a replica been
     * promoted in its place); if not, a full resync is required. */
    /* Is the runid of this master the same advertised by the wannabe slave
     * via PSYNC? If runid changed this master is a different instance and
     * there is no way to continue. */
    if (strcasecmp(master_runid, server.runid)) {
        /* Run id "?" is used by slaves that want to force a full resync. */
        if (master_runid[0] != '?') {
            redisLog(REDIS_NOTICE,"Partial resynchronization not accepted: "
                "Runid mismatch (Client asked for '%s', I'm '%s')",
                master_runid, server.runid);
        } else {
            redisLog(REDIS_NOTICE,"Full resync requested by slave.");
        }
        goto need_full_resync;
    }

    /* Parse the offset requested by the slave. */
    /* We still have the data our slave is asking for? */
    if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) !=
       REDIS_OK) goto need_full_resync;

    /* The offset must fall inside the range covered by the backlog,
     * otherwise a full resync is required. */
    if (!server.repl_backlog ||
        psync_offset < server.repl_backlog_off ||
        psync_offset > (server.repl_backlog_off + server.repl_backlog_histlen))
    {
        redisLog(REDIS_NOTICE,
            "Unable to partial resync with the slave for lack of backlog (Slave request was: %lld).", psync_offset);
        if (psync_offset > server.master_repl_offset) {
            redisLog(REDIS_WARNING,
                "Warning: slave tried to PSYNC with an offset that is greater than the master replication offset.");
        }
        goto need_full_resync;
    }

    /* If we reached this point, we are able to perform a partial resync:
     * 1) Set client state to make it a slave.
     * 2) Inform the client we can continue with +CONTINUE
     * 3) Send the backlog data (from the offset to the end) to the slave. */
    c->flags |= REDIS_SLAVE;
    c->replstate = REDIS_REPL_ONLINE;
    c->repl_ack_time = server.unixtime;
    listAddNodeTail(server.slaves,c); 
    /* We can't use the connection buffers since they are used to accumulate
     * new commands at this stage. But we are sure the socket send buffer is
     * empty so this write will never fail actually. */
    buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n"); /* reply with +CONTINUE */
    if (write(c->fd,buf,buflen) != buflen) {
        /* REDIS_OK here only tells the caller to stop; the client is being freed. */
        freeClientAsync(c);
        return REDIS_OK;
    }

    /* Queue the backlog data from the requested offset up to the latest. */
    psync_len = addReplyReplicationBacklog(c,psync_offset);
    redisLog(REDIS_NOTICE,
        "Partial resynchronization request accepted. Sending %lld bytes of backlog starting from offset %lld.", psync_len, psync_offset);
    /* Note that we don't need to set the selected DB at server.slaveseldb
     * to -1 to force the master to emit SELECT, since the slave already
     * has this state from the previous connection with the master. */

    refreshGoodSlavesCount();
    return REDIS_OK; /* The caller can return, no full resync needed. */

need_full_resync:
    /* We need a full resync for some reason... notify the client. */
    psync_offset = server.master_repl_offset;
    /* Add 1 to psync_offset if the replication backlog does not exist,
     * as when it will be created later we'll increment the offset by one. */
    if (server.repl_backlog == NULL) psync_offset++;
    /* Again, we can't use the connection buffers (see above). */
    buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
                      server.runid,psync_offset);
    if (write(c->fd,buf,buflen) != buflen) {
        /* Same convention as above: REDIS_OK stops the caller, client is freed. */
        freeClientAsync(c);
        return REDIS_OK;
    }
    return REDIS_ERR;
}

//子进程结束后的处理函数
void backgroundSaveDoneHandler(int exitcode, int bysignal) {
...
    updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
...
}

void updateSlavesWaitingBgsave(int bgsaveerr) {
    listNode *ln;
    int startbgsave = 0;
    listIter li;

    //遍历所有的replica
    listRewind(server.slaves,&li);
    while((ln = listNext(&li))) {
        redisClient *slave = ln->value;

        if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) { //在等待上一次rdb结束的
            startbgsave = 1; //可以开始rdb生成了
            slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
        } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) { 
            struct redis_stat buf;

            if (bgsaveerr != REDIS_OK) {
                freeClient(slave);
                redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
                continue;
            }

            //打开生成的rdb文件
            if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
                redis_fstat(slave->repldbfd,&buf) == -1) {
                freeClient(slave);
                redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
                continue;
            }
            slave->repldboff = 0;
            slave->repldbsize = buf.st_size;
            slave->replstate = REDIS_REPL_SEND_BULK;
            aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
             //注册异步回调函数,将文件发送给replica
            if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
                freeClient(slave);
                continue;
            }
        }
    }
  
   //需要开始子进程生成rdb
    if (startbgsave) {
        /* Since we are starting a new background save for one or more slaves,
         * we flush the Replication Script Cache to use EVAL to propagate every
         * new EVALSHA for the first time, since all the new slaves don't know
         * about previous scripts. */
        replicationScriptCacheFlush();
        //开启子进程进行rdb生成
        if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
           ...
            }
        }
    }
}


void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
    redisClient *slave = privdata;
    ...

    if (slave->repldboff == 0) {
        //发送文件大小
        bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
            slave->repldbsize);
        if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
        {
            sdsfree(bulkcount);
            freeClient(slave);
            return;
        }
        sdsfree(bulkcount);
    }
    lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
    buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN); //读取内容
    if (buflen <= 0) {
        redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
            (buflen == 0) ? "premature EOF" : strerror(errno));
        freeClient(slave);
        return;
    }
    if ((nwritten = write(fd,buf,buflen)) == -1) { //发送内容
        redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
            strerror(errno));
        freeClient(slave);
        return;
    }
    slave->repldboff += nwritten;
    if (slave->repldboff == slave->repldbsize) { //发送完成
        close(slave->repldbfd);
        slave->repldbfd = -1;
        aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
        slave->replstate = REDIS_REPL_ONLINE;
        slave->repl_ack_time = server.unixtime;

        //注册异步回调函数,将后续的命令发送给replica
        if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
            sendReplyToClient, slave) == AE_ERR) {
            freeClient(slave);
            return;
        }
        refreshGoodSlavesCount();
        redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值