【无标题】

#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(connection *conn) {
    char buf[PROTO_IOBUF_LEN];
    ssize_t nread, readlen, nwritten;
    int use_diskless_load = useDisklessLoad();
    redisDb *diskless_load_tempDb = NULL;
    functionsLibCtx* temp_functions_lib_ctx = NULL;
    int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC :
                                                        EMPTYDB_NO_FLAGS;
    off_t left;

    /* Static vars used to hold the EOF mark, and the last bytes received
     * from the server: when they match, we reached the end of the transfer. */
    static char eofmark[CONFIG_RUN_ID_SIZE];
    static char lastbytes[CONFIG_RUN_ID_SIZE];
    static int usemark = 0;

    /* If repl_transfer_size == -1 we still have to read the bulk length
     * from the master reply. */
    if (server.repl_transfer_size == -1) {
        nread = connSyncReadLine(conn,buf,1024,server.repl_syncio_timeout*1000);
        if (nread == -1) {
            serverLog(LL_WARNING,
                "I/O error reading bulk count from MASTER: %s",
                connGetLastError(conn));
            goto error;
        } else {
            /* nread here is returned by connSyncReadLine(), which calls syncReadLine() and
             * convert "\r\n" to '\0' so 1 byte is lost. */
            atomicIncr(server.stat_net_repl_input_bytes, nread+1);
        }

        if (buf[0] == '-') {
            serverLog(LL_WARNING,
                "MASTER aborted replication with an error: %s",
                buf+1);
            goto error;
        } else if (buf[0] == '\0') {
            /* At this stage just a newline works as a PING in order to take
             * the connection live. So we refresh our last interaction
             * timestamp. */
            server.repl_transfer_lastio = server.unixtime;
            return;
        } else if (buf[0] != '$') {
            serverLog(LL_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
            goto error;
        }

        /* There are two possible forms for the bulk payload. One is the
         * usual $<count> bulk format. The other is used for diskless transfers
         * when the master does not know beforehand the size of the file to
         * transfer. In the latter case, the following format is used:
         *
         * $EOF:<40 bytes delimiter>
         *
         * At the end of the file the announced delimiter is transmitted. The
         * delimiter is long and random enough that the probability of a
         * collision with the actual file content can be ignored. */
        if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= CONFIG_RUN_ID_SIZE) {
            usemark = 1;
            memcpy(eofmark,buf+5,CONFIG_RUN_ID_SIZE);
            memset(lastbytes,0,CONFIG_RUN_ID_SIZE);
/*
memcpy(eofmark,buf+5,CONFIG_RUN_ID_SIZE);:将一段内存空间(即从 buf + 5 开始的位置至接下来的
 CONFIG_RUN_ID_SIZE 个字节)内容复制到 eofmark 中,其中 eofmark 是一个全局变量,用于保存 “EOF:” 后面的复制 ID。这行代码实现了将复制 ID 缓存在 eofmark 变量中,以供后续使用。
memset(lastbytes,0,CONFIG_RUN_ID_SIZE);:将一段内存空间(即从 lastbytes 开始的位置至接下来的 CONFIG_RUN_ID_SIZE 个字节)内容全部置为0。其中lastbytes 变量表示用于在特定情况下缓存部分已经
接收的数据,在接收完所有数据后会对其加以处理。这行代码实现了对其内容进行清空,防止影响后续的同步操作。
这是为了避免接收到新的数据时,将缓存在lastbytes数组中的旧数据错误地用于数据同步。在接收到新数据之前,需要将缓存区的内容清空,这样才能保证接收到的数据是正确的。

在Redis主从同步的过程中,lastbytes数组用于缓存已经接收到的除"EOF:"标识外的数据部分。在数据传输
过程中,如果传输的数据部分大小超过了从节点设置的最大缓存空间,Redis将按照传输的数据块所指定的最
小窗口大小,循环接收数据,多余的数据将被缓存到lastbytes数组中。在接收所有的数据块之后,
lastbytes中缓存的是最后一个块中被截断的多出来的部分。

接下来,Redis从节点会将lastbytes数组中缓存的数据与接收到的最后一个数据块进行合并,形成完整的数
据,并开始进行数据存储或加载。这个数据块的大小不一定能够被整除,因此需要在lastbytes下一次接收
到数据块之前将lastbytes数组清空,防止上一个数据块中的残留数据对数据的正确性造成影响。

因此,在这个场景中,对lastbytes进行清空,即初始化为0,可以避免数据传输过程中遗留数据对数据同步
过程造成的影响,确保从节点接收到的所有数据都是正确的。
*/
            /* Set any repl_transfer_size to avoid entering this code path
             * at the next call. */
            server.repl_transfer_size = 0;
            serverLog(LL_NOTICE,
                "MASTER <-> REPLICA sync: receiving streamed RDB from master with EOF %s",
                use_diskless_load? "to parser":"to disk");
        } else {
            usemark = 0;
            server.repl_transfer_size = strtol(buf+1,NULL,10);
            serverLog(LL_NOTICE,
                "MASTER <-> REPLICA sync: receiving %lld bytes from master %s",
                (long long) server.repl_transfer_size,
                use_diskless_load? "to parser":"to disk");
        }
        return;
    }

    if (!use_diskless_load) {
        /* Read the data from the socket, store it to a file and search
         * for the EOF. */
        if (usemark) {
            readlen = sizeof(buf);
        } else {
/*
如果usemark为假,说明当前正在进行增量复制,那么本次从主节点读取的数据长度
不能超过尚未复制的总数据量left
*/
            left = server.repl_transfer_size - server.repl_transfer_read;
            readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
        }

        nread = connRead(conn,buf,readlen);
        if (nread <= 0) {
            if (connGetState(conn) == CONN_STATE_CONNECTED) {
                /* equivalent to EAGAIN */
                return;
            }
            serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s",
                (nread == -1) ? connGetLastError(conn) : "connection lost");
            cancelReplicationHandshake(1);
            return;
        }
        atomicIncr(server.stat_net_repl_input_bytes, nread);

        /* When a mark is used, we want to detect EOF asap in order to avoid
         * writing the EOF mark into the file... */
        int eof_reached = 0;
/*
如果处于RDB文件接收模式,usemark标志为1,该段代码将处理当前读取的数据,并检查是否到达了RDB文件
的末尾。具体来说,这个段落将更新之前保存的lastbytes数组,并检查是否与eofmark值(即标记接收RDB
完成的标志)匹配。匹配上即说明接收到RDB文件末尾的标志,那么此时应该退出RDB文件传输模式,并重新
设定repl_transfer_size的值,以应对后续新的同步传输。具体如下:
检查当前读取到的数据是否至少有一个完整的CONFIG_RUN_ID_SIZE长度,这个长度是在读取EOF标记时记录
的,也是从当前的buf中获取的。如果当前读取长度不够一个完整的CONFIG_RUN_ID_SIZE,则需要将上次读
取的的后CONFIG_RUN_ID_SIZE-nread部分缓存下来,并与当前读取到的部分拼接出一个完整的
CONFIG_RUN_ID_SIZE长度。这样可以避免先前读取的数据被丢失。
拼接之后,用memcmp()比较lastbytes数组与eofmark数组中连续的CONFIG_RUN_ID_SIZE字节是否相同;如
果是,则设置eof_reached标志为1,表示已经接收到了RDB文件的最后标志,RDB文件传输过程结束。

如果没有处于RDB文件接收模式,即usemark标志为0,则不需要进行处理,程序忽略这段代码并继续向下执行。

*/
        if (usemark) {
            /* Update the last bytes array, and check if it matches our
             * delimiter. */
            if (nread >= CONFIG_RUN_ID_SIZE) {
                memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,
                       CONFIG_RUN_ID_SIZE);
            } else {
                int rem = CONFIG_RUN_ID_SIZE-nread;
                memmove(lastbytes,lastbytes+nread,rem);
                memcpy(lastbytes+rem,buf,nread);
            }
            if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0)
                eof_reached = 1;
        }

        /* Update the last I/O time for the replication transfer (used in
         * order to detect timeouts during replication), and write what we
         * got from the socket to the dump file on disk. */
        server.repl_transfer_lastio = server.unixtime;
        if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) {
            serverLog(LL_WARNING,
                "Write error or short write writing to the DB dump file "
                "needed for MASTER <-> REPLICA synchronization: %s",
                (nwritten == -1) ? strerror(errno) : "short write");
            goto error;
        }
        server.repl_transfer_read += nread;

        /* Delete the last 40 bytes from the file if we reached EOF. */
        if (usemark && eof_reached) {
            if (ftruncate(server.repl_transfer_fd,
                server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1)
            {
                serverLog(LL_WARNING,
                    "Error truncating the RDB file received from the master "
                    "for SYNC: %s", strerror(errno));
                goto error;
            }
        }

        /* Sync data on disk from time to time, otherwise at the end of the
         * transfer we may suffer a big delay as the memory buffers are copied
         * into the actual disk. */
        if (server.repl_transfer_read >=
            server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC)
        {
            off_t sync_size = server.repl_transfer_read -
                              server.repl_transfer_last_fsync_off;
            rdb_fsync_range(server.repl_transfer_fd,
                server.repl_transfer_last_fsync_off, sync_size);
            server.repl_transfer_last_fsync_off += sync_size;
        }

        /* Check if the transfer is now complete */
        if (!usemark) {
            if (server.repl_transfer_read == server.repl_transfer_size)
                eof_reached = 1;
        }

        /* If the transfer is yet not complete, we need to read more, so
         * return ASAP and wait for the handler to be called again. */
        if (!eof_reached) return;
    }

    /* We reach this point in one of the following cases:
     *
     * 1. The replica is using diskless replication, that is, it reads data
     *    directly from the socket to the Redis memory, without using
     *    a temporary RDB file on disk. In that case we just block and
     *    read everything from the socket.
     *
     * 2. Or when we are done reading from the socket to the RDB file, in
     *    such case we want just to read the RDB file in memory. */

    /* We need to stop any AOF rewriting child before flushing and parsing
     * the RDB, otherwise we'll create a copy-on-write disaster. */
    if (server.aof_state != AOF_OFF) stopAppendOnly();
    /* Also try to stop save RDB child before flushing and parsing the RDB:
     * 1. Ensure background save doesn't overwrite synced data after being loaded.
     * 2. Avoid copy-on-write disaster. */
    if (server.child_type == CHILD_TYPE_RDB) {
        if (!use_diskless_load) {
            serverLog(LL_NOTICE,
                "Replica is about to load the RDB file received from the "
                "master, but there is a pending RDB child running. "
                "Killing process %ld and removing its temp file to avoid "
                "any race",
                (long) server.child_pid);
        }
        killRDBChild();
    }

    if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
        /* Initialize empty tempDb dictionaries. */
        diskless_load_tempDb = disklessLoadInitTempDb();
        temp_functions_lib_ctx = functionsLibCtxCreate();

        moduleFireServerEvent(REDISMODULE_EVENT_REPL_ASYNC_LOAD,
                              REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_STARTED,
                              NULL);
    } else {
        replicationAttachToNewMaster();

        serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data");
        emptyData(-1,empty_db_flags,replicationEmptyDbCallback);
    }

    /* Before loading the DB into memory we need to delete the readable
     * handler, otherwise it will get called recursively since
     * rdbLoad() will call the event loop to process events from time to
     * time for non blocking loading. */
    connSetReadHandler(conn, NULL);
    
    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory");
    rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
    if (use_diskless_load) {
        rio rdb;
        redisDb *dbarray;
        functionsLibCtx* functions_lib_ctx;
        int asyncLoading = 0;

        if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
            /* Async loading means we continue serving read commands during full resync, and
             * "swap" the new db with the old db only when loading is done.
             * It is enabled only on SWAPDB diskless replication when master replication ID hasn't changed,
             * because in that state the old content of the db represents a different point in time of the same
             * data set we're currently receiving from the master. */
            if (memcmp(server.replid, server.master_replid, CONFIG_RUN_ID_SIZE) == 0) {
                asyncLoading = 1;
            }
            dbarray = diskless_load_tempDb;
            functions_lib_ctx = temp_functions_lib_ctx;
        } else {
            dbarray = server.db;
            functions_lib_ctx = functionsLibCtxGetCurrent();
            functionsLibCtxClear(functions_lib_ctx);
        }

        rioInitWithConn(&rdb,conn,server.repl_transfer_size);

        /* Put the socket in blocking mode to simplify RDB transfer.
         * We'll restore it when the RDB is received. */
        connBlock(conn);
        connRecvTimeout(conn, server.repl_timeout*1000);
        startLoading(server.repl_transfer_size, RDBFLAGS_REPLICATION, asyncLoading);

        int loadingFailed = 0;
        rdbLoadingCtx loadingCtx = { .dbarray = dbarray, .functions_lib_ctx = functions_lib_ctx };
        if (rdbLoadRioWithLoadingCtx(&rdb,RDBFLAGS_REPLICATION,&rsi,&loadingCtx) != C_OK) {
            /* RDB loading failed. */
            serverLog(LL_WARNING,
                      "Failed trying to load the MASTER synchronization DB "
                      "from socket, check server logs.");
            loadingFailed = 1;
        } else if (usemark) {
            /* Verify the end mark is correct. */
            if (!rioRead(&rdb, buf, CONFIG_RUN_ID_SIZE) ||
                memcmp(buf, eofmark, CONFIG_RUN_ID_SIZE) != 0)
            {
                serverLog(LL_WARNING, "Replication stream EOF marker is broken");
                loadingFailed = 1;
            }
        }

        if (loadingFailed) {
            stopLoading(0);
            cancelReplicationHandshake(1);
            rioFreeConn(&rdb, NULL);

            if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
                /* Discard potentially partially loaded tempDb. */
                moduleFireServerEvent(REDISMODULE_EVENT_REPL_ASYNC_LOAD,
                                      REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_ABORTED,
                                      NULL);

                disklessLoadDiscardTempDb(diskless_load_tempDb);
                functionsLibCtxFree(temp_functions_lib_ctx);
                serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Discarding temporary DB in background");
            } else {
                /* Remove the half-loaded data in case we started with an empty replica. */
                emptyData(-1,empty_db_flags,replicationEmptyDbCallback);
            }

            /* Note that there's no point in restarting the AOF on SYNC
             * failure, it'll be restarted when sync succeeds or the replica
             * gets promoted. */
            return;
        }

        /* RDB loading succeeded if we reach this point. */
        if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
            /* We will soon swap main db with tempDb and replicas will start
             * to apply data from new master, we must discard the cached
             * master structure and force resync of sub-replicas. */
            replicationAttachToNewMaster();

            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Swapping active DB with loaded DB");
            swapMainDbWithTempDb(diskless_load_tempDb);

            /* swap existing functions ctx with the temporary one */
            functionsLibCtxSwapWithCurrent(temp_functions_lib_ctx);

            moduleFireServerEvent(REDISMODULE_EVENT_REPL_ASYNC_LOAD,
                        REDISMODULE_SUBEVENT_REPL_ASYNC_LOAD_COMPLETED,
                        NULL);

            /* Delete the old db as it's useless now. */
            disklessLoadDiscardTempDb(diskless_load_tempDb);
            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Discarding old DB in background");
        }

        /* Inform about db change, as replication was diskless and didn't cause a save. */
        server.dirty++;

        stopLoading(1);

        /* Cleanup and restore the socket to the original state to continue
         * with the normal replication. */
        rioFreeConn(&rdb, NULL);
        connNonBlock(conn);
        connRecvTimeout(conn,0);
    } else {

        /* Make sure the new file (also used for persistence) is fully synced
         * (not covered by earlier calls to rdb_fsync_range). */
        if (fsync(server.repl_transfer_fd) == -1) {
            serverLog(LL_WARNING,
                "Failed trying to sync the temp DB to disk in "
                "MASTER <-> REPLICA synchronization: %s",
                strerror(errno));
            cancelReplicationHandshake(1);
            return;
        }

        /* Rename rdb like renaming rewrite aof asynchronously. */
        int old_rdb_fd = open(server.rdb_filename,O_RDONLY|O_NONBLOCK);
        if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) {
            serverLog(LL_WARNING,
                "Failed trying to rename the temp DB into %s in "
                "MASTER <-> REPLICA synchronization: %s",
                server.rdb_filename, strerror(errno));
            cancelReplicationHandshake(1);
            if (old_rdb_fd != -1) close(old_rdb_fd);
            return;
        }
        /* Close old rdb asynchronously. */
        if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd, 0, 0);

        /* Sync the directory to ensure rename is persisted */
        if (fsyncFileDir(server.rdb_filename) == -1) {
            serverLog(LL_WARNING,
                "Failed trying to sync DB directory %s in "
                "MASTER <-> REPLICA synchronization: %s",
                server.rdb_filename, strerror(errno));
            cancelReplicationHandshake(1);
            return;
        }

        if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != RDB_OK) {
            serverLog(LL_WARNING,
                "Failed trying to load the MASTER synchronization "
                "DB from disk, check server logs.");
            cancelReplicationHandshake(1);
            if (server.rdb_del_sync_files && allPersistenceDisabled()) {
                serverLog(LL_NOTICE,"Removing the RDB file obtained from "
                                    "the master. This replica has persistence "
                                    "disabled");
                bg_unlink(server.rdb_filename);
            }
            /* Note that there's no point in restarting the AOF on sync failure,
               it'll be restarted when sync succeeds or replica promoted. */
            return;
        }

        /* Cleanup. */
        if (server.rdb_del_sync_files && allPersistenceDisabled()) {
            serverLog(LL_NOTICE,"Removing the RDB file obtained from "
                                "the master. This replica has persistence "
                                "disabled");
            bg_unlink(server.rdb_filename);
        }

        zfree(server.repl_transfer_tmpfile);
        server.repl_transfer_fd = -1;
        server.repl_transfer_tmpfile = NULL;
    }

    /* Final setup of the connected slave <- master link */
    replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
    server.repl_state = REPL_STATE_CONNECTED;
    server.repl_down_since = 0;

    /* Fire the master link modules event. */
    moduleFireServerEvent(REDISMODULE_EVENT_MASTER_LINK_CHANGE,
                          REDISMODULE_SUBEVENT_MASTER_LINK_UP,
                          NULL);

    /* After a full resynchronization we use the replication ID and
     * offset of the master. The secondary ID / offset are cleared since
     * we are starting a new history. */
    memcpy(server.replid,server.master->replid,sizeof(server.replid));
    server.master_repl_offset = server.master->reploff;
    clearReplicationId2();

    /* Let's create the replication backlog if needed. Slaves need to
     * accumulate the backlog regardless of the fact they have sub-slaves
     * or not, in order to behave correctly if they are promoted to
     * masters after a failover. */
    if (server.repl_backlog == NULL) createReplicationBacklog();
    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success");

    if (server.supervised_mode == SUPERVISED_SYSTEMD) {
        redisCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Finished with success. Ready to accept connections in read-write mode.\n");
    }

    /* Send the initial ACK immediately to put this replica in online state. */
    if (usemark) replicationSendAck();

    /* Restart the AOF subsystem now that we finished the sync. This
     * will trigger an AOF rewrite, and when done will start appending
     * to the new file. */
    if (server.aof_enabled) restartAOFAfterSYNC();
    return;

error:
    cancelReplicationHandshake(1);
    return;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值