aof
aof(append only file)是redis中区别于rdb的另一种持久化机制,通过追加存储每一条写命令达到持久化目的,恢复数据只需要顺序回放aof文件即可。相比于rdb的全量快照,aof就比较轻量只需要追加写操作,所以可以做到秒级备份数据或者每个写命令触发,缺点就是追加命令越来越多的情况下,aof文件变大,恢复时间变慢,因此redis也有rdb+aof混合方案来综合各自优势来提高aof效率,并且检查aof文件大小触发重写aof机制。
7.0版本aof生成逻辑
redis7.0使用多个文件来做aof记录。常规追加记录日志操作只需要一个文件即可,但是一个文件会遇到文件过大、内容冗余、主进程子进程操作同一个文件不一致等问题。所以7.0版本的aof使用多个文件来记录数据:子进程使用base文件记录fork点的所有数据(默认名为:appendonly.aof.<seq>.base.<rdb|aof>),而incr文件由父进程创建,用来同时记录aof期间收集的写操作(默认名为:appendonly.aof.<seq>.incr.aof),manifest文件用来记录base、incr的文件信息(默认名为appendonly.aof.manifest)。当然在aof期间为防止意外情况,都是先往一个temp文件来记录数据,直到aof逻辑完毕,由主进程将这些文件rename转正。
rewriteAppendOnlyFileBackground-aof主逻辑
rewriteAppendOnlyFileBackground
函数是aof逻辑手动或者自动触发的入口函数:
/* Start a background AOF rewrite by forking a child process.
 *
 * Before forking, the parent makes sure the AOF directory exists, forces a
 * flush of the pending AOF buffer and opens a new INCR file for appending.
 * The child rewrites the dataset into "temp-rewriteaof-bg-<pid>.aof" and
 * exits with status 0 on success or 1 on failure.
 *
 * Returns C_OK when the child was started, C_ERR when another child is
 * already active, when the directory / INCR file could not be prepared, or
 * when the fork itself failed. */
int rewriteAppendOnlyFileBackground(void) {
    /* Refuse to start while another child process is still running. */
    if (hasActiveChildProcess()) return C_ERR;

    if (dirCreateIfMissing(server.aof_dirname) == -1) {
        serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s",
            server.aof_dirname, strerror(errno));
        server.aof_lastbgrewrite_status = C_ERR;
        return C_ERR;
    }

    /* Resetting the selected DB to -1 makes the next feedAppendOnlyFile()
     * call emit a SELECT command. */
    server.aof_selected_db = -1;
    flushAppendOnlyFile(1);

    if (openNewIncrAofForAppend() != C_OK) {
        server.aof_lastbgrewrite_status = C_ERR;
        return C_ERR;
    }
    server.stat_aof_rewrites++;

    pid_t forked_pid = redisFork(CHILD_TYPE_AOF);

    if (forked_pid == 0) {
        /* Child side: produce the rewritten file and exit. */
        char rewrite_target[256];

        redisSetProcTitle("redis-aof-rewrite");
        redisSetCpuAffinity(server.aof_rewrite_cpulist);
        snprintf(rewrite_target, sizeof(rewrite_target),
            "temp-rewriteaof-bg-%d.aof", (int) getpid());
        if (rewriteAppendOnlyFile(rewrite_target) == C_OK) {
            serverLog(LL_NOTICE,
                "Successfully created the temporary AOF base file %s", rewrite_target);
            sendChildCowInfo(CHILD_INFO_TYPE_AOF_COW_SIZE, "AOF rewrite");
            exitFromChild(0);
        } else {
            exitFromChild(1);
        }
        return C_OK; /* unreached */
    }

    /* Parent side from here on. */
    if (forked_pid == -1) {
        server.aof_lastbgrewrite_status = C_ERR;
        serverLog(LL_WARNING,
            "Can't rewrite append only file in background: fork: %s",
            strerror(errno));
        return C_ERR;
    }

    serverLog(LL_NOTICE,
        "Background append only file rewriting started by pid %ld",(long) forked_pid);
    /* The rewrite is now running, so clear the scheduled flag and record
     * when it started. */
    server.aof_rewrite_scheduled = 0;
    server.aof_rewrite_time_start = time(NULL);
    return C_OK;
}
- hasActiveChildProcess
检查是否存在工作中的子进程,存在就不执行此次aof
- dirCreateIfMissing
保证aof文件目录存在,默认appendonlydir
- flushAppendOnlyFile
输出aof_buf到aof文件,这里的aof_buf就是主进程在aof期间收集到的写指令,aof完成后追加到文件
- openNewIncrAofForAppend
打开一个新的incr文件(aof_state为AOF_WAIT_REWRITE时是临时文件temp-appendonly.aof.incr),在后续fork出子进程写aof文件期间,主进程处理的所有写操作都会记录到这个文件里
- redisFork
生成aof子进程
- 子进程执行
rewriteAppendOnlyFile
开始真正的aof逻辑:
- 打开临时aof文件
temp-rewriteaof-<pid>.aof
- 如果redis启动配置了
aof-use-rdb-preamble
策略,就调用rdbSaveRio
保存一份全量rdb数据到aof文件(由于aof记录的单条数据直接用的写命令的协议数据,导致aof实际记录的数据远大于rdb,所以引入了rdb+aof混合持久化机制,aof先写入rdb数据,再写入aof期间的指令到aof文件,读取aof文件也是判断文件头部是否有rdb数据再读取aof数据);否则就调用rewriteAppendOnlyFileRio
做aof追加写命令到临时文件做记录(实际就是读取每个key当前的数据生成一个写入指令到aof:例如string就生成一个SET KEY VALUE
,list就生成一个RPUSH KEY VALUE
)
- fflush
写入数据到内核缓冲区
- fsync
同步文件数据到磁盘
- rename
将文件重命名为temp-rewriteaof-bg-<pid>.aof(此时还不是正式aof文件名,正式的base文件名由父进程在backgroundRewriteDoneHandler中再次rename得到),子进程完成aof
- 父进程直接不做什么逻辑,依然去执行事件循环,监听客户端命令,写操作命令就判断是否是aof期间,是的话调用
feedAppendOnlyFile
将命令分别写入aof_buf
缓冲区,此缓冲区会在合适的时候写入aof文件
flushAppendOnlyFile-将主进程收集的写操作写入aof文件
flushAppendOnlyFile
函数是将aof_buf
的内容写到aof文件的逻辑,这里是写入到incr文件
/* Write the pending contents of server.aof_buf to the AOF file descriptor
 * (server.aof_fd) and then fsync according to the server.aof_fsync policy.
 *
 * force: when non-zero, the write is not postponed even if the policy is
 * AOF_FSYNC_EVERYSEC and a background fsync is still in progress.
 *
 * On a failed or short write with a policy other than AOF_FSYNC_ALWAYS the
 * function sets server.aof_last_write_status to C_ERR and returns, keeping
 * the unwritten data in the buffer. With AOF_FSYNC_ALWAYS a write or fsync
 * failure terminates the process via exit(1). */
void flushAppendOnlyFile(int force) {
ssize_t nwritten;
int sync_in_progress = 0;
mstime_t latency;
if (sdslen(server.aof_buf) == 0) {
/* Check if we need to do fsync even the aof buffer is empty,
* because previously in AOF_FSYNC_EVERYSEC mode, fsync is
* called only when aof buffer is not empty, so if users
* stop write commands before fsync called in one second,
* the data in page cache cannot be flushed in time. */
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
server.aof_fsync_offset != server.aof_current_size &&
server.unixtime > server.aof_last_fsync &&
!(sync_in_progress = aofFsyncInProgress())) {
goto try_fsync;
} else {
return;
}
}
/* Only the everysec policy queues fsync in the background (see the end of
 * this function), so only then do we ask whether one is still running. */
if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
sync_in_progress = aofFsyncInProgress();
if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {
/* With this append fsync policy we do background fsyncing.
* If the fsync is still in progress we can try to delay
* the write for a couple of seconds. */
if (sync_in_progress) {
if (server.aof_flush_postponed_start == 0) {
/* No previous write postponing, remember that we are
* postponing the flush and return. */
server.aof_flush_postponed_start = server.unixtime;
return;
} else if (server.unixtime - server.aof_flush_postponed_start < 2) {
/* We were already waiting for fsync to finish, but for less
* than two seconds this is still ok. Postpone again. */
return;
}
/* Otherwise fall through, and go write since we can't wait
* over two seconds. */
server.aof_delayed_fsync++;
serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
}
}
/* We want to perform a single write. This should be guaranteed atomic
* at least if the filesystem we are writing is a real physical one.
* While this will save us against the server being killed I don't think
* there is much to do about the whole server stopping for power problems
* or alike */
if (server.aof_flush_sleep && sdslen(server.aof_buf)) {
usleep(server.aof_flush_sleep);
}
latencyStartMonitor(latency);
nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
latencyEndMonitor(latency);
/* We want to capture different events for delayed writes:
* when the delay happens with a pending fsync, or with a saving child
* active, and when the above two conditions are missing.
* We also use an additional event name to save all samples which is
* useful for graphing / monitoring purposes. */
if (sync_in_progress) {
latencyAddSampleIfNeeded("aof-write-pending-fsync",latency);
} else if (hasActiveChildProcess()) {
latencyAddSampleIfNeeded("aof-write-active-child",latency);
} else {
latencyAddSampleIfNeeded("aof-write-alone",latency);
}
latencyAddSampleIfNeeded("aof-write",latency);
/* We performed the write so reset the postponed flush sentinel to zero. */
server.aof_flush_postponed_start = 0;
if (nwritten != (ssize_t)sdslen(server.aof_buf)) {
static time_t last_write_error_log = 0;
int can_log = 0;
/* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */
if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) {
can_log = 1;
last_write_error_log = server.unixtime;
}
/* Log the AOF write error and record the error code. */
if (nwritten == -1) {
if (can_log) {
serverLog(LL_WARNING,"Error writing to the AOF file: %s",
strerror(errno));
}
server.aof_last_write_errno = errno;
} else {
if (can_log) {
serverLog(LL_WARNING,"Short write while writing to "
"the AOF file: (nwritten=%lld, "
"expected=%lld)",
(long long)nwritten,
(long long)sdslen(server.aof_buf));
}
/* Try to cut the file back to the INCR size recorded before this write,
 * so the partially written data is removed again. */
if (ftruncate(server.aof_fd, server.aof_last_incr_size) == -1) {
if (can_log) {
serverLog(LL_WARNING, "Could not remove short write "
"from the append-only file. Redis may refuse "
"to load the AOF the next time it starts. "
"ftruncate: %s", strerror(errno));
}
} else {
/* If the ftruncate() succeeded we can set nwritten to
* -1 since there is no longer partial data into the AOF. */
nwritten = -1;
}
/* A short write is always recorded as ENOSPC, whatever its real cause. */
server.aof_last_write_errno = ENOSPC;
}
/* Handle the AOF write error. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
/* We can't recover when the fsync policy is ALWAYS since the reply
* for the client is already in the output buffers (both writes and
* reads), and the changes to the db can't be rolled back. Since we
* have a contract with the user that on acknowledged or observed
* writes are is synced on disk, we must exit. */
serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
exit(1);
} else {
/* Recover from failed write leaving data into the buffer. However
* set an error to stop accepting writes as long as the error
* condition is not cleared. */
server.aof_last_write_status = C_ERR;
/* Trim the sds buffer if there was a partial write, and there
* was no way to undo it with ftruncate(2). */
if (nwritten > 0) {
server.aof_current_size += nwritten;
server.aof_last_incr_size += nwritten;
sdsrange(server.aof_buf,nwritten,-1);
}
return; /* We'll try again on the next call... */
}
} else {
/* Successful write(2). If AOF was in error state, restore the
* OK state and log the event. */
if (server.aof_last_write_status == C_ERR) {
serverLog(LL_WARNING,
"AOF write error looks solved, Redis can write again.");
server.aof_last_write_status = C_OK;
}
}
/* Full write: account for the new bytes in both size counters. */
server.aof_current_size += nwritten;
server.aof_last_incr_size += nwritten;
/* Re-use AOF buffer when it is small enough. The maximum comes from the
* arena size of 4k minus some overhead (but is otherwise arbitrary). */
if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) {
sdsclear(server.aof_buf);
} else {
sdsfree(server.aof_buf);
server.aof_buf = sdsempty();
}
try_fsync:
/* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
* children doing I/O in the background. */
if (server.aof_no_fsync_on_rewrite && hasActiveChildProcess())
return;
/* Perform the fsync if needed. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
/* redis_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
latencyStartMonitor(latency);
/* Let's try to get this data on the disk. To guarantee data safe when
* the AOF fsync policy is 'always', we should exit if failed to fsync
* AOF (see comment next to the exit(1) after write error above). */
if (redis_fsync(server.aof_fd) == -1) {
serverLog(LL_WARNING,"Can't persist AOF for fsync error when the "
"AOF fsync policy is 'always': %s. Exiting...", strerror(errno));
exit(1);
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
} else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
server.unixtime > server.aof_last_fsync)) {
/* A new background fsync is queued only when none is in progress;
 * aof_last_fsync is refreshed in either case. */
if (!sync_in_progress) {
aof_background_fsync(server.aof_fd);
server.aof_fsync_offset = server.aof_current_size;
}
server.aof_last_fsync = server.unixtime;
}
}
- 如果
aof_buf
没有内容且当前是AOF_FSYNC_EVERYSEC
每秒刷盘的配置,则也检查下是不是需要刷盘,防止一些极限情况下没来得及刷盘造成数据丢失。否则就忽略本次落盘操作返回
- 否则
aof_buf
有内容且当前是AOF_FSYNC_EVERYSEC
每秒刷盘的配置,获取是否有后台工作线程正在执行刷盘。如果有线程在工作且不是强制模式(!force),就用aof_flush_postponed_start
变量记录开始推迟的时间并直接返回,等后续调用再尝试;如果aof_flush_postponed_start
已经有值了且距离记录时已经过去2s及以上,就不再等待,强制执行写入
- aofWrite
将aof_buf
数据写入文件
- try_fsync
标签:如果是AOF_FSYNC_ALWAYS
模式,直接执行fsync
刷盘;如果是AOF_FSYNC_EVERYSEC
模式且当前秒数大于上次fsync的时间,则在没有后台fsync正在进行时交给后台线程去执行刷盘
backgroundRewriteDoneHandler-主进程定时检查aof子进程是否完毕
backgroundRewriteDoneHandler
是主进程后台检测aof子进程是否完成作业的逻辑
/* Handle the termination of the background AOF rewrite child.
 *
 * exitcode: exit status of the child (0 means success).
 * bysignal: non-zero when the child was terminated by a signal; it is
 *           compared against SIGUSR1 and logged as the signal number.
 *
 * On success the temporary base file is renamed to a new BASE file name,
 * the temporary INCR file is renamed as well when the AOF state is
 * AOF_WAIT_REWRITE, the modified manifest copy is persisted and installed,
 * and history files are deleted. Every path ends in the cleanup section,
 * which removes the child's temp file and updates the rewrite timing
 * fields. */
void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
if (!bysignal && exitcode == 0) {
char tmpfile[256];
long long now = ustime();
sds new_base_filepath = NULL;
sds new_incr_filepath = NULL;
aofManifest *temp_am;
mstime_t latency;
serverLog(LL_NOTICE,
"Background AOF rewrite terminated with success");
/* Name of the temporary file, derived from the child's pid. */
snprintf(tmpfile, 256, "temp-rewriteaof-bg-%d.aof",
(int)server.child_pid);
serverAssert(server.aof_manifest != NULL);
/* Dup a temporary aof_manifest for subsequent modifications. */
temp_am = aofManifestDup(server.aof_manifest);
/* Get a new BASE file name and mark the previous (if we have)
* as the HISTORY type. */
sds new_base_filename = getNewBaseFileNameAndMarkPreAsHistory(temp_am);
serverAssert(new_base_filename != NULL);
new_base_filepath = makePath(server.aof_dirname, new_base_filename);
/* Rename the temporary aof file to 'new_base_filename'. */
latencyStartMonitor(latency);
if (rename(tmpfile, new_base_filepath) == -1) {
serverLog(LL_WARNING,
"Error trying to rename the temporary AOF base file %s into %s: %s",
tmpfile,
new_base_filepath,
strerror(errno));
/* Nothing was installed yet: drop the manifest copy and the path string. */
aofManifestFree(temp_am);
sdsfree(new_base_filepath);
server.aof_lastbgrewrite_status = C_ERR;
server.stat_aofrw_consecutive_failures++;
goto cleanup;
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rename", latency);
serverLog(LL_NOTICE,
"Successfully renamed the temporary AOF base file %s into %s", tmpfile, new_base_filename);
/* Rename the temporary incr aof file to 'new_incr_filename'. */
if (server.aof_state == AOF_WAIT_REWRITE) {
/* Get temporary incr aof name. */
sds temp_incr_aof_name = getTempIncrAofName();
sds temp_incr_filepath = makePath(server.aof_dirname, temp_incr_aof_name);
/* Get next new incr aof name. */
sds new_incr_filename = getNewIncrAofName(temp_am);
new_incr_filepath = makePath(server.aof_dirname, new_incr_filename);
latencyStartMonitor(latency);
if (rename(temp_incr_filepath, new_incr_filepath) == -1) {
serverLog(LL_WARNING,
"Error trying to rename the temporary AOF incr file %s into %s: %s",
temp_incr_filepath,
new_incr_filepath,
strerror(errno));
/* The base file was already renamed above, so remove it again before
 * releasing everything allocated in this branch. */
bg_unlink(new_base_filepath);
sdsfree(new_base_filepath);
aofManifestFree(temp_am);
sdsfree(temp_incr_filepath);
sdsfree(new_incr_filepath);
sdsfree(temp_incr_aof_name);
server.aof_lastbgrewrite_status = C_ERR;
server.stat_aofrw_consecutive_failures++;
goto cleanup;
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rename", latency);
serverLog(LL_NOTICE,
"Successfully renamed the temporary AOF incr file %s into %s", temp_incr_aof_name, new_incr_filename);
sdsfree(temp_incr_filepath);
sdsfree(temp_incr_aof_name);
}
/* Change the AOF file type in 'incr_aof_list' from AOF_FILE_TYPE_INCR
* to AOF_FILE_TYPE_HIST, and move them to the 'history_aof_list'. */
markRewrittenIncrAofAsHistory(temp_am);
/* Persist our modifications. */
if (persistAofManifest(temp_am) == C_ERR) {
/* Undo the renames performed above: unlink the new base file and, if one
 * was created, the new incr file. */
bg_unlink(new_base_filepath);
aofManifestFree(temp_am);
sdsfree(new_base_filepath);
if (new_incr_filepath) {
bg_unlink(new_incr_filepath);
sdsfree(new_incr_filepath);
}
server.aof_lastbgrewrite_status = C_ERR;
server.stat_aofrw_consecutive_failures++;
goto cleanup;
}
sdsfree(new_base_filepath);
if (new_incr_filepath) sdsfree(new_incr_filepath);
/* We can safely let `server.aof_manifest` point to 'temp_am' and free the previous one. */
aofManifestFreeAndUpdate(temp_am);
if (server.aof_fd != -1) {
/* AOF enabled. */
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
/* Current size = size of the new base file plus the bytes accumulated in
 * the incr file so far. */
server.aof_current_size = getAppendOnlyFileSize(new_base_filename, NULL) + server.aof_last_incr_size;
server.aof_rewrite_base_size = server.aof_current_size;
server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
}
/* We don't care about the return value of `aofDelHistoryFiles`, because the history
* deletion failure will not cause any problems. */
aofDelHistoryFiles();
server.aof_lastbgrewrite_status = C_OK;
server.stat_aofrw_consecutive_failures = 0;
serverLog(LL_NOTICE, "Background AOF rewrite finished successfully");
/* Change state from WAIT_REWRITE to ON if needed */
if (server.aof_state == AOF_WAIT_REWRITE)
server.aof_state = AOF_ON;
serverLog(LL_VERBOSE,
"Background AOF rewrite signal handler took %lldus", ustime()-now);
} else if (!bysignal && exitcode != 0) {
server.aof_lastbgrewrite_status = C_ERR;
server.stat_aofrw_consecutive_failures++;
serverLog(LL_WARNING,
"Background AOF rewrite terminated with error");
} else {
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
* triggering an error condition. */
if (bysignal != SIGUSR1) {
server.aof_lastbgrewrite_status = C_ERR;
server.stat_aofrw_consecutive_failures++;
}
serverLog(LL_WARNING,
"Background AOF rewrite terminated by signal %d", bysignal);
}
cleanup:
aofRemoveTempFile(server.child_pid);
/* Clear AOF buffer and delete temp incr aof for next rewrite. */
/* Still being in AOF_WAIT_REWRITE here means the success path above did
 * not switch the state to AOF_ON. */
if (server.aof_state == AOF_WAIT_REWRITE) {
sdsfree(server.aof_buf);
server.aof_buf = sdsempty();
aofDelTempIncrAofFile();
}
server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start;
server.aof_rewrite_time_start = -1;
/* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */
if (server.aof_state == AOF_WAIT_REWRITE)
server.aof_rewrite_scheduled = 1;
}
- aofManifestDup
拷贝一份新的manifest数据,后续对manifest的变动都在这块临时数据上变动
- getNewBaseFileNameAndMarkPreAsHistory
通过计算manifest的文件自增序号获取一个新的正式aof文件名,默认appendonly.aof.<seq>.base.<rdb|aof>(这里用rdb还是aof取决于aof_use_rdb_preamble是否配置混合aof模式)
- 将子进程产生的aof临时文件
temp-rewriteaof-bg-<pid>.aof
重命名为正式aof文件
- markRewrittenIncrAofAsHistory
将之前的incr文件标记为history文件,退出历史舞台
- persistAofManifest
将manifest落盘,文件名默认是appendonly.aof.manifest
- aofManifestFreeAndUpdate
将当前使用的内存manifest更新
- aofDelHistoryFiles
清理旧的aof文件记录(关闭打开的文件等)
- cleanup
标签:清理aof子进程产生的临时文件
自动重写-定时检查aof文件是否超大或者内容冗余
当aof文件过大(超过64M)并且相对上一次重写后的大小增长过大时,触发重写aof操作,即重新执行rewriteAppendOnlyFileBackground
操作重新生成一份aof文件,用来精简文件,例如100次set key value实际只需要保留最后一次set操作。具体操作是在每次serverCron检查文件大小:
/* Excerpt of serverCron(): only the automatic AOF rewrite check is shown,
 * the surrounding code is elided with "......". */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
......
/* Trigger an AOF rewrite if needed. */
/* All four preconditions must hold: AOF is on, no child process is active,
 * a rewrite percentage is configured (non-zero), and the current AOF size
 * exceeds the configured minimum size. */
if (server.aof_state == AOF_ON &&
!hasActiveChildProcess() &&
server.aof_rewrite_perc &&
server.aof_current_size > server.aof_rewrite_min_size)
{
/* Fall back to 1 when no base size is recorded, so the division below
 * never divides by zero. */
long long base = server.aof_rewrite_base_size ?
server.aof_rewrite_base_size : 1;
/* Growth of the current size over the base size, in percent. */
long long growth = (server.aof_current_size*100/base) - 100;
if (growth >= server.aof_rewrite_perc && !aofRewriteLimited()) {
serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
rewriteAppendOnlyFileBackground();
}
}
......
}
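定时检查aof文件,在aof已开启(AOF_ON)、没有活跃子进程且配置了重写百分比的前提下,同时满足以下两个条件就重新生成新的aof文件:
- aof所有文件总大小超过auto-aof-rewrite-min-size(默认64M)
- aof当前大小相比上一次重写后的大小,增长比例达到auto-aof-rewrite-percentage(默认100%)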
redis启动加载aof文件
aofLoadManifestFromDisk-加载aof的manifest文件内容
/* Initialise server.aof_manifest and, when a manifest file is present in the
 * AOF directory, replace it with the manifest loaded from that file.
 *
 * A missing AOF directory or a missing manifest file is only logged at debug
 * level; in both cases the freshly created empty manifest stays in place. */
void aofLoadManifestFromDisk(void) {
    /* Always start from an empty manifest. */
    server.aof_manifest = aofManifestCreate();

    if (!dirExists(server.aof_dirname)) {
        serverLog(LL_DEBUG, "The AOF directory %s doesn't exist", server.aof_dirname);
        return;
    }

    sds manifest_name = getAofManifestFileName();
    sds manifest_path = makePath(server.aof_dirname, manifest_name);

    if (fileExist(manifest_path)) {
        aofManifest *loaded = aofLoadManifestFromFile(manifest_path);
        /* Install the loaded manifest only when loading produced one. */
        if (loaded) aofManifestFreeAndUpdate(loaded);
    } else {
        serverLog(LL_DEBUG, "The AOF manifest file %s doesn't exist", manifest_name);
    }

    /* Both strings are released on every path that created them. */
    sdsfree(manifest_name);
    sdsfree(manifest_path);
}
- 直接读取
appendonly.aof.manifest
文件内容,生成内存的aofManifest
结构,后续aof就是用这个am结构做内容管理
loadDataFromDisk-加载aof的内容文件
/* Load the dataset from disk: from the AOF files described by
 * server.aof_manifest when server.aof_state is AOF_ON, otherwise from the
 * RDB file server.rdb_filename.
 *
 * The process exits with status 1 when AOF loading returns AOF_FAILED or
 * AOF_OPEN_ERR, or when RDB loading fails with anything other than
 * RDB_NOT_EXIST. After a successful RDB load the replication ID and offset
 * stored in the file are restored when they are present. */
void loadDataFromDisk(void) {
long long start = ustime();
if (server.aof_state == AOF_ON) {
int ret = loadAppendOnlyFiles(server.aof_manifest);
if (ret == AOF_FAILED || ret == AOF_OPEN_ERR)
exit(1);
/* Only report a load time when there actually was an AOF to load. */
if (ret != AOF_NOT_EXIST)
serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000);
} else {
rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
errno = 0; /* Prevent a stale value from affecting error checking */
int rdb_flags = RDBFLAGS_NONE;
if (iAmMaster()) {
/* Master may delete expired keys when loading, we should
* propagate expire to replication backlog. */
createReplicationBacklog();
rdb_flags |= RDBFLAGS_FEED_REPL;
}
int rdb_load_ret = rdbLoad(server.rdb_filename, &rsi, rdb_flags);
if (rdb_load_ret == RDB_OK) {
serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
(float)(ustime()-start)/1000000);
/* Restore the replication ID / offset from the RDB file. */
if (rsi.repl_id_is_set &&
rsi.repl_offset != -1 &&
/* Note that older implementations may save a repl_stream_db
* of -1 inside the RDB file in a wrong way, see more
* information in function rdbPopulateSaveInfo. */
rsi.repl_stream_db != -1)
{
if (!iAmMaster()) {
memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
server.master_repl_offset = rsi.repl_offset;
/* If this is a replica, create a cached master from this
* information, in order to allow partial resynchronizations
* with masters. */
replicationCacheMasterUsingMyself();
selectDb(server.cached_master,rsi.repl_stream_db);
} else {
/* If this is a master, we can save the replication info
* as secondary ID and offset, in order to allow replicas
* to partial resynchronizations with masters. */
/* NOTE(review): the copy into replid2 is sized with sizeof(server.replid);
 * presumably both arrays have the same size - confirm against the struct
 * definition. */
memcpy(server.replid2,rsi.repl_id,sizeof(server.replid));
server.second_replid_offset = rsi.repl_offset+1;
/* Rebase master_repl_offset from rsi.repl_offset. */
server.master_repl_offset += rsi.repl_offset;
serverAssert(server.repl_backlog);
server.repl_backlog->offset = server.master_repl_offset -
server.repl_backlog->histlen + 1;
rebaseReplicationBuffer(rsi.repl_offset);
server.repl_no_slaves_since = time(NULL);
}
}
} else if (rdb_load_ret != RDB_NOT_EXIST) {
/* A missing RDB file is tolerated; any other failure is fatal. */
serverLog(LL_WARNING, "Fatal error loading the DB, check server logs. Exiting.");
exit(1);
}
/* We always create replication backlog if server is a master, we need
* it because we put DELs in it when loading expired keys in RDB, but
* if RDB doesn't have replication info or there is no rdb, it is not
* possible to support partial resynchronization, to avoid extra memory
* of replication backlog, we drop it. */
if (server.master_repl_offset == 0 && server.repl_backlog)
freeReplicationBacklog();
}
}
- 如果打开了aof,
loadAppendOnlyFiles
执行aof加载:
- 通过manifest获取base文件,并调用
loadSingleAppendOnlyFile
读取
- 通过manifest获取incr文件,并调用
loadSingleAppendOnlyFile
读取
loadSingleAppendOnlyFile-真正读取aof文件
不管base还是incr文件都是通过这个函数读取
/* Load a single AOF file (BASE or INCR) located in server.aof_dirname.
 *
 * This is an abridged excerpt: the declarations of 'ret', 'fakeClient' and
 * 'last_progress_report_size', the 'readerr' / 'cleanup' labels and the
 * command-replay loop body are elided with "......".
 *
 * filename: name of the file inside the AOF directory. */
int loadSingleAppendOnlyFile(char *filename) {
sds aof_filepath = makePath(server.aof_dirname, filename);
/* NOTE(review): fp is used below without a NULL check in this excerpt;
 * presumably the check was elided - confirm against the full source. */
FILE *fp = fopen(aof_filepath, "r");
/* Remember the current client; a dedicated AOF client becomes the current
 * client while loading. */
client *old_client = server.current_client;
fakeClient = server.current_client = createAOFClient();
/* Check if the AOF file is in RDB format (it may be RDB encoded base AOF
* or old style RDB-preamble AOF). In that case we need to load the RDB file
* and later continue loading the AOF tail if it is an old style RDB-preamble AOF. */
char sig[5]; /* "REDIS" */
if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) {
/* Not in RDB format, seek back at 0 offset. */
if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
} else {
/* RDB format. Pass loading the RDB functions. */
rio rdb;
/* A file named exactly like the configured AOF file name is treated as an
 * old style RDB-preamble AOF; anything else as an RDB base file. */
int old_style = !strcmp(filename, server.aof_filename);
if (old_style)
serverLog(LL_NOTICE, "Reading RDB preamble from AOF file...");
else
serverLog(LL_NOTICE, "Reading RDB base file on AOF loading...");
/* Rewind so the RDB loader sees the file from its first byte, including
 * the 5-byte signature consumed above. */
if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
rioInitWithFile(&rdb,fp);
if (rdbLoadRio(&rdb,RDBFLAGS_AOF_PREAMBLE,NULL) != C_OK) {
if (old_style)
serverLog(LL_WARNING, "Error reading the RDB preamble of the AOF file %s, AOF loading aborted", filename);
else
serverLog(LL_WARNING, "Error reading the RDB base file %s, AOF loading aborted", filename);
ret = AOF_FAILED;
goto cleanup;
} else {
/* Report progress up to the current file position, i.e. the end of the
 * RDB part that was just loaded. */
loadingAbsProgress(ftello(fp));
last_progress_report_size = ftello(fp);
if (old_style) serverLog(LL_NOTICE, "Reading the remaining AOF tail...");
}
}
/* Read the actual AOF file, in REPL format, command by command. */
while(1) {
......
}
......
}
- 打开当前aof文件
- 读取前5个字符是否是REDIS,不是的话就表示为纯aof语法内容的文件,直接回到文件开头按aof读取;是的话则为rdb格式的base文件或rdb+aof的混合文件,先走
rdbLoadRio
读取rdb内容,遇到RDB_OPCODE_EOF
读取rdb内容结束,接下来都走aof内容读取(while(1)内容)