redis3.0源码学习之可持久化操作

redis3.0源码学习之可持久化操作

前言

这一章主要介绍redis的三种可持久化操作,分别是rdb,aof,以及aof重写机制!

一、简单介绍RDB,AOF,REWRITEAOF

RDB:类似于快照存储某一时刻的数据,对数据进行压缩,体积小。
AOF:存储的是到目前为止的数据,记录的是原始数据,丢失数据概率少,体积大。
AOF重写:是为了防止AOF文件过大的一种机制

二、源码分析

1.RDB源码分析

1.1、RDB触发:Rdb思想是把当前进程数据生成快照保存到硬盘的过程,保存数据库的键值对。触发RDB持久化过程分为手动触发和自动触发。

在这里插入图片描述

由配置文件(save配置项)自动触发(子进程异步):

/* One RDB save point: a (seconds, changes) pair. The automatic-save check
 * compares server.dirty against `changes` and the time elapsed since
 * server.lastsave against `seconds`. */
struct saveparam {
    time_t seconds;   /* Length of the time window, in seconds. */
    int changes;      /* Number of changes required within that window. */
};
struct redisServer server; /* Server global state */

/* RDB persistence RDB持久化相关的对象*/
//表示最后一次save之后数据变化
    long long dirty;                /* Changes to DB from the last save */
    long long dirty_before_bgsave;  /* Used to restore dirty on failed BGSAVE */
    pid_t rdb_child_pid;            /* PID of RDB saving child */
    struct saveparam *saveparams;   /* Save points array for RDB */
    int saveparamslen;              /* Number of saving points */
    char *rdb_filename;             /* Name of RDB file */
    int rdb_compression;            /* Use compression in RDB? */
    int rdb_checksum;               /* Use RDB checksum? */
//表示上一次save时间
    time_t lastsave;                /* Unix time of last successful save */
    time_t lastbgsave_try;          /* Unix time of last attempted bgsave */
    time_t rdb_save_time_last;      /* Time used by last RDB save run. */
    time_t rdb_save_time_start;     /* Current RDB save start time. */
    int rdb_bgsave_scheduled;       /* BGSAVE when possible if true. */
    int rdb_child_type;             /* Type of save by active child. */
    int lastbgsave_status;          /* C_OK or C_ERR */
    int stop_writes_on_bgsave_err;  /* Don't allow writes if can't BGSAVE */
    int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
    int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
    /* Pipe and data structures for child -> parent info sharing. */
	for (j = 0; j < server.saveparamslen; j++) {
          struct saveparam *sp = server.saveparams+j;

          /* Save if we reached the given amount of changes,
           * the given amount of seconds, and if the latest bgsave was
           * successful or if, in case of an error, at least
           * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
          if (server.dirty >= sp->changes &&
              server.unixtime-server.lastsave > sp->seconds &&
              (server.unixtime-server.lastbgsave_try >
               CONFIG_BGSAVE_RETRY_DELAY ||
               server.lastbgsave_status == C_OK))
          {
              serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
                  sp->changes, (int)sp->seconds);
              rdbSaveInfo rsi, *rsiptr;
              rsiptr = rdbPopulateSaveInfo(&rsi);
              rdbSaveBackground(server.rdb_filename,rsiptr);
              break;
          }
      }
由主从复制自动触发(子进程异步):
void syncCommand(redisClient *c) {

  ...........
        /* Ok we don't have a BGSAVE in progress, let's start one */
        // 没有 BGSAVE 在进行,开始一个新的 BGSAVE
        redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
        if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
            redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
            addReplyError(c,"Unable to perform background save");
            return;
        }
        // 设置状态
        c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
        /* Flush the script cache for the new slave. */
.............

}
debug reload触发(同步):
void debugCommand(redisClient *c) {
..........
    	else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
        if (rdbSave(server.rdb_filename) != REDIS_OK) {
            addReply(c,shared.err);
            return;
        }
..............

shutdown触发(同步):
ServerCron:
    if (server.shutdown_asap) {

        // 尝试关闭服务器
        if (prepareForShutdown(0) == REDIS_OK) exit(0);

        // 如果关闭失败,那么打印 LOG ,并移除关闭标识
        redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
        server.shutdown_asap = 0;
    }
prepareForShutDown:
	int prepareForShutdown(int flags) {
  ........

    // 如果客户端执行的是 SHUTDOWN save ,或者 SAVE 功能被打开
    // 那么执行 SAVE 操作
    if ((server.saveparamslen > 0 && !nosave) || save) {
        redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
        /* Snapshotting. Perform a SYNC SAVE and exit */
        if (rdbSave(server.rdb_filename) != REDIS_OK) {
            /* Ooops.. error saving! The best we can do is to continue
             * operating. Note that if there was a background saving process,
             * in the next cron() Redis will be notified that the background
             * saving aborted, handling special stuff like slaves pending for
             * synchronization... */
            redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
            return REDIS_ERR;
        }
    }
...........
    return REDIS_OK;
}
save(同步):
/* SAVE command handler: performs a synchronous rdbSave() of
 * server.rdb_filename and replies with shared.ok or shared.err.
 *
 * c - the client that issued the command; it receives the reply. */
void saveCommand(client *c) {
    rdbSaveInfo rsi, *rsiptr;

    /* Refuse to run while an RDB child is active. */
    if (server.rdb_child_pid != -1) {
        addReplyError(c,"Background save already in progress");
        return;
    }

    rsiptr = rdbPopulateSaveInfo(&rsi);
    addReply(c,
        (rdbSave(server.rdb_filename,rsiptr) == C_OK) ? shared.ok : shared.err);
}
bgsave(子进程异步):
1. 当已经在进行bgsave时,不允许再次进行
2. 当正在进行aof rewrite时,如果schedule设置为1,则延迟执行;否则直接回复错误。
if (server.rdb_child_pid != -1) {
        addReplyError(c,"Background save already in progress");
    } else if (server.aof_child_pid != -1) {
        if (schedule) {
            server.rdb_bgsave_scheduled = 1;
            addReplyStatus(c,"Background saving scheduled");
        } else {
            addReplyError(c,
                "An AOF log rewriting in progress: can't BGSAVE right now. "
                "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
                "possible.");
        }
    } else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) {
        addReplyStatus(c,"Background saving started");
    } else {
        addReply(c,shared.err);
    }
2.1、bgsave执行流程:
1:判断当前是否有rdb子进程,如果有则返回错误
2:判断当前是否有AOF重写(bgrewriteaof)子进程,如果有,则在指定了schedule时标记为延迟执行,否则返回错误
3:创建子进程进行数据压缩写入文件持久化
4:完成数据快照后向父进程发送子进程结束信号
5:父进程在serverCron定时任务中调用wait3回收子进程,避免僵尸进程的出现
if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
           int exitcode = WEXITSTATUS(statloc);
           int bysignal = 0;

           if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);

           if (pid == -1) {
               serverLog(LL_WARNING,"wait3() returned an error: %s. "
                   "rdb_child_pid = %d, aof_child_pid = %d",
                   strerror(errno),
                   (int) server.rdb_child_pid,
                   (int) server.aof_child_pid);
           } else if (pid == server.rdb_child_pid) {
               backgroundSaveDoneHandler(exitcode,bysignal);
               if (!bysignal && exitcode == 0) receiveChildInfo();
           } else if (pid == server.aof_child_pid) {
               backgroundRewriteDoneHandler(exitcode,bysignal);
               if (!bysignal && exitcode == 0) receiveChildInfo();
           } else {
               if (!ldbRemoveChild(pid)) {
                   serverLog(LL_WARNING,
                       "Warning, detected child with unmatched pid: %ld",
                       (long)pid);
               }
           }
           updateDictResizePolicy();
           closeChildInfoPipe();
       }
   }
2.3、RDB结构体(5.0版本)
针对于数据压缩部分:因为我们知道整型要比字符串更省内存,因此在保存对象时,都是优先尝试是否可以INT编码

在这里插入图片描述

2.4、对于过期键值的处理

- 创建rdb时候会过滤掉过期的键
- 读取时:
  - 主:只会载入未过期的;
  - 从:全部载入,因为后期主从会同步

2.AOF源码分析

AOF(append only file):以独立日志的方式记录每次写命令,重启时再重新执行AOF文件中的命令达到恢复数据的目的。AOF的主要作用是解决了数据持久化的实时性,目前已经是Redis持久化的主流方式。

数据结构:

redis中aof,是先保存在aof缓冲区中的,数据结构见redisServer:


// aof 缓冲区
struct redisServer {
    ...
    /* AOF persistence */
    int aof_state;                  /* AOF_(ON|OFF|WAIT_REWRITE) 
    AOF 功能从 off switch 到 on 后,aof_state 会从 AOF_OFF 变为 AOF_WAIT_REWRITE,startAppendOnly 函数完成该逻辑。在 aofrewrite 一次之后,该变量才会从 AOF_WAIT_REWRITE 变为 AOF_ON。*/
    int aof_fsync;                  /* Kind of fsync() policy
    同步策略
     */
    char *aof_filename;             /* Name of the AOF file 
    名字*/
    int aof_no_fsync_on_rewrite;    /* Don't fsync if a rewrite is in prog. */
    int aof_rewrite_perc;           /* Rewrite AOF if % growth is > M and... */
    off_t aof_rewrite_min_size;     /* the AOF file is at least N bytes. */
    off_t aof_rewrite_base_size;    /* AOF size on latest startup or rewrite. */
    off_t aof_current_size;         /* AOF current size. */
    int aof_rewrite_scheduled;      /* Rewrite once BGSAVE terminates. */
    pid_t aof_child_pid;            /* PID if rewriting process */
    list *aof_rewrite_buf_blocks;   /* Hold changes during an AOF rewrite. */
    sds aof_buf;      /* AOF buffer, written before entering the event loop */
    int aof_fd;       /* File descriptor of currently selected AOF file */
    int aof_selected_db; /* Currently selected DB in AOF */
    time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
    time_t aof_last_fsync;            /* UNIX time of last fsync() */
    time_t aof_rewrite_time_last;   /* Time used by last AOF rewrite run. */
    time_t aof_rewrite_time_start;  /* Current AOF rewrite start time. */
    int aof_lastbgrewrite_status;   /* C_OK or C_ERR */
    unsigned long aof_delayed_fsync;  /* delayed AOF fsync() counter */
    int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
    int rdb_save_incremental_fsync;   /* fsync incrementally while rdb saving? */
    int aof_last_write_status;      /* C_OK or C_ERR */
    int aof_last_write_errno;       /* Valid if aof_last_write_status is ERR */
    int aof_load_truncated;         /* Don't stop on unexpected AOF EOF. */
    int aof_use_rdb_preamble;       /* Use RDB preamble on AOF rewrites. */
    /* AOF pipes used to communicate between parent and child during rewrite. */
    int aof_pipe_write_data_to_child;
    int aof_pipe_read_data_from_parent;
    int aof_pipe_write_ack_to_parent;
    int aof_pipe_read_ack_from_child;
    int aof_pipe_write_ack_to_child;
    int aof_pipe_read_ack_from_parent;
    int aof_stop_sending_diff;     /* If true stop sending accumulated diffs
                                      to child process. */
    sds aof_child_diff;             /* AOF diff accumulator child side. */
    ...
}
2.1、持久化参数解析
redis中aof刷盘有三种策略,由参数appendfsync控制:

- always: 每次写入都要同步AOF文件(每次在servercron中同步),在一般的SATA硬盘上,Redis只能支持大约几百TPS写入,显然跟Redis高性能特性背道而驰,不建议配置。
- no:不主动调用fsync,何时把AOF文件同步到硬盘完全由操作系统决定,同步周期不可控(Linux下通常最长约30秒),而且会加大每次同步硬盘的数据量;虽然提升了性能,但数据安全性无法保证。
- everysec,是建议的同步策略,也是默认配置,做到兼顾性能和数据安全性。理论上只有在系统突然宕机的情况下丢失1秒的数据。需要注意的是使用everysec选项时,是利用异步线程来处理的(还记得我们之前讨论的三类子线程吗)。

void flushAppendOnlyFile(int force) {
    ...
        //这个是条件
    else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
                server.unixtime > server.aof_last_fsync)) {
        // 每秒刷新缓存到磁盘一次。
        if (!sync_in_progress) {
            // 添加任务到后台线程。
            aof_background_fsync(server.aof_fd);
            server.aof_fsync_offset = server.aof_current_size;
        }
        server.aof_last_fsync = server.unixtime;
    }
    ...
}

/* Submit a BIO_AOF_FSYNC job for `fd` through bioCreateBackgroundJob().
 * The descriptor is passed packed into the job's first pointer argument;
 * the remaining two arguments are unused (NULL). */
void aof_background_fsync(int fd) {
    void *job_arg = (void*)(long)fd;

    bioCreateBackgroundJob(BIO_AOF_FSYNC,job_arg,NULL,NULL);
}
2.2 具体流程
1:feedAppendOnlyFile 开启aof持久化时,会将命令保存到aof_buf缓冲区中,然后在定时任务中定期地将数据持久化到磁盘。
2:flushAppendOnlyFile根据策略进行刷盘
/* Serialize one executed command into AOF protocol text and append it to
 * the AOF buffer and/or the AOF rewrite buffer.
 *
 * cmd    - command table entry of the executed command; cmd->proc decides
 *          whether the command is translated before being logged.
 * dictid - id of the database the command targeted.
 * argv   - argument vector of the command.
 * argc   - number of entries in argv. */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds out = sdsempty();
    robj *setargv[3];

    /* When the target DB differs from the DB of the previously appended
     * command, a SELECT has to be emitted first. */
    if (dictid != server.aof_selected_db) {
        char dbname[64];

        snprintf(dbname,sizeof(dbname),"%d",dictid);
        out = sdscatprintf(out,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(dbname),dbname);
        server.aof_selected_db = dictid;
    }

    if (cmd->proc == expireCommand ||
        cmd->proc == pexpireCommand ||
        cmd->proc == expireatCommand)
    {
        /* EXPIRE / PEXPIRE / EXPIREAT are all logged as PEXPIREAT. */
        out = catAppendOnlyExpireAtCommand(out,cmd,argv[1],argv[2]);
    } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {
        /* SETEX / PSETEX are logged as a plain SET followed by PEXPIREAT. */
        setargv[0] = createStringObject("SET",3);
        setargv[1] = argv[1];
        setargv[2] = argv[3];
        out = catAppendOnlyGenericCommand(out,3,setargv);
        decrRefCount(setargv[0]);
        out = catAppendOnlyExpireAtCommand(out,cmd,argv[1],argv[2]);
    } else if (cmd->proc == setCommand && argc > 3) {
        /* SET key value [EX seconds] [PX milliseconds] is logged as a plain
         * SET (first three arguments) followed by PEXPIREAT. */
        robj *ex_value = NULL, *px_value = NULL;
        int j;

        out = catAppendOnlyGenericCommand(out,3,argv);
        for (j = 3; j < argc; j++) {
            if (!strcasecmp(argv[j]->ptr, "ex")) ex_value = argv[j+1];
            if (!strcasecmp(argv[j]->ptr, "px")) px_value = argv[j+1];
        }
        /* EX and PX are not expected together. */
        serverAssert(!(ex_value && px_value));
        if (ex_value) {
            out = catAppendOnlyExpireAtCommand(out,server.expireCommand,
                                               argv[1],ex_value);
        }
        if (px_value) {
            out = catAppendOnlyExpireAtCommand(out,server.pexpireCommand,
                                               argv[1],px_value);
        }
    } else {
        /* Every other command is logged verbatim: it either needs no
         * translation or was already translated in the command vector for
         * replication. */
        out = catAppendOnlyGenericCommand(out,argc,argv);
    }

    /* Key step: with AOF enabled, accumulate the text in server.aof_buf. */
    if (server.aof_state == AOF_ON)
        server.aof_buf = sdscatlen(server.aof_buf,out,sdslen(out));

    /* While an AOF rewrite child exists, also record the text in the
     * rewrite buffer so the changes made during the rewrite are kept. */
    if (server.aof_child_pid != -1)
        aofRewriteBufferAppend((unsigned char*)out,sdslen(out));

    sdsfree(out);
}
  • flushAppendOnlyFile(在servercron中调用)函数有两种模式,即参数force=0|1
    • force=0:表示可以启用一些优化操作
    • 在AOF_FSYNC_EVERYSEC模式下,如果后台fsync仍在进行中,本次write最多会被推迟2s,以避免write被正在进行的fsync阻塞;超过2s后则不再等待,直接写入。
#define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */
/* Write the accumulated AOF buffer (server.aof_buf) to the AOF file and
 * fsync it according to the configured policy (server.aof_fsync).
 *
 * force - when 0 and the policy is AOF_FSYNC_EVERYSEC, the write may be
 *         postponed (for less than two seconds) while a background fsync
 *         is still pending; when non-zero the write is never postponed.
 *
 * On a failed or short write with policy AOF_FSYNC_ALWAYS the process
 * exits. With the other policies server.aof_last_write_status is set to
 * C_ERR, server.aof_last_write_errno records the cause, the unwritten data
 * stays in server.aof_buf and the function returns so that the write is
 * retried on the next call. */
void flushAppendOnlyFile(int force) {
    ssize_t nwritten;
    int write_errno;
    int sync_in_progress = 0;
    mstime_t latency;

    if (sdslen(server.aof_buf) == 0) return;

    /* With the everysec policy the fsync is performed by a background job,
     * so check whether one is still pending. */
    if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
        sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0;

    if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {
        /* The write can be postponed for a little while, but not forever. */
        if (sync_in_progress) {
            if (server.aof_flush_postponed_start == 0) {
                /* No previous write postponing, remember that we are
                 * postponing the flush and return. */
                server.aof_flush_postponed_start = server.unixtime;
                return;
            } else if (server.unixtime - server.aof_flush_postponed_start < 2) {
                /* We were already waiting for fsync to finish, but for less
                 * than two seconds this is still ok. Postpone again. */
                return;
            }
            /* Otherwise fall through, and go write since we can't wait
             * over two seconds. The write below may block while the fsync
             * is still in progress. */
            server.aof_delayed_fsync++;
            serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
        }
    }

    /* Perform the actual write, measuring its latency. */
    latencyStartMonitor(latency);
    nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
    /* Capture errno immediately: the latency and logging calls below may
     * overwrite it before the error is reported. */
    write_errno = errno;
    latencyEndMonitor(latency);

    /* File the latency sample under the event matching the situation the
     * write happened in. */
    if (sync_in_progress) {
        latencyAddSampleIfNeeded("aof-write-pending-fsync",latency);
    } else if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) {
        latencyAddSampleIfNeeded("aof-write-active-child",latency);
    } else {
        latencyAddSampleIfNeeded("aof-write-alone",latency);
    }
    latencyAddSampleIfNeeded("aof-write",latency);

    /* The write was performed: reset the postponed-flush start time. */
    server.aof_flush_postponed_start = 0;

    /* Failed or short write. */
    if (nwritten != (ssize_t)sdslen(server.aof_buf)) {
        static time_t last_write_error_log = 0;
        int can_log = 0;

        /* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */
        if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) {
            can_log = 1;
            last_write_error_log = server.unixtime;
        }

        /* Log the AOF write error and record the error code. */
        if (nwritten == -1) {
            if (can_log) {
                serverLog(LL_WARNING,"Error writing to the AOF file: %s",
                    strerror(write_errno));
            }
            /* Record the error code on every failure, not only when the
             * rate limiter allows the log line. */
            server.aof_last_write_errno = write_errno;
        } else {
            if (can_log) {
                serverLog(LL_WARNING,"Short write while writing to "
                                       "the AOF file: (nwritten=%lld, "
                                       "expected=%lld)",
                                       (long long)nwritten,
                                       (long long)sdslen(server.aof_buf));
            }

            /* Try to undo the partial write by truncating the file back to
             * its previous size (a kind of rollback). */
            if (ftruncate(server.aof_fd, server.aof_current_size) == -1) {
                if (can_log) {
                    serverLog(LL_WARNING, "Could not remove short write "
                             "from the append-only file.  Redis may refuse "
                             "to load the AOF the next time it starts.  "
                             "ftruncate: %s", strerror(errno));
                }
            } else {
                /* If the ftruncate() succeeded we can set nwritten to
                 * -1 since there is no longer partial data into the AOF. */
                nwritten = -1;
            }
            server.aof_last_write_errno = ENOSPC;
        }

        /* Handle the AOF write error. */
        if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
            /* We can't recover when the fsync policy is ALWAYS since the
             * reply for the client is already in the output buffers, and we
             * have the contract with the user that on acknowledged write data
             * is synced on disk. */
            serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
            exit(1);
        } else {
            /* Recover from failed write leaving data into the buffer. However
             * set an error to stop accepting writes as long as the error
             * condition is not cleared. */
            server.aof_last_write_status = C_ERR;

            /* Trim the sds buffer if there was a partial write, and there
             * was no way to undo it with ftruncate(2). */
            if (nwritten > 0) {
                server.aof_current_size += nwritten;
                sdsrange(server.aof_buf,nwritten,-1);
            }
            return; /* We'll try again on the next call... */
        }
    } else {
        /* Successful write(2). If AOF was in error state, restore the
         * OK state and log the event. */
        if (server.aof_last_write_status == C_ERR) {
            serverLog(LL_WARNING,
                "AOF write error looks solved, Redis can write again.");
            server.aof_last_write_status = C_OK;
        }
    }
    server.aof_current_size += nwritten;

    /* Reuse aof_buf when its total size (used + available) is below 4000
     * bytes; otherwise free it and start again from an empty buffer. */
    if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) {
        sdsclear(server.aof_buf);
    } else {
        sdsfree(server.aof_buf);
        server.aof_buf = sdsempty();
    }

    /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
     * children doing I/O in the background. */
    if (server.aof_no_fsync_on_rewrite &&
        (server.aof_child_pid != -1 || server.rdb_child_pid != -1))
            return;

    /* Perform the fsync if needed. */
    if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
        /* redis_fsync is defined as fdatasync() for Linux in order to avoid
         * flushing metadata. */
        latencyStartMonitor(latency);
        redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
        latencyEndMonitor(latency);
        latencyAddSampleIfNeeded("aof-fsync-always",latency);
        server.aof_last_fsync = server.unixtime;
    } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
                server.unixtime > server.aof_last_fsync)) {
        /* everysec: schedule a background fsync unless one is pending. */
        if (!sync_in_progress) aof_background_fsync(server.aof_fd);
        server.aof_last_fsync = server.unixtime;
    }
}
2.3 AOF读取
1:Redis启动时优先加载aof文件
2:debug loadaof加载aof
int main(int argc, char **argv) {
    ...
    loadDataFromDisk();
    ...
}

void loadDataFromDisk(void) {
    ...
   long long start = ustime();
    if (server.aof_state == AOF_ON) {
        if (loadAppendOnlyFile(server.aof_filename) == C_OK)
            serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
    } 
    ...
}

int loadAppendOnlyFile(char *filename) {
    ...
    // 程序模拟一个客户端执行从 aof 文件读出的命令。
    fakeClient = createAOFClient();
    ...

    ...
}

else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
        if (server.aof_state != AOF_OFF) flushAppendOnlyFile(1);
        emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
        protectClient(c);
        int ret = loadAppendOnlyFile(server.aof_filename);
        unprotectClient(c);
        if (ret != C_OK) {
            addReply(c,shared.err);
            return;
        }
        server.dirty = 0; /* Prevent AOF / replication */
        serverLog(LL_WARNING,"Append Only File loaded by DEBUG LOADAOF");
        addReply(c,shared.ok);
    }
2.4、AOF文件结构

在这里插入图片描述

2.5 AOF过期键的处理:
- 被惰性或者定期删除后,会追加一条del指令至aof文件
- AOF重写时:已过期的键不会被写入新的AOF文件

3.AOF重写源码分析

AOF持久化是通过保存被执行的写命令来记录数据库状态的,所以随着服务器运行时间的增长,AOF文件会越来越大,这样导致使用大文件还原所需的时间也就越多。重写并不是一条条分析aof文件中的日志,而是从数据库中读取现在的值,然后用一条命令来记录键值对,代替之前记录这个键值对的多条命令。
调用关系:rewriteAppendOnlyFileBackground(fork出子进程)—–> rewriteAppendOnlyFile(在子进程中执行重写)

3.1 、重写后的aof具备如下特点:

1. 过期的键不会写入(AOF持久化中被惰性或者定期删除后的键,会追加一条del指令至aof文件,并向客户端返回空;具体清除是在AOF重写时期。具体见函数rewriteAppendOnlyFileRio)
2. 重写使用进程内数据直接生成,这样新的AOF文件只保留最终数据的写入命令。
3. 多条写命令可以合并为一个
4. 单独开辟一个子进程执行rewrite

3.2 、触发条件

	1.手动触发:(可以被延时)
/* BGREWRITEAOF command handler.
 *
 * c - the client that issued the command; it receives the reply.
 *
 * Replies with an error if an AOF rewrite child already exists, schedules
 * the rewrite if an RDB child exists, and otherwise tries to start the
 * background rewrite right away. */
void bgrewriteaofCommand(redisClient *c) {
    /* Only one BGREWRITEAOF may run at a time. */
    if (server.aof_child_pid != -1) {
        addReplyError(c,"Background append only file rewriting already in progress");
        return;
    }

    /* A BGSAVE is running: flag the rewrite as scheduled so that it is
     * started later instead of now. */
    if (server.rdb_child_pid != -1) {
        server.aof_rewrite_scheduled = 1;
        addReplyStatus(c,"Background append only file rewriting scheduled");
        return;
    }

    /* No child is active: start the rewrite now. */
    if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
        addReplyStatus(c,"Background append only file rewriting started");
    } else {
        addReply(c,shared.err);
    }
}
2.主动触发:

根据配置文件中的auto-aof-rewrite-min-size和auto-aof-rewrite-percentage参数确定自动触发时机。即同时满足 aof_current_size > auto-aof-rewrite-min-size 和 (aof_current_size - aof_base_size) / aof_base_size × 100 >= auto-aof-rewrite-percentage

检测时间是在serverCron函数中
/* Trigger an AOF rewrite if needed. */
       if (server.aof_state == AOF_ON &&
           server.rdb_child_pid == -1 &&
           server.aof_child_pid == -1 &&
           server.aof_rewrite_perc &&
           server.aof_current_size > server.aof_rewrite_min_size)
       {
           long long base = server.aof_rewrite_base_size ?
               server.aof_rewrite_base_size : 1;
           long long growth = (server.aof_current_size*100/base) - 100;
           if (growth >= server.aof_rewrite_perc) {
               serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
               rewriteAppendOnlyFileBackground();
           }
       }

3.3 、执行流程

1:bgrewrite/达到主动触发条件
2:开启子进程进行重写
3:若有rdb子进程执行则延时
4:若有重写子进程返回错误
5:开启子进程
6:备份完成结束子进程,父进程wait3收到子进程结束信号,将重写缓冲区同步到重写AOF文件中
7:在父进程回收子进程中,会将剩余的差异数据全部写入到临时文件中,最后调用rename原子性的替换文件
注意:AOF重写时以及RDB,也会尽可能避免rehash操作

3.4、重写缓冲区

list *aof_rewrite_buf_blocks;   /* Hold changes during an AOF rewrite. */

#define AOF_RW_BUF_BLOCK_SIZE (1024*1024*10)    /* 10 MB per block */

/* One block of the AOF rewrite buffer: a fixed-size byte array of
 * AOF_RW_BUF_BLOCK_SIZE bytes plus two counters. */
typedef struct aofrwblock {
    /* `used` is the per-block amount summed by aofRewriteBufferSize();
     * `free` is presumably the remaining room in `buf` - TODO confirm. */
    unsigned long used, free;
    char buf[AOF_RW_BUF_BLOCK_SIZE];
} aofrwblock;

/* Return the current size of the AOF rewrite buffer, computed as the sum
 * of the `used` counters of every block in server.aof_rewrite_buf_blocks. */
unsigned long aofRewriteBufferSize(void) {
    unsigned long total = 0;
    listIter iter;
    listNode *node;

    listRewind(server.aof_rewrite_buf_blocks,&iter);
    for (node = listNext(&iter); node; node = listNext(&iter)) {
        aofrwblock *blk = listNodeValue(node);
        total += blk->used;
    }
    return total;
}

总结

一些需要注意的点:RDB子进程和AOF重写子进程互斥,不能同时存在,冲突时通过延迟(schedule)执行来避免。AOF持久化只会在定时函数被调用时,根据指定的策略进行刷盘。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值