redis作为全内存的服务程序,一旦进程重启则内存数据全部丢失,因此为了能在重启后数据依然存在,则需要将数据持久化到磁盘上。
redis第一版持久化Redis DataBase(RDB)全量备份
redis的第一个版本0.091,就已经具备了持久化功能,并且支持自动化持久化,通过fork生成子进程的方式进行持久化,借助Linux的cow(copy on write)特性,生成内存数据库快照,将快照数据写入磁盘。
配置redis.conf
在特定的时间内触发了特定的数据修改次数,将触发后台持久化。
save 900 1 #900秒内触发一次数据改变
save 300 10 #300秒内触发10次改变
save 60 10000 # 60秒内触发10000次改变
加载配置
/* Config loader (excerpt): handles the "save <seconds> <changes>" directive
 * by registering one save rule per matching line. */
static void loadServerConfig(char *filename) {
...
/* A "save" line with exactly three tokens (keyword + two values). */
else if (!strcmp(argv[0],"save") && argc == 3) {
int seconds = atoi(argv[1]);
int changes = atoi(argv[2]);
...
appendServerSaveParams(seconds,changes);
}
...
}
static void appendServerSaveParams(time_t seconds, int changes) {
server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
if (server.saveparams == NULL) oom("appendServerSaveParams");
server.saveparams[server.saveparamslen].seconds = seconds;
server.saveparams[server.saveparamslen].changes = changes;
server.saveparamslen++;
}
周期性的检测是否满足条件
/* Periodic timer callback (excerpt). If a background save is flagged as in
 * progress it polls for an exited child; otherwise it walks the save rules
 * and starts a background save as soon as one of them matches. */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
...
// An RDB dump is currently being generated.
if (server.bgsaveinprogress) {
int statloc;
if (wait4(-1,&statloc,WNOHANG,NULL)) { // non-blocking check: has a child exited?
int exitcode = WEXITSTATUS(statloc);
...
server.bgsaveinprogress = 0; // the background save is over
}
} else {
// No background save is running: test each configured save rule.
time_t now = time(NULL);
for (j = 0; j < server.saveparamslen; j++) {
struct saveparam *sp = server.saveparams+j;
if (server.dirty >= sp->changes &&
now-server.lastsave > sp->seconds) { // enough changes AND enough elapsed time
...
// Rule satisfied: start generating the RDB file in the background.
rdbSaveBackground(server.dbfilename);
break;
}
}
}
...
}
后台生成rdb文件
/* Start an RDB save in a forked child (excerpt). Returns REDIS_ERR when a
 * background save is already flagged; otherwise the parent sets the flag and
 * returns REDIS_OK without waiting for the child. */
static int rdbSaveBackground(char *filename) {
pid_t childpid;
if (server.bgsaveinprogress) return REDIS_ERR; // a background save is already running: refuse
if ((childpid = fork()) == 0) { // create the child process
/* Child */
// Write the in-memory data to the file.
rdbSave(filename)
...
} else {
// The parent only sets the status flag and returns right away, without waiting.
// NOTE(review): in the code shown, a failed fork() (-1) also takes this branch.
server.bgsaveinprogress = 1;
return REDIS_OK;
}
return REDIS_OK; /* unreached */
}
/* Dump every database to `filename` (excerpt): the data is written to a
 * temporary file, flushed and fsync'ed, and the temporary file is then
 * renamed to the target name. */
static int rdbSave(char *filename) {
dictIterator *di = NULL;
dictEntry *de;
FILE *fp;
char tmpfile[256];
int j;
time_t now = time(NULL);
// Write the in-memory data to a temporary file first.
snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
fp = fopen(tmpfile,"w");
...
// Write the file header.
fwrite("REDIS0001",9,1,fp)
// Iterate over the databases and serialize their data into the file.
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
dict *d = db->dict;
...
}
...
// Write the end-of-file marker.
rdbSaveType(fp,REDIS_EOF)
// Flush the stdio buffer, then force the data to disk.
fflush(fp);
fsync(fileno(fp));
fclose(fp);
// Rename the temporary file to the final RDB file name.
rename(tmpfile,filename)
}
进程重启时,加载rdb文件,恢复数据
/* Startup (excerpt): try to restore the dataset from the RDB file. */
int main(int argc, char **argv) {
...
// The notice is logged only when rdbLoad() reports success.
if (rdbLoad(server.dbfilename) == REDIS_OK)
redisLog(REDIS_NOTICE,"DB loaded from disk");
...
}
// Main RDB loading routine (excerpt): validates the file header, then reads
// typed entries until the REDIS_EOF marker. Returns REDIS_ERR on a bad
// signature or an unsupported version, REDIS_OK once the end marker is read.
static int rdbLoad(char *filename) {
FILE *fp;
robj *keyobj = NULL;
uint32_t dbid;
int type, retval, rdbver;
dict *d = server.db[0].dict;
redisDb *db = server.db+0;
char buf[1024];
time_t expiretime = -1, now = time(NULL);
fp = fopen(filename,"r");
...
// Read the 9-byte header.
fread(buf,9,1,fp) == 0)
buf[9] = '\0';
// Validate the signature: the first 5 bytes must be "REDIS".
if (memcmp(buf,"REDIS",5) != 0) {
fclose(fp);
redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
return REDIS_ERR;
}
// The digits following "REDIS" are the format version; anything above 1 is rejected.
rdbver = atoi(buf+5);
if (rdbver > 1) {
fclose(fp);
redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
return REDIS_ERR;
}
// Deserialize the entries.
while(1) {
robj *o;
/* Read type. */
if ((type = rdbLoadType(fp)) == -1) goto eoferr;
...
if (type == REDIS_EOF) break; // end marker reached: loading is complete
...
}
fclose(fp);
return REDIS_OK;
...
}
- fork也是一个开销很大的操作,在业务繁忙的时候不适合频繁的进行rdb操作
- 当系统很空闲的时候,只有一两个修改,没必要进行rdb操作
- 当数据量很大时,持久化过程将很长;如果在此过程中有大量数据更新,此时发生异常重启,将导致大量的数据丢失
redis第二版持久化Append Only File (AOF) 增量备份
redis 1.1.90版本中引入了AOF,即将每条修改数据的命令都追加到文件中,重启时加载文件并一条一条地执行以恢复数据(命令重放)
配置redis.conf
#开关 no/yes
appendonly no
#刷新磁盘的策略
appendfsync always
# appendfsync everysec
# appendfsync no
写aof文件(默认appendonly.log)
/* Command dispatch (excerpt): runs the command and, when AOF is enabled and
 * the command changed server.dirty, appends it to the append only file. */
static int processCommand(redisClient *c) {
...
/* Exec the command */
dirty = server.dirty;
cmd->proc(c);
// AOF is enabled and this command modified data (the dirty counter moved):
// append the command to the file.
if (server.appendonly && server.dirty-dirty)
feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
...
}
/* Serialize one command as "*<argc>" followed by "$<len>" + payload pairs and
 * write it to the AOF descriptor (excerpt). While a background rewrite child
 * exists the same bytes are also accumulated in server.bgrewritebuf. Finally
 * the configured fsync policy is applied. */
static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
sds buf = sdsempty();
int j;
ssize_t nwritten;
time_t now;
robj *tmpargv[3];
...
// Build the serialized command.
/* Append the actual command */
buf = sdscatprintf(buf,"*%d\r\n",argc);
for (j = 0; j < argc; j++) {
robj *o = argv[j];
o = getDecodedObject(o);
buf = sdscatprintf(buf,"$%lu\r\n",sdslen(o->ptr));
buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
buf = sdscatlen(buf,"\r\n",2);
decrRefCount(o);
}
// Plain write() to the AOF file descriptor.
nwritten = write(server.appendfd,buf,sdslen(buf));
...
// While an AOF rewrite child exists, also buffer this command.
if (server.bgrewritechildpid != -1)
server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
...
// fsync policy: on every write, or when more than one second has elapsed
// since the last fsync.
now = time(NULL);
if (server.appendfsync == APPENDFSYNC_ALWAYS ||
(server.appendfsync == APPENDFSYNC_EVERYSEC &&
now-server.lastfsync > 1))
{
fsync(server.appendfd); /* Let's try to get this data on the disk */
server.lastfsync = now;
}
}
重启时加载aof文件
/* Startup (excerpt): when AOF is enabled the append only file is loaded. */
int main(int argc, char **argv) {
...
if (server.appendonly) { // AOF is enabled: load the append only file
loadAppendOnlyFile(server.appendfilename)
...
}
...
}
/* Replay the append only file (excerpt): each command is parsed from its
 * "*<argc>" / "$<len>" framing and executed through a fake client.
 * Returns REDIS_ERR for an empty file and REDIS_OK after reaching EOF. */
int loadAppendOnlyFile(char *filename) {
struct redisClient *fakeClient;
FILE *fp = fopen(filename,"r");
struct redis_stat sb;
// NOTE(review): fp is passed to fileno() before any NULL check visible in this excerpt.
if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
return REDIS_ERR;
...
// Create a fake client, then read and execute the commands one at a time.
fakeClient = createFakeClient();
while(1) {
...
// Read one line; EOF ends the replay, any other failure is a read error.
if (fgets(buf,sizeof(buf),fp) == NULL) {
if (feof(fp))
break;
else
goto readerr;
}
// Parse the arguments: "*<argc>", then argc times "$<len>" plus payload.
if (buf[0] != '*') goto fmterr;
argc = atoi(buf+1);
argv = zmalloc(sizeof(robj*)*argc);
for (j = 0; j < argc; j++) {
if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
if (buf[0] != '$') goto fmterr;
len = strtol(buf+1,NULL,10);
argsds = sdsnewlen(NULL,len);
if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
argv[j] = createObject(REDIS_STRING,argsds);
if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
}
// Find the command handler by name.
/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
...
// Execute the command.
/* Run the command in the context of a fake client */
fakeClient->argc = argc;
fakeClient->argv = argv;
cmd->proc(fakeClient);
/* Discard the reply objects list from the fake client */
// No reply is needed, so the queued replies are simply dropped.
while(listLength(fakeClient->reply))
listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
// Release the arguments.
/* Clean up, ready for the next command */
for (j = 0; j < argc; j++) decrRefCount(argv[j]);
zfree(argv);
}
fclose(fp);
freeFakeClient(fakeClient);
return REDIS_OK;
...
}
aof重写
随着时间的推移,aof文件将越来越大
- 重启加载aof文件时,耗时越来越长
- 很多key可能已经被删除了
- 很多命令可以合并为一条命令
所以需要给aof文件来个瘦身手术。
当前版本只能通过命令(bgrewriteaof)来进行aof文件瘦身
/* Command table (excerpt): the entry shown maps the "bgrewriteaof" command
 * name to bgrewriteaofCommand. */
static struct redisCommand cmdTable[] = {
...
{"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE},
...
}
// Command handler executed in the main process (excerpt): it starts the
// background AOF rewrite.
static void bgrewriteaofCommand(redisClient *c) {
...
rewriteAppendOnlyFileBackground()
...
}
/* Fork a child that rewrites the AOF into a temporary file (excerpt). The
 * child exits with 0 on success and 1 on failure; the parent records the
 * child's pid and returns REDIS_OK. */
static int rewriteAppendOnlyFileBackground(void) {
pid_t childpid;
...
if ((childpid = fork()) == 0) {
...
// The child performs the AOF rewrite, much like the background RDB save.
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
exit(0);
} else {
exit(1);
}
} else {
// The main process returns and keeps serving other requests.
...
server.bgrewritechildpid = childpid;
...
return REDIS_OK;
}
return REDIS_OK; /* unreached */
}
// Runs in the child process.
// Rewrite the dataset as commands into `filename` (excerpt): the output goes
// to a pid-based temporary file, is flushed and fsync'ed, and is then renamed
// to `filename`.
static int rewriteAppendOnlyFile(char *filename) {
dictIterator *di = NULL;
dictEntry *de;
FILE *fp;
char tmpfile[256];
int j;
time_t now = time(NULL);
/* Note that we have to use a different temp name here compared to the
* one used by rewriteAppendOnlyFileBackground() function. */
snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
fp = fopen(tmpfile,"w");
...
// Iterate over all databases and convert their data into commands.
for (j = 0; j < server.dbnum; j++) {
char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
redisDb *db = server.db+j;
...
}
/* Make sure data will not remain on the OS's output buffers */
fflush(fp);
fsync(fileno(fp));
fclose(fp);
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok. */
if (rename(tmpfile,filename) == -1) {
...
}
redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
return REDIS_OK;
...
}
子进程将内存数据写入临时的aof文件后,主进程将检测子进程的结束,如果重写结束则将重写期间的新命令追加到新的文件中,然后切换新的文件为aof文件。
/* Periodic timer callback (excerpt): while a background save or rewrite child
 * is recorded, poll without blocking for an exited child and dispatch to the
 * matching completion handler. */
static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
...
if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
int statloc;
pid_t pid;
if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
if (pid == server.bgsavechildpid) {
backgroundSaveDoneHandler(statloc);
} else {
backgroundRewriteDoneHandler(statloc); // the rewrite child finished: handle it
}
}
}
...
}
/* Called in the parent once the rewrite child has terminated (excerpt). On
 * the success path it appends server.bgrewritebuf to the child's temporary
 * file, renames that file to server.appendfilename and, if AOF is enabled,
 * switches server.appendfd to the new file. */
void backgroundRewriteDoneHandler(int statloc) {
...
// Successful rewrite handling.
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
fd = open(tmpfile,O_WRONLY|O_APPEND);
// Append the buffered commands (server.bgrewritebuf) to the new temporary AOF.
/* Flush our data... */
write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf))
...
/* Now our work is to rename the temp file into the stable file. And
* switch the file descriptor used by the server for append only. */
// Switch files.
rename(tmpfile,server.appendfilename)
/* Mission completed... almost */
redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
if (server.appendfd != -1) { // close the descriptor of the previous AOF
/* If append only is actually enabled... */
close(server.appendfd);
server.appendfd = fd;
fsync(fd);
server.appendseldb = -1; /* Make sure it will issue SELECT */
redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
} else {
/* If append only is disabled we just generate a dump in this
* format. Why not? */
close(fd);
}
...
cleanup:
sdsfree(server.bgrewritebuf);
server.bgrewritebuf = sdsempty(); // reset the rewrite buffer to an empty string
aofRemoveTempFile(server.bgrewritechildpid);
server.bgrewritechildpid = -1;
}
- 最后将重写期间的新的命令追加到新的aof文件中,如果此期间有大量的修改数据的命令产生,将阻塞主进程,不能响应其他请求
- 在重启时,加载aof一行一行的执行,相比于rdb加载,耗时更长
redis第三版持久化 RDB + AOF (混合备份)
结合rdb重启加载快以及aof的数据丢失少,在redis4.0.0引入了rdb+aof结合的备份方式
配置开关
# yes/no
aof-use-rdb-preamble no
启动加载aof文件
/* Startup data loading (excerpt): loads the AOF when server.aof_state is
 * AOF_ON, otherwise loads the RDB file. */
void loadDataFromDisk(void) {
long long start = ustime();
if (server.aof_state == AOF_ON) { // AOF is enabled: load the AOF file
loadAppendOnlyFile(server.aof_filename)
...
} else {
// Otherwise load the RDB file.
...
rdbLoad(server.rdb_filename,&rsi)
}
/* Load the append only file (excerpt). If the file starts with the 5-byte
 * "REDIS" signature, rdbLoadRio() is called first; the command-by-command
 * replay loop below runs in either case. */
int loadAppendOnlyFile(char *filename) {
...
FILE *fp = fopen(filename,"r");
...
char sig[5]; /* "REDIS" */
// Detect an RDB preamble: in that case the file begins with "REDIS".
if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) {
/* No RDB preamble, seek back at 0 offset. */
if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
} else {
...
fseek(fp,0,SEEK_SET)
...
rdbLoadRio(&rdb,NULL) // load the RDB part
...
}
/* Read the actual AOF file, in REPL format, command by command. */
while(1) {
...
if (fgets(buf,sizeof(buf),fp) == NULL) {
if (feof(fp))
break;
else
goto readerr;
}
...
/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
...
/* Run the command in the context of a fake client */
fakeClient->cmd = cmd;
cmd->proc(fakeClient);
...
/* Clean up. Command code may have changed argv/argc so we use the
* argv/argc of the client instead of the local variables. */
freeFakeClientArgv(fakeClient);
...
}
....
}
记录aof
正常aof记录
aof重写
重写策略
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
定时任务检测是否满足重写策略
/* Periodic timer callback (excerpt): triggers an automatic AOF rewrite when
 * no RDB or AOF child is recorded, the rewrite percentage is non-zero, the
 * current AOF size exceeds the minimum size, and the growth over the base
 * size reaches the configured percentage. */
serverCron()
{
...
/* Trigger an AOF rewrite if needed */
if (server.rdb_child_pid == -1 &&
server.aof_child_pid == -1 &&
server.aof_rewrite_perc &&
server.aof_current_size > server.aof_rewrite_min_size) // file-size condition is met
{
// Compute the growth percentage relative to the base size
// (a base of 0 is replaced by 1 to avoid dividing by zero).
long long base = server.aof_rewrite_base_size ?
server.aof_rewrite_base_size : 1;
long long growth = (server.aof_current_size*100/base) - 100;
// Growth threshold reached: start the AOF rewrite.
if (growth >= server.aof_rewrite_perc) {
serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
rewriteAppendOnlyFileBackground();
}
}
...
}
满足大小限制以及增长率,进行aof重写
/* Start a background AOF rewrite (excerpt): creates the parent/child pipes,
 * forks, and lets the child rewrite into a pid-based temporary file. Returns
 * C_ERR if the pipes cannot be created; the parent records the child pid and
 * returns C_OK. */
int rewriteAppendOnlyFileBackground(void) {
...
// Create the pipes used for parent/child communication.
if (aofCreatePipes() != C_OK) return C_ERR;
openChildInfoPipe();
...
// Create the child process.
if ((childpid = fork()) == 0) {
...
// Child: rewrite the AOF.
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
if (rewriteAppendOnlyFile(tmpfile) == C_OK) {
...
exitFromChild(0);
} else {
exitFromChild(1);
}
} else {
....
// Parent: record the child pid and return.
server.aof_child_pid = childpid;
...
return C_OK;
}
return C_OK; /* unreached */
}
/* Child-side AOF rewrite (excerpt). Writes the dataset to a temporary file,
 * either through rdbSaveRio() when server.aof_use_rdb_preamble is set or
 * through rewriteAppendOnlyFileRio() otherwise, then collects the diff sent
 * by the parent, appends it, syncs the file and renames it to `filename`.
 * Returns C_OK on success and C_ERR if the final rename fails. */
int rewriteAppendOnlyFile(char *filename) {
...
// Write to a temporary file.
snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
fp = fopen(tmpfile,"w");
// Buffer that accumulates the commands produced while the rewrite is running.
server.aof_child_diff = sdsempty();
// Key switch: when this option is set the rewrite is emitted in RDB format.
if (server.aof_use_rdb_preamble) {
int error;
if (rdbSaveRio(&aof,&error,RDB_SAVE_AOF_PREAMBLE,NULL) == C_ERR) {
errno = error;
goto werr;
}
} else {
// Otherwise rewrite using the regular AOF rewrite routine.
if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr;
}
/* Do an initial slow fsync here while the parent is still sending
* data, in order to make the next final fsync faster. */
if (fflush(fp) == EOF) goto werr;
if (fsync(fileno(fp)) == -1) goto werr;
// Read data from the parent through the pipe: loop for at most 1000 ms, or
// until 20 consecutive waits report no readable data.
mstime_t start = mstime();
while(mstime()-start < 1000 && nodata < 20) {
if (aeWait(server.aof_pipe_read_data_from_parent, AE_READABLE, 1) <= 0)
{
nodata++;
continue;
}
nodata = 0; /* Start counting from zero, we stop on N *contiguous*
timeouts. */
aofReadDiffFromParent(); // read and store into the diff buffer
}
// Tell the parent to stop sending.
/* Ask the master to stop sending diffs. */
if (write(server.aof_pipe_write_ack_to_parent,"!",1) != 1) goto werr;
// Read the parent's acknowledgement.
if (anetNonBlock(NULL,server.aof_pipe_read_ack_from_parent) != ANET_OK)
goto werr;
/* We read the ACK from the server using a timeout (the value passed below
* is 5000; NOTE(review): presumably milliseconds, confirm against
* syncRead). Normally it should reply ASAP, but just in case we lose its
* reply, we are sure the child will eventually get terminated. */
if (syncRead(server.aof_pipe_read_ack_from_parent,&byte,1,5000) != 1 ||
byte != '!') goto werr;
serverLog(LL_NOTICE,"Parent agreed to stop sending diffs. Finalizing AOF...");
// Read whatever data is still left in the pipe.
/* Read the final diff if any. */
aofReadDiffFromParent();
// Append the accumulated diff to the AOF.
if (rioWrite(&aof,server.aof_child_diff,sdslen(server.aof_child_diff)) == 0)
goto werr;
// Flush to disk.
/* Make sure data will not remain on the OS's output buffers */
if (fflush(fp) == EOF) goto werr;
if (fsync(fileno(fp)) == -1) goto werr;
if (fclose(fp) == EOF) goto werr;
// Swap the new file in place of the old one.
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok. */
if (rename(tmpfile,filename) == -1) {
serverLog(LL_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
unlink(tmpfile);
return C_ERR;
}
serverLog(LL_NOTICE,"SYNC append only file rewrite performed");
return C_OK;
...
}
父进程处理
/* Call-chain sketch: processCommand() invokes call(). */
processCommand()
{
...
call()
...
}
/* Call-chain sketch: call() invokes propagate(). */
void call(client *c, int flags) {
...
propagate()
...
}
/* Call-chain sketch: propagate() forwards the command, database id and
 * arguments to feedAppendOnlyFile(). */
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int flags)
{
...
feedAppendOnlyFile(cmd,dbid,argv,argc);
...
}
/* Parent-side AOF feed (excerpt): serializes the command, appends it to
 * server.aof_buf when AOF is on, and additionally hands it to
 * aofRewriteBufferAppend() while a rewrite child is recorded. */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
...
// Build the serialized command.
...
buf = catAppendOnlyGenericCommand(buf,argc,argv);
...
// If AOF is enabled, append to the regular AOF buffer. During a rewrite this
// amounts to writing the command twice, as a safeguard against failures.
if (server.aof_state == AOF_ON)
server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf));
// If an AOF rewrite child is recorded, also queue the command for that child.
if (server.aof_child_pid != -1)
aofRewriteBufferAppend((unsigned char*)buf,sdslen(buf));
...
}
/* Queue `len` bytes starting at `s` on the server.aof_rewrite_buf_blocks
 * list, filling the tail block first and allocating further blocks of
 * AOF_RW_BUF_BLOCK_SIZE as needed. Afterwards, make sure a writable-event
 * handler is registered on the pipe towards the rewrite child. */
void aofRewriteBufferAppend(unsigned char *s, unsigned long len) {
    listNode *tail = listLast(server.aof_rewrite_buf_blocks);
    aofrwblock *cur = tail ? tail->value : NULL;

    /* Store the data in the linked list of blocks. */
    while (len) {
        /* When a block is already available, copy as much as fits in it. */
        if (cur) {
            unsigned long chunk = len;
            if (cur->free < len) chunk = cur->free;
            if (chunk) { /* The current block still has room. */
                memcpy(cur->buf + cur->used, s, chunk);
                cur->used += chunk;
                cur->free -= chunk;
                s += chunk;
                len -= chunk;
            }
        }
        if (!len) break;

        /* Either there was no block at all, or the current one is full. */
        cur = zmalloc(sizeof(*cur));
        cur->free = AOF_RW_BUF_BLOCK_SIZE;
        cur->used = 0;
        listAddNodeTail(server.aof_rewrite_buf_blocks, cur);

        /* Log each time the block count crosses a multiple of 10 (notice)
         * or of 100 (warning). */
        int numblocks = listLength(server.aof_rewrite_buf_blocks);
        if (((numblocks + 1) % 10) != 0) continue;

        int level = LL_NOTICE;
        if (((numblocks + 1) % 100) == 0) level = LL_WARNING;
        serverLog(level,"Background AOF buffer size: %lu MB",
            aofRewriteBufferSize()/(1024*1024));
    }

    /* Register the callback that sends the queued data to the rewrite child,
     * unless an event is already installed on that descriptor. */
    if (aeGetFileEvents(server.el, server.aof_pipe_write_data_to_child) != 0)
        return;
    aeCreateFileEvent(server.el, server.aof_pipe_write_data_to_child,
        AE_WRITABLE, aofChildWriteDiffData, NULL);
}
/* Writable-event handler for the pipe towards the rewrite child (excerpt):
 * drains server.aof_rewrite_buf_blocks into the pipe block by block. The
 * event is removed when the stop flag is set or the list is empty; the
 * handler returns early when write() makes no progress. */
void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
...
// Send the data held in the list to the child.
while(1) {
ln = listFirst(server.aof_rewrite_buf_blocks);
block = ln ? ln->value : NULL;
// The child asked the parent to stop sending, or the list is fully drained.
if (server.aof_stop_sending_diff || !block) {
aeDeleteFileEvent(server.el,server.aof_pipe_write_data_to_child,
AE_WRITABLE);
return;
}
if (block->used > 0) {
// Send to the child through the pipe.
nwritten = write(server.aof_pipe_write_data_to_child,
block->buf,block->used);
if (nwritten <= 0) return;
// Shift the unsent remainder to the start of the block.
memmove(block->buf,block->buf+nwritten,block->used-nwritten);
block->used -= nwritten;
block->free += nwritten;
}
if (block->used == 0) listDelNode(server.aof_rewrite_buf_blocks,ln);
}
}
/* Readable-event handler in the parent: when the child's one-byte "!" stop
 * request arrives, set server.aof_stop_sending_diff and answer with a "!"
 * acknowledgement. The handler always unregisters itself afterwards. */
void aofChildPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
    char byte;
    UNUSED(el);
    UNUSED(privdata);
    UNUSED(mask);

    int stop_requested = (read(fd,&byte,1) == 1) && (byte == '!');
    if (stop_requested) {
        serverLog(LL_NOTICE,"AOF rewrite child asks to stop sending diffs.");
        server.aof_stop_sending_diff = 1;

        ssize_t acked = write(server.aof_pipe_write_ack_to_child,"!",1);
        if (acked != 1) {
            /* A failed ack is only reported, never retried: the child side
             * relies on its own timeout if the kernel cannot buffer the
             * write or the child has already terminated. */
            serverLog(LL_WARNING,"Can't send ACK to AOF child: %s",
                strerror(errno));
        }
    }

    /* This handler can fire only once per rewrite, so remove it now. */
    aeDeleteFileEvent(server.el,server.aof_pipe_read_ack_from_child,AE_READABLE);
}