main函数
/* Process entry point.
 *
 * In order: seeds the random generators and the dict hash function, builds
 * the default server configuration, optionally hands control to the
 * redis-check-rdb / redis-check-aof tools, turns the command line into
 * configuration text and loads it, initializes the server and its listeners,
 * calls loadDataFromDisk() when not in Sentinel mode, and finally runs the
 * event loop via aeMain().
 *
 * argc/argv are the usual process arguments; argv[1] is treated as the
 * config file path when it does not start with '-'.
 *
 * Returns 0 after aeMain() returns. Several paths below call exit()
 * directly instead of returning. */
int main(int argc, char **argv) {
struct timeval tv;
int j;
/* Set to 1 when a lone "-" argument asks for config to be read from stdin. */
char config_from_stdin = 0;
/* We need to initialize our libraries, and the server configuration. */
#ifdef INIT_SETPROCTITLE_REPLACEMENT
spt_init(argc, argv);
#endif
tzset(); /* Populates 'timezone' global. */
zmalloc_set_oom_handler(redisOutOfMemoryHandler);
/* To achieve entropy, in case of containers, their time() and getpid() can
* be the same. But value of tv_usec is fast enough to make the difference */
gettimeofday(&tv,NULL);
srand(time(NULL)^getpid()^tv.tv_usec);
srandom(time(NULL)^getpid()^tv.tv_usec);
init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
crc64_init();
/* Store umask value. Because umask(2) only offers a set-and-get API we have
* to reset it and restore it back. We do this early to avoid a potential
* race condition with threads that could be creating files or directories.
*/
umask(server.umask = umask(0777));
/* Seed the dict hash function with 16 bytes from getRandomBytes(). */
uint8_t hashseed[16];
getRandomBytes(hashseed,sizeof(hashseed));
dictSetHashFunctionSeed(hashseed);
/* exec_name points at the last '/' of argv[0] (the slash is included), or
 * at argv[0] itself when it contains no '/'. It is only used for the
 * sentinel-mode check and the strstr() checks further down. */
char *exec_name = strrchr(argv[0], '/');
if (exec_name == NULL) exec_name = argv[0];
server.sentinel_mode = checkForSentinelMode(argc,argv, exec_name);
initServerConfig();
ACLInit(); /* The ACL subsystem must be initialized ASAP because the
basic networking code and client creation depends on it. */
moduleInitModulesSystem();
connTypeInitialize();
/* Store the executable path and arguments in a safe place in order
* to be able to restart the server later. */
server.executable = getAbsolutePath(argv[0]);
/* exec_argv is a NULL-terminated array holding a private copy of every
 * argv[] string. */
server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
server.exec_argv[argc] = NULL;
for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
/* We need to init sentinel right now as parsing the configuration file
* in sentinel mode will have the effect of populating the sentinel
* data structures with master nodes to monitor. */
if (server.sentinel_mode) {
initSentinelConfig();
initSentinel();
}
/* Check if we need to start in redis-check-rdb/aof mode. We just execute
* the program main. However the program is part of the Redis executable
* so that we can easily execute an RDB check on loading errors. */
/* NOTE(review): nothing here stops execution after these calls, so they are
 * presumably expected not to return; if one did, control would fall through
 * to the normal option parsing below -- confirm. */
if (strstr(exec_name,"redis-check-rdb") != NULL)
redis_check_rdb_main(argc,argv,NULL);
else if (strstr(exec_name,"redis-check-aof") != NULL)
redis_check_aof_main(argc,argv);
/* At least one command line argument was given. */
if (argc >= 2) {
j = 1; /* First option to parse in argv[] */
/* Accumulates the "--name value" arguments as config-file style lines. */
sds options = sdsempty();
/* Handle special options --help and --version */
/* NOTE(review): version() and usage() presumably terminate the process;
 * otherwise parsing would simply continue -- confirm. */
if (strcmp(argv[1], "-v") == 0 ||
strcmp(argv[1], "--version") == 0) version();
if (strcmp(argv[1], "--help") == 0 ||
strcmp(argv[1], "-h") == 0) usage();
/* --test-memory requires exactly one extra argument (argc == 3); both
 * branches exit, so nothing below runs in that mode.
 * NOTE(review): atoi() performs no error checking on argv[2]. */
if (strcmp(argv[1], "--test-memory") == 0) {
if (argc == 3) {
memtest(atoi(argv[2]),50);
exit(0);
} else {
fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
exit(1);
}
/* The `if` that follows the closing brace is an independent statement, not
 * an `else if`. The exit status is 0 when syscheck() returns non-zero and 1
 * otherwise. */
} if (strcmp(argv[1], "--check-system") == 0) {
exit(syscheck() ? 0 : 1);
}
/* Parse command line options
* Precedence wise, File, stdin, explicit options -- last config is the one that matters.
*
* First argument is the config file name? */
if (argv[1][0] != '-') {
/* Replace the config file in server.exec_argv with its absolute path. */
server.configfile = getAbsolutePath(argv[1]);
zfree(server.exec_argv[1]);
server.exec_argv[1] = zstrdup(server.configfile);
j = 2; // Skip this arg when parsing options
}
sds *argv_tmp;
int argc_tmp;
/* 0 while the most recent "--name" is still waiting for its value. In that
 * state the next argument is taken as the value even if it starts with
 * "--", because the option-name branch below requires this flag to be set. */
int handled_last_config_arg = 1;
while(j < argc) {
/* Either first or last argument - Should we read config from stdin? */
if (argv[j][0] == '-' && argv[j][1] == '\0' && (j == 1 || j == argc-1)) {
config_from_stdin = 1;
}
/* All the other options are parsed and conceptually appended to the
* configuration file. For instance --port 6380 will generate the
* string "port 6380\n" to be parsed after the actual config file
* and stdin input are parsed (if they exist).
* Only consider that if the last config has at least one argument. */
else if (handled_last_config_arg && argv[j][0] == '-' && argv[j][1] == '-') {
/* Option name */
if (sdslen(options)) options = sdscat(options,"\n");
/* argv[j]+2 for removing the preceding `--` */
options = sdscat(options,argv[j]+2);
options = sdscat(options," ");
/* Tokenize argv[j] only to learn how many tokens it holds; the tokens
 * themselves are not used and are freed at the end of this branch. */
argv_tmp = sdssplitargs(argv[j], &argc_tmp);
if (argc_tmp == 1) {
/* Means that we only have one option name, like --port or "--port " */
handled_last_config_arg = 0;
if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
!strcasecmp(argv[j], "--save"))
{
/* Special case: handle some things like `--save --config value`.
* In this case, if next argument starts with `--`, we will reset
* handled_last_config_arg flag and append an empty "" config value
* to the options, so it will become `--save "" --config value`.
* We are doing it to be compatible with pre 7.0 behavior (which we
* break it in #10660, 7.0.1), since there might be users who generate
* a command line from an array and when it's empty that's what they produce. */
options = sdscat(options, "\"\"");
handled_last_config_arg = 1;
}
else if ((j == argc-1) && !strcasecmp(argv[j], "--save")) {
/* Special case: when empty save is the last argument.
* In this case, we append an empty "" config value to the options,
* so it will become `--save ""` and will follow the same reset thing. */
options = sdscat(options, "\"\"");
}
else if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
!strcasecmp(argv[j], "--sentinel"))
{
/* Special case: handle some things like `--sentinel --config value`.
* It is a pseudo config option with no value. In this case, if next
* argument starts with `--`, we will reset handled_last_config_arg flag.
* We are doing it to be compatible with pre 7.0 behavior (which we
* break it in #10660, 7.0.1). */
/* Appending the empty string adds no characters to options. */
options = sdscat(options, "");
handled_last_config_arg = 1;
}
else if ((j == argc-1) && !strcasecmp(argv[j], "--sentinel")) {
/* Special case: when --sentinel is the last argument.
* It is a pseudo config option with no value. In this case, do nothing.
* We are doing it to be compatible with pre 7.0 behavior (which we
* break it in #10660, 7.0.1). */
/* Appending the empty string adds no characters to options. */
options = sdscat(options, "");
}
} else {
/* Means that we are passing both config name and it's value in the same arg,
* like "--port 6380", so we need to reset handled_last_config_arg flag. */
handled_last_config_arg = 1;
}
sdsfreesplitres(argv_tmp, argc_tmp);
} else {
/* Option argument */
options = sdscatrepr(options,argv[j],strlen(argv[j]));
options = sdscat(options," ");
handled_last_config_arg = 1;
}
j++;
}
/* Load configuration from server.configfile, the stdin flag and the
 * options string assembled above. */
loadServerConfig(server.configfile, config_from_stdin, options);
if (server.sentinel_mode) loadSentinelConfigFromQueue();
sdsfree(options);
}
if (server.sentinel_mode) sentinelCheckConfigFile();
/* daemonize() is called only when server.daemonize is set and the process
 * is not detected as supervised. */
server.supervised = redisIsSupervised(server.supervised_mode);
int background = server.daemonize && !server.supervised;
if (background) daemonize();
serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
serverLog(LL_WARNING,
"Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
REDIS_VERSION,
(sizeof(long) == 8) ? 64 : 32,
redisGitSHA1(),
strtol(redisGitDirty(),NULL,10) > 0,
(int)getpid());
/* argc == 1 means no arguments at all. Any other argument count logs
 * "Configuration loaded", whether or not a config file path was given. */
if (argc == 1) {
serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/redis.conf", argv[0]);
} else {
serverLog(LL_WARNING, "Configuration loaded");
}
initServer();
/* createPidFile() runs when daemonized or when server.pidfile is set. */
if (background || server.pidfile) createPidFile();
if (server.set_proc_title) redisSetProcTitle(NULL);
redisAsciiArt();
checkTcpBacklogSettings();
if (server.cluster_enabled) {
clusterInit();
}
/* The module queue is not loaded in Sentinel mode. */
if (!server.sentinel_mode) {
moduleInitModulesSystemLast();
moduleLoadFromQueue();
}
ACLLoadUsersAtStartup();
initListeners();
if (server.cluster_enabled) {
clusterInitListeners();
}
InitServerLast();
if (!server.sentinel_mode) {
/* Things not needed when running in Sentinel mode. */
serverLog(LL_WARNING,"Server initialized");
#ifdef __linux__
linuxMemoryWarnings();
sds err_msg = NULL;
/* A negative return value is logged as a warning; startup continues. */
if (checkXenClocksource(&err_msg) < 0) {
serverLog(LL_WARNING, "WARNING %s", err_msg);
sdsfree(err_msg);
}
#if defined (__arm64__)
int ret;
/* ret < 0 logs err_msg, ret == 0 logs that the kernel test itself failed.
 * In both cases the process exits with status 1 unless
 * checkIgnoreWarning("ARM64-COW-BUG") returns non-zero. */
if ((ret = checkLinuxMadvFreeForkBug(&err_msg)) <= 0) {
if (ret < 0) {
serverLog(LL_WARNING, "WARNING %s", err_msg);
sdsfree(err_msg);
} else
serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
"Your system could be affected, please report this error.");
if (!checkIgnoreWarning("ARM64-COW-BUG")) {
serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
"Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
exit(1);
}
}
#endif /* __arm64__ */
#endif /* __linux__ */
/* AOF manifest, dataset loading and AOF housekeeping, in this order. */
aofLoadManifestFromDisk();
loadDataFromDisk();
aofOpenIfNeededOnServerStart();
aofDelHistoryFiles();
if (server.cluster_enabled) {
serverAssert(verifyClusterConfigWithData() == C_OK);
}
/* Log one line per entry of server.listeners; entries whose ct pointer is
 * NULL are skipped. */
for (j = 0; j < CONN_TYPE_MAX; j++) {
connListener *listener = &server.listeners[j];
if (listener->ct == NULL)
continue;
serverLog(LL_NOTICE,"Ready to accept connections %s", listener->ct->get_type(NULL));
}
/* Under systemd supervision, send a status string (which differs when
 * server.masterhost is set) followed by READY=1. */
if (server.supervised_mode == SUPERVISED_SYSTEMD) {
if (!server.masterhost) {
redisCommunicateSystemd("STATUS=Ready to accept connections\n");
} else {
redisCommunicateSystemd("STATUS=Ready to accept connections in read-only mode. Waiting for MASTER <-> REPLICA sync\n");
}
redisCommunicateSystemd("READY=1\n");
}
} else {
/* Sentinel mode. */
sentinelIsRunning();
if (server.supervised_mode == SUPERVISED_SYSTEMD) {
redisCommunicateSystemd("STATUS=Ready to accept connections\n");
redisCommunicateSystemd("READY=1\n");
}
}
/* Warning the user about suspicious maxmemory setting. */
if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
}
redisSetCpuAffinity(server.server_cpulist);
setOOMScoreAdj(-1);
/* Run the event loop; the lines below execute only once aeMain() returns. */
aeMain(server.el);
aeDeleteEventLoop(server.el);
return 0;
}
逻辑分析如下:
1.spt_init接收系统环境变量(extern char **environ)、重新修改argv的指向包括将argv[0]长度扩容(argv[0]影响进程名,这样后续fork流程可以给子进程命名,参考《redis里的小秘密:设置进程名》)
2.tzset获取当前系统时区等变量
3.zmalloc_set_oom_handler设置程序oom时的回调函数(malloc分配内存为空时打印些oom日志)
4.gettimeofday获取当前系统时间
5.初始化随机种子
6.crc64_init初始化crc校验模块
7.umask获取系统权限掩码
8.设置hash模块随机种子
9.校验进程启动参数是否为哨兵模式
10.initServerConfig:
- initConfigValues初始化启动参数(各种string、enum、int的启动参数)对应的字段注册表,例如loglevel参数对应修改的值就是server.verbosity
- updateCachedTime缓存当前系统时间给对精度要求不高的场景用,比time(NULL)访问快
- getRandomHexChars随机一个redis启动的id,用来追查log,比如redis前后id不一致表示有过重启
- 初始化实例同步id
- 初始化一堆全局变量默认值
- 生成redis指令(例如zset、lpush等)的字典,并populateCommandTable存储每个指令到字典里
11.ACLInit初始化acl权限控制模块
12.moduleInitModulesSystem模块系统初始化(模块系统可以外部提供符合声明接口的动态库,redis加载,用户就能追加自己的功能)
13.初始化客户端连接套接字的处理回调函数(例如socket读、写、读包处理、发包处理等)
14.判断是否哨兵模式,执行哨兵模式的初始化
15.判断是否是"redis-check-rdb"或者"redis-check-aof"模式,执行对应逻辑并exit退出进程
16.解析argv参数,提取出config文件路径并加载config
17.判断是否哨兵模式,再执行一些配置文件加载后的初始化逻辑
18.判断redis是否是upstart或者systemd启动管理,如果不是且设置了daemonize启动,就变为守护进程(fork之后父进程退出,子进程继续运行)
19.initServer:
- 忽略信号SIGHUP、SIGPIPE
- 设置其它信号的回调
- 设置线程可以随时被杀死(pthread_setcancelstate+pthread_setcanceltype)
- 初始化一堆服务器全局变量
- createSharedObjects初始化一堆静态字符串,例如固定的报错信息等
- adjustOpenFilesLimit调整最大文件描述符数量(redis默认最佳10032个)
- monotonicInit初始化获取当前系统时间回调(x86-linux通过rdtsc轻量化获取时间、arm通过汇编获取mrs寄存器、其它都调用clock_gettime获取时间)
- aeCreateEventLoop创建事件处理器(就是多个链表组成,可以链定时器、描述符事件)
- 创建设置的数量个db结构
- 事件处理器挂一个执行serverCron回调的定时器:首次在1毫秒后触发,之后按server.hz配置的频率周期执行(默认hz=10,约每100毫秒一次)(非常重要!各种需要定时的模块都会在这里执行)
- 事件处理器挂一个管道读端描述符server.module_pipe[0],处理主线程接收子线程事件
- 事件处理器挂一个beforeSleep回调,在执行epoll陷入前执行,beforeSleep逻辑复杂:
- handleClientsWithPendingReadsUsingThreads多线程处理客户端的收包
- handleBlockedClientsTimeout处理等待操作超时的客户端
- 如果开启集群模式,看是否要处理一些集群恢复的事情等
- activeExpireCycle执行快速扫描过期key(通过config_cycle_slow_time_perc得到扫描的最大时间)
- 尝试解除因为WAIT命令挂起的客户端
- moduleHandleBlockedClients处理模块(module)实现的阻塞客户端功能(类似blpop实现了阻塞,使用方自己用模块也实现类似功能)
- processUnblockedClients处理解除阻塞的客户端(比如blpop)
- sendGetackToReplicas向所有从节点广播REPLCONF GETACK,要求从节点回复ack
- updateFailoverStatus更新复制集群内的主节点信息(主节点挂了,备节点顶上继续提供服务)
- trackingBroadcastInvalidationMessages处理BCAST模式中,发送客户端关心的key失效消息
- handleClientsBlockedOnKeys处理阻塞的客户端(比如blpop)
- flushAppendOnlyFile处理aof文件同步到磁盘
- handleClientsWithPendingWritesUsingThreads多线程处理客户端消息发送
- freeClientsInAsyncFreeQueue清理关闭的客户端资源
- incrementalTrimReplicationBacklog处理积压的复制集日志
- evictClients内存占用大的时候驱逐一些客户端(客户端通过一些策略存放起来,可以快速索引到占用内存较大的那些客户端)
- moduleReleaseGIL释放moduleGIL锁,让别的线程可以开始访问数据了
- 事件处理器挂一个afterSleep回调,在执行完epoll后执行,主要挂起其它线程不访问数据
- scriptingInit初始化lua执行环境
- functionsInit初始化redis函数功能
- slowlogInit初始化慢日志统计功能
- latencyMonitorInit初始化延迟监控(例如io指标、命令执行速度等)功能
- ACLUpdateDefaultUserPassword更新acl默认用户名密码
- applyWatchdogPeriod开个看门狗,定期setitimer产生SIGALRM信号喂狗,定期没喂说明程序跑飞了打印执行栈帧
- initServerClientMemUsageBuckets初始化服务器统计客户端内存用量的数据结构
20.createPidFile创建一个文件写入进程pid号
21.redisSetProcTitle重新设置进程名字(进程名根据模板渲染,默认"{title} {listen-addr} {server-mode}")
22.redisAsciiArt展示启动输出控制台的运行logo这些
23.checkTcpBacklogSettings校验listen的backlog参数设置是否合理(要比内核设置的最大连接数小才行)
24.clusterInit如果是集群模式做一些初始化
25.如果不是哨兵模式,moduleLoadFromQueue加载所有注册的动态库模块
26.ACLLoadUsersAtStartup加载所有acl的用户信息
27.initListeners初始化普通tcp、tls、unix的网络监听器(例如tcp是在socket.c里注册CT_Socket结构体,结构体里都是各种处理事件的回调)
28.如果是集群模式,clusterInitListeners初始化集群模式的tcp监听器
29.InitServerLast:
- bioInit,创建后台线程(默认3个),竞争bio_jobs队列的任务执行,用于一些不想阻塞主逻辑的异步任务(例如异步关闭使用完的文件、free变量)
- initThreadedIO,创建io线程(数量由server.io_threads_num配置控制),用来处理连接套接字的收发包
- set_jemalloc_bg_thread,创建jemalloc线程
- server.initial_memory_usage记录初始启动后的内存占用
30.linuxMemoryWarnings校验/proc/sys/vm/overcommit_memory以及/sys/kernel/mm/transparent_hugepage/enabled是否设置为redis的推荐值
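31.checkXenClocksource校验当前系统时钟源,如果是xen就打印警告日志(时钟源影响获取系统时间接口的性能)
32.(仅arm64的linux)checkLinuxMadvFreeForkBug检测内核是否存在可能导致后台保存时数据损坏的bug,检测到问题或检测失败时打印警告,并且除非配置了ignore-warnings ARM64-COW-BUG,否则直接exit退出进程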
33.aofLoadManifestFromDisk从磁盘加载aof的清单(manifest)文件,默认位于appendonlydir/appendonly.aof.manifest
34.loadDataFromDisk从磁盘读取数据库数据(来源于aof或者rdb文件)
35.aofOpenIfNeededOnServerStart尝试打开aof功能
36.aofDelHistoryFiles删除aof的历史文件
37.如果是集群模式,verifyClusterConfigWithData校验集群模式配置
38.遍历监听套接字,输出准备监听的日志
39.redisSetCpuAffinity设置主线程cpu亲和性
40.setOOMScoreAdj设置/proc/self/oom_score_adj初始的oom分数值
41.aeMain主线程死循环轮询逻辑,每次循环调用aeProcessEvents:
- 执行beforesleep逻辑
- 执行epoll_wait逻辑
- 执行aftersleep逻辑
- 遍历处理所有epoll_wait返回的有事件的描述符:
- 读事件调用readQueryFromClient,此回调内部会判断如果开启多个io线程,就将客户端置为数据可读(并不实际处理计算),交给下一次beforesleep逻辑去多线程读取数据,否则就主线程直接读取数据并处理、响应;
- 写事件调用sendReplyToClient,此回调会发送数据给客户端,但如果开启多个io线程,此回调只有在beforeSleep:handleClientsWithPendingWritesUsingThreads逻辑发送客户端数据未发送完时才会被安装,用于继续发送剩余数据;
- processTimeEvents处理定时任务serverCron,所有需要定时处理的逻辑都在里面执行
beforeSleep函数
通过上面的分析,beforeSleep函数是个特别重的逻辑,在主线程陷入epoll_wait前执行。里面比较重要的要数handleClientsWithPendingReadsUsingThreads和handleClientsWithPendingWritesUsingThreads。这两个函数对应redis6.0增加的io线程功能,可以配置多个io线程处理套接字的收发(默认1个也就是主线程处理所有读写,作者觉得单线程足够许多业务了)。
handleClientsWithPendingReadsUsingThreads
/* Fan the clients queued in server.clients_pending_read out to the I/O
 * thread lists, let every thread (the main one serves list 0) read from its
 * share of connections, then process the resulting input on the main thread.
 *
 * Returns 0 when threaded reads are inactive or disabled, or when no client
 * is queued. Otherwise returns the number of clients that were queued on
 * entry; the same number is added to server.stat_io_reads_processed. */
int handleClientsWithPendingReadsUsingThreads(void) {
    if (!(server.io_threads_active && server.io_threads_do_reads)) return 0;

    int processed = listLength(server.clients_pending_read);
    if (!processed) return 0;

    listIter iter;
    listNode *node;
    int next_slot = 0;

    /* Round-robin assignment of the pending clients to the per-thread lists. */
    listRewind(server.clients_pending_read, &iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        client *c = listNodeValue(node);
        listAddNodeTail(io_threads_list[next_slot % server.io_threads_num], c);
        next_slot++;
    }

    /* Announce the operation type first, then hand every waiting thread its
     * start condition by storing the size of its list in the pending
     * counter. */
    io_threads_op = IO_THREADS_OP_READ;
    for (int tid = 1; tid < server.io_threads_num; tid++) {
        int queued = listLength(io_threads_list[tid]);
        setIOPendingCount(tid, queued);
    }

    /* The main thread takes care of list 0 itself. */
    listRewind(io_threads_list[0], &iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        client *c = listNodeValue(node);
        readQueryFromClient(c->conn);
    }
    listEmpty(io_threads_list[0]);

    /* Spin until the pending counters of all other threads sum to zero. */
    unsigned long outstanding;
    do {
        outstanding = 0;
        for (int tid = 1; tid < server.io_threads_num; tid++) {
            outstanding += getIOPendingCount(tid);
        }
    } while (outstanding != 0);

    io_threads_op = IO_THREADS_OP_IDLE;

    /* Drain the pending-read list from its head and process what was read. */
    while (listLength(server.clients_pending_read) != 0) {
        node = listFirst(server.clients_pending_read);
        client *c = listNodeValue(node);
        listDelNode(server.clients_pending_read, node);
        c->pending_read_list_node = NULL;

        serverAssert(!(c->flags & CLIENT_BLOCKED));

        /* A C_ERR from either call below means the client is no longer
         * valid, so the remaining steps are skipped for it. */
        if (beforeNextClient(c) != C_ERR) {
            /* Once io-threads are idle we can update the client in the mem
             * usage. */
            updateClientMemUsageAndBucket(c);

            if (processPendingCommandAndInputBuffer(c) != C_ERR) {
                /* A thread running readQueryFromClient() may have produced
                 * replies without being able to put the client in the
                 * pending write queue; do it here. */
                if (!(c->flags & CLIENT_PENDING_WRITE) &&
                    clientHasPendingReplies(c))
                {
                    putClientInPendingWriteQueue(c);
                }
            }
        }
    }

    /* Update processed count on server. */
    server.stat_io_reads_processed += processed;
    return processed;
}
1.遍历clients_pending_read队列获取所有epoll_wait标记有可读事件的客户端,通过item_id % server.io_threads_num
取模分配给多个io线程的任务队列
2.setIOPendingCount原子操作设置每个io线程自旋锁解锁(设置变量大于0,io线程会自旋检查变量值)
3.主线程也不闲着,遍历io_threads_list[0]队列,当做io线程用
4.自旋检查所有io线程任务队列是否都空了,并发读完成
5.遍历每个客户端的接收buf,处理命令,并填充响应数据到客户端发送buf,putClientInPendingWriteQueue将客户端加入等待发送队列,由后边的handleClientsWithPendingWritesUsingThreads处理发送
handleClientsWithPendingWritesUsingThreads
int handleClientsWithPendingWritesUsingThreads(void) {
int processed = listLength(server.clients_pending_write);
if (processed == 0) return 0; /* Return ASAP if there are no clients. */
/* If I/O threads are disabled or we have few clients to serve, don't
* use I/O threads, but the boring synchronous code. */
if (server.io_threads_num == 1 || stopThreadedIOIfNeeded()) {
return handleClientsWithPendingWrites();
}
/* Start threads if needed. */
if (!server.io_threads_active) startThreadedIO();
/* Distribute the clients across N different lists. */
listIter li;
listNode *ln;
listRewind(server.clients_pending_write,&li);
int item_id = 0;
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
c->flags &= ~CLIENT_PENDING_WRITE;
/* Remove clients from the list of pending writes since
* they are going to be closed ASAP. */
if (c->flags & CLIENT_CLOSE_ASAP) {
listUnlinkNode(server.clients_pending_write, ln);
continue;
}
/* Since all replicas and replication backlog use global replication
* buffer, to guarantee data accessing thread safe, we must put all
* replicas client into io_threads_list[0] i.e. main thread handles
* sending the output buffer of all replicas. */
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
listAddNodeTail(io_threads_list[0],c);
continue;
}
int target_id = item_id % server.io_threads_num;
listAddNodeTail(io_threads_list[target_id],c);
item_id++;
}
/* Give the start condition to the waiting threads, by setting the
* start condition atomic var. */
io_threads_op = IO_THREADS_OP_WRITE;
for (int j = 1; j < server.io_threads_num; j++) {
int count = listLength(io_threads_list[j]);
setIOPendingCount(j, count);
}
/* Also use the main thread to process a slice of clients. */
listRewind(io_threads_list[0],&li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
writeToClient(c,0);
}
listEmpty(io_threads_list[0]);
/* Wait for all the other threads to end their work. */
while(1) {
unsigned long pending = 0;
for (int j = 1; j < server.io_threads_num; j++)
pending += getIOPendingCount(j);
if (pending == 0) break;
}
io_threads_op = IO_THREADS_OP_IDLE;
/* Run the list of clients again to install the write handler where
* needed. */
listRewind(server.clients_pending_write,&li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
/* Update the client in the mem usage after we're done processing it in the io-threads */
updateClientMemUsageAndBucket(c);
/* Install the write handler if there are pending writes in some
* of the clients. */
if (clientHasPendingReplies(c)) {
installClientWriteHandler(c);
}
}
while(listLength(server.clients_pending_write) > 0) {
listUnlinkNode(server.clients_pending_write, server.clients_pending_write->head);
}
/* Update processed count on server */
server.stat_io_writes_processed += processed;
return processed;
}
1.如果io线程数为1或者少量客户端数据待发送,直接主线程调用handleClientsWithPendingWrites
处理客户端数据发送;
2.否则startThreadedIO
尝试唤醒io线程,其实就是pthread_mutex_unlock解除主线程获取的锁,由io线程获取锁唤醒,io线程再自旋查看任务队列是否存在任务;
3.遍历clients_pending_write
队列,依然是取模,把有发送数据的客户端打到不同io线程
4.解除io线程自旋锁,io线程开始发送工作,调用writeToClient发送,每个客户端单次最多发送64k;如果数据已经全部发送完,就删除客户端的发送回调connSetWriteHandler(c->conn, NULL)。套接字上存在发送回调时,epoll_wait返回可写事件后会执行该回调;回调为空时,epoll_wait后会跳过这个套接字的可写事件;
5.发送完毕后由主线程遍历所有客户端,如果还存在待发送数据(上一步超过64k发送量的),就调用installClientWriteHandler设置一个发送回调,等epoll_wait标记该套接字可写后,由主线程继续发送剩余数据
总结
redis7.0依然还是单线程为主,单线程循环处理:
1、客户端数据接收、数据处理、数据响应
2、epoll_wait监听
3、定时逻辑判断
当然也存在工作线程异步处理耗时任务,例如大key的删除、大对象的free等。
而如果是用户设置有io多线程模式,则主线程处理客户端数据接收、数据响应变成主线程阻塞通知io线程完成任务,也就是用io线程加速io读写(redis速度提升1倍)。这种多线程处理io和传统的multi-reactor模型不太一样,multi-reactor是io线程处理读、逻辑处理、写,但如果io线程逻辑处理动到公共数据就要涉及多线程安全问题。redis这样做应该是出于打补丁添加io多线程的目的,毕竟前期的各种db模块均是线程不安全,要改为纯粹multi-reactor逻辑,首先就要重构所有db模块,工作量巨大。所以如果这种方式还有性能瓶颈,redis建议运行多个redis进程!