redis7.0入口函数和事件循环简单分析

main函数


/* Server entry point.
 *
 * Visible flow: seed the random generators and the dict hash function,
 * save the process umask, detect sentinel mode, initialize the server
 * configuration plus the ACL / module / connection-type subsystems, keep a
 * copy of argv for a later restart, turn the command line into a config
 * file path plus a synthesized option string and load the configuration,
 * optionally daemonize, initialize the server and its listeners, load the
 * data set (non-sentinel mode only) and finally run the event loop.
 *
 * Returns 0 once aeMain() has returned and the event loop was deleted. */
int main(int argc, char **argv) {
    struct timeval tv;
    int j;
    char config_from_stdin = 0;
    
    /* We need to initialize our libraries, and the server configuration. */
#ifdef INIT_SETPROCTITLE_REPLACEMENT
    spt_init(argc, argv);
#endif
    tzset(); /* Populates 'timezone' global. */
    zmalloc_set_oom_handler(redisOutOfMemoryHandler);

    /* To achieve entropy, in case of containers, their time() and getpid() can
     * be the same. But value of tv_usec is fast enough to make the difference */
    gettimeofday(&tv,NULL);
    srand(time(NULL)^getpid()^tv.tv_usec);
    srandom(time(NULL)^getpid()^tv.tv_usec);
    init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
    crc64_init();

    /* Store umask value. Because umask(2) only offers a set-and-get API we have
     * to reset it and restore it back. We do this early to avoid a potential
     * race condition with threads that could be creating files or directories.
     */
    umask(server.umask = umask(0777));

    /* Seed the dict hash function with 16 random bytes. */
    uint8_t hashseed[16];
    getRandomBytes(hashseed,sizeof(hashseed));
    dictSetHashFunctionSeed(hashseed);

    /* exec_name points at the last '/' of argv[0] when there is one (so it
     * still starts with that slash), otherwise at argv[0] itself. */
    char *exec_name = strrchr(argv[0], '/');
    if (exec_name == NULL) exec_name = argv[0];
    server.sentinel_mode = checkForSentinelMode(argc,argv, exec_name);
    initServerConfig();
    ACLInit(); /* The ACL subsystem must be initialized ASAP because the
                  basic networking code and client creation depends on it. */
    moduleInitModulesSystem();
    connTypeInitialize();

    /* Store the executable path and arguments in a safe place in order
     * to be able to restart the server later. */
    server.executable = getAbsolutePath(argv[0]);
    server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
    server.exec_argv[argc] = NULL;
    for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);

    /* We need to init sentinel right now as parsing the configuration file
     * in sentinel mode will have the effect of populating the sentinel
     * data structures with master nodes to monitor. */
    if (server.sentinel_mode) {
        initSentinelConfig();
        initSentinel();
    }

    /* Check if we need to start in redis-check-rdb/aof mode. We just execute
     * the program main. However the program is part of the Redis executable
     * so that we can easily execute an RDB check on loading errors. */
    /* NOTE(review): nothing here returns or exits after these calls, so
     * presumably the check mains terminate the process themselves -- confirm. */
    if (strstr(exec_name,"redis-check-rdb") != NULL)
        redis_check_rdb_main(argc,argv,NULL);
    else if (strstr(exec_name,"redis-check-aof") != NULL)
        redis_check_aof_main(argc,argv);

    if (argc >= 2) {
        j = 1; /* First option to parse in argv[] */
        sds options = sdsempty();

        /* Handle special options --help and --version */
        if (strcmp(argv[1], "-v") == 0 ||
            strcmp(argv[1], "--version") == 0) version();
        if (strcmp(argv[1], "--help") == 0 ||
            strcmp(argv[1], "-h") == 0) usage();
        if (strcmp(argv[1], "--test-memory") == 0) {
            if (argc == 3) {
                memtest(atoi(argv[2]),50);
                exit(0);
            } else {
                fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
                fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
                exit(1);
            }
        } if (strcmp(argv[1], "--check-system") == 0) {
            /* Not an 'else if', but both arms of the --test-memory branch
             * above call exit(), so this test is only reached when that
             * option was not given. The exit status is 0 when syscheck()
             * returns non-zero and 1 otherwise. */
            exit(syscheck() ? 0 : 1);
        }
        /* Parse command line options
         * Precedence wise, File, stdin, explicit options -- last config is the one that matters.
         *
         * First argument is the config file name? */
        if (argv[1][0] != '-') {
            /* Replace the config file in server.exec_argv with its absolute path. */
            server.configfile = getAbsolutePath(argv[1]);
            zfree(server.exec_argv[1]);
            server.exec_argv[1] = zstrdup(server.configfile);
            j = 2; // Skip this arg when parsing options
        }
        sds *argv_tmp;
        int argc_tmp;
        /* 1 while no option name is waiting for its value; cleared right
         * after a bare `--name` argument and set again once a value (or one
         * of the special cases below) has been handled. */
        int handled_last_config_arg = 1;
        while(j < argc) {
            /* Either first or last argument - Should we read config from stdin? */
            if (argv[j][0] == '-' && argv[j][1] == '\0' && (j == 1 || j == argc-1)) {
                config_from_stdin = 1;
            }
            /* All the other options are parsed and conceptually appended to the
             * configuration file. For instance --port 6380 will generate the
             * string "port 6380\n" to be parsed after the actual config file
             * and stdin input are parsed (if they exist).
             * Only consider that if the last config has at least one argument. */
            else if (handled_last_config_arg && argv[j][0] == '-' && argv[j][1] == '-') {
                /* Option name */
                if (sdslen(options)) options = sdscat(options,"\n");
                /* argv[j]+2 for removing the preceding `--` */
                options = sdscat(options,argv[j]+2);
                options = sdscat(options," ");

                /* The split result is only used for its token count
                 * (argc_tmp); it is released at the end of this branch. */
                argv_tmp = sdssplitargs(argv[j], &argc_tmp);
                if (argc_tmp == 1) {
                    /* Means that we only have one option name, like --port or "--port " */
                    handled_last_config_arg = 0;

                    if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
                        !strcasecmp(argv[j], "--save"))
                    {
                        /* Special case: handle some things like `--save --config value`.
                         * In this case, if next argument starts with `--`, we will reset
                         * handled_last_config_arg flag and append an empty "" config value
                         * to the options, so it will become `--save "" --config value`.
                         * We are doing it to be compatible with pre 7.0 behavior (which we
                         * break it in #10660, 7.0.1), since there might be users who generate
                         * a command line from an array and when it's empty that's what they produce. */
                        options = sdscat(options, "\"\"");
                        handled_last_config_arg = 1;
                    }
                    else if ((j == argc-1) && !strcasecmp(argv[j], "--save")) {
                        /* Special case: when empty save is the last argument.
                         * In this case, we append an empty "" config value to the options,
                         * so it will become `--save ""` and will follow the same reset thing. */
                        options = sdscat(options, "\"\"");
                    }
                    else if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
                        !strcasecmp(argv[j], "--sentinel"))
                    {
                        /* Special case: handle some things like `--sentinel --config value`.
                         * It is a pseudo config option with no value. In this case, if next
                         * argument starts with `--`, we will reset handled_last_config_arg flag.
                         * We are doing it to be compatible with pre 7.0 behavior (which we
                         * break it in #10660, 7.0.1). */
                        options = sdscat(options, "");
                        handled_last_config_arg = 1;
                    }
                    else if ((j == argc-1) && !strcasecmp(argv[j], "--sentinel")) {
                        /* Special case: when --sentinel is the last argument.
                         * It is a pseudo config option with no value. In this case, do nothing.
                         * We are doing it to be compatible with pre 7.0 behavior (which we
                         * break it in #10660, 7.0.1). */
                        options = sdscat(options, "");
                    }
                } else {
                    /* Means that we are passing both config name and it's value in the same arg,
                     * like "--port 6380", so we need to reset handled_last_config_arg flag. */
                    handled_last_config_arg = 1;
                }
                sdsfreesplitres(argv_tmp, argc_tmp);
            } else {
                /* Option argument */
                options = sdscatrepr(options,argv[j],strlen(argv[j]));
                options = sdscat(options," ");
                handled_last_config_arg = 1;
            }
            j++;
        }

        /* Load the configuration from the file, stdin (if requested) and
         * the option string assembled above. */
        loadServerConfig(server.configfile, config_from_stdin, options);
        if (server.sentinel_mode) loadSentinelConfigFromQueue();
        sdsfree(options);
    }
    if (server.sentinel_mode) sentinelCheckConfigFile();
    server.supervised = redisIsSupervised(server.supervised_mode);
    /* Go to the background only when daemonize is set and the process was
     * not detected as supervised. */
    int background = server.daemonize && !server.supervised;
    if (background) daemonize();

    serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
    serverLog(LL_WARNING,
        "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
            REDIS_VERSION,
            (sizeof(long) == 8) ? 64 : 32,
            redisGitSHA1(),
            strtol(redisGitDirty(),NULL,10) > 0,
            (int)getpid());

    /* argc == 1 means no arguments at all were given on the command line. */
    if (argc == 1) {
        serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/redis.conf", argv[0]);
    } else {
        serverLog(LL_WARNING, "Configuration loaded");
    }

    initServer();
    if (background || server.pidfile) createPidFile();
    if (server.set_proc_title) redisSetProcTitle(NULL);
    redisAsciiArt();
    checkTcpBacklogSettings();
    if (server.cluster_enabled) {
        clusterInit();
    }
    /* Modules are only loaded when not running in sentinel mode. */
    if (!server.sentinel_mode) {
        moduleInitModulesSystemLast();
        moduleLoadFromQueue();
    }
    ACLLoadUsersAtStartup();
    initListeners();
    if (server.cluster_enabled) {
        clusterInitListeners();
    }
    InitServerLast();

    if (!server.sentinel_mode) {
        /* Things not needed when running in Sentinel mode. */
        serverLog(LL_WARNING,"Server initialized");
    #ifdef __linux__
        linuxMemoryWarnings();
        sds err_msg = NULL;
        if (checkXenClocksource(&err_msg) < 0) {
            serverLog(LL_WARNING, "WARNING %s", err_msg);
            sdsfree(err_msg);
        }
    #if defined (__arm64__)
        /* ret < 0 logs the message returned in err_msg, ret == 0 logs that
         * the test itself failed; in both cases the process exits unless
         * the ARM64-COW-BUG warning is configured to be ignored. */
        int ret;
        if ((ret = checkLinuxMadvFreeForkBug(&err_msg)) <= 0) {
            if (ret < 0) {
                serverLog(LL_WARNING, "WARNING %s", err_msg);
                sdsfree(err_msg);
            } else
                serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
                                      "Your system could be affected, please report this error.");
            if (!checkIgnoreWarning("ARM64-COW-BUG")) {
                serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
                                     "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
                exit(1);
            }
        }
    #endif /* __arm64__ */
    #endif /* __linux__ */
        aofLoadManifestFromDisk();
        loadDataFromDisk();
        aofOpenIfNeededOnServerStart();
        aofDelHistoryFiles();
        if (server.cluster_enabled) {
            serverAssert(verifyClusterConfigWithData() == C_OK);
        }

        /* Log one line per listener slot in use; slots whose connection
         * type pointer is NULL are skipped. */
        for (j = 0; j < CONN_TYPE_MAX; j++) {
            connListener *listener = &server.listeners[j];
            if (listener->ct == NULL)
                continue;

            serverLog(LL_NOTICE,"Ready to accept connections %s", listener->ct->get_type(NULL));
        }

        /* Under systemd supervision, report readiness; the status text
         * differs when server.masterhost is set. */
        if (server.supervised_mode == SUPERVISED_SYSTEMD) {
            if (!server.masterhost) {
                redisCommunicateSystemd("STATUS=Ready to accept connections\n");
            } else {
                redisCommunicateSystemd("STATUS=Ready to accept connections in read-only mode. Waiting for MASTER <-> REPLICA sync\n");
            }
            redisCommunicateSystemd("READY=1\n");
        }
    } else {
        sentinelIsRunning();
        if (server.supervised_mode == SUPERVISED_SYSTEMD) {
            redisCommunicateSystemd("STATUS=Ready to accept connections\n");
            redisCommunicateSystemd("READY=1\n");
        }
    }

    /* Warning the user about suspicious maxmemory setting. */
    if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
        serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
    }

    redisSetCpuAffinity(server.server_cpulist);
    setOOMScoreAdj(-1);

    /* Run the event loop; the cleanup below only runs once aeMain() returns. */
    aeMain(server.el);
    aeDeleteEventLoop(server.el);
    return 0;
}

逻辑分析如下:
1.spt_init接收系统环境变量(extern char **environ)、重新修改argv的指向包括将argv[0]长度扩容(argv[0]影响进程名,这样后续fork流程可以给子进程命名,参考《redis里的小秘密:设置进程名》)
2.tzset获取当前系统时区等变量
3.zmalloc_set_oom_handler设置程序oom时的回调函数(malloc分配内存为空时打印些oom日志)
4.gettimeofday获取当前系统时间
5.初始化随机种子
6.crc64_init初始化crc校验模块
7.umask获取系统权限掩码
8.设置hash模块随机种子
9.校验进程启动参数是否为哨兵模式
10.initServerConfig:

  • initConfigValues初始化启动参数(各种string、enum、int的启动参数)对应的字段注册表,例如loglevel参数对应修改的值就是server.verbosity
  • updateCachedTime缓存当前系统时间给对精度要求不高的场景用,比time(NULL)访问快
  • getRandomHexChars随机一个redis启动的id,用来追查log,比如redis前后id不一致表示有过重启
  • 初始化实例同步id
  • 初始化一堆全局变量默认值
  • 生成redis指令(例如zset、lpush等)的字典,并populateCommandTable存储每个指令到字典里

11.ACLInit初始化acl权限控制模块
12.moduleInitModulesSystem模块系统初始化(模块系统可以外部提供符合声明接口的动态库,redis加载,用户就能追加自己的功能)
13.初始化客户端连接套接字的处理回调函数(例如socket读、写、读包处理、发包处理等)
14.判断是否哨兵模式,执行哨兵模式的初始化
15.判断是否是"redis-check-rdb"或者"redis-check-aof"模式,执行对应逻辑并exit退出进程
16.解析argv参数,提取出config文件路径并加载config
17.判断是否哨兵模式,再执行一些配置文件加载后的初始化逻辑
18.判断redis是否是upstart或者systemd启动管理,如果不是且设置了daemonize启动,就变为守护进程(fork之后杀掉父进程)
19.initServer:

  • 忽略信号SIGHUP、SIGPIPE
  • 设置其它信号的回调
  • 设置线程可以随时被杀死(pthread_setcancelstate+pthread_setcanceltype)
  • 初始化一堆服务器全局变量
  • createSharedObjects初始化一堆静态字符串,例如固定的报错信息等
  • adjustOpenFilesLimit调整最大文件描述符数量(redis默认最佳10032个)
  • monotonicInit初始化获取当前系统时间回调(x86-linux通过rdtsc轻量化获取时间、arm通过汇编获取mrs寄存器、其它都调用clock_gettime获取时间)
  • aeCreateEventLoop创建事件处理器(就是多个链表组成,可以链定时器、描述符事件)
  • 创建设置的数量个db结构
  • 事件处理器挂一个执行serverCron回调的定时器,首次在1毫秒后触发,之后按server.hz配置的频率(默认每秒10次)周期执行(非常重要!各种需要定时的模块都会在这里执行)
  • 事件处理器挂一个管道读端描述符server.module_pipe[0],处理主线程接收子线程事件
  • 事件处理器挂一个beforeSleep回调,在执行epoll陷入前执行,beforeSleep逻辑复杂:
  • handleClientsWithPendingReadsUsingThreads多线程处理客户端的收包
  • handleBlockedClientsTimeout处理等待操作超时的客户端
  • 如果开启集群模式,看是否要处理一些集群恢复的事情等
  • activeExpireCycle执行快速扫描过期key(通过config_cycle_slow_time_perc得到扫描的最大时间)
  • 尝试解除因为WAIT命令挂起的客户端
  • moduleHandleBlockedClients处理模块(module)实现的阻塞客户端功能(类似blpop实现了阻塞,使用方自己用模块也实现类似功能)
  • processUnblockedClients处理解除阻塞的客户端(比如blpop)
  • sendGetackToReplicas向所有从节点广播REPLCONF GETACK请求,要求从节点回复ack
  • updateFailoverStatus更新复制集群内的主节点信息(主节点挂了,备节点顶上继续提供服务)
  • trackingBroadcastInvalidationMessages处理BCAST模式中,发送客户端关心的key失效消息
  • handleClientsBlockedOnKeys处理阻塞的客户端(比如blpop)
  • flushAppendOnlyFile处理aof文件同步到磁盘
  • handleClientsWithPendingWritesUsingThreads多线程处理客户端消息发送
  • freeClientsInAsyncFreeQueue清理关闭的客户端资源
  • incrementalTrimReplicationBacklog处理积压的复制集日志
  • evictClients内存占用大的时候驱逐一些客户端(客户端通过一些策略存放起来,可以快速索引到占用内存较大的那些客户端)
  • moduleReleaseGIL释放moduleGIL锁,让别的线程可以开始访问数据了
  • 事件处理器挂一个afterSleep回调,在执行完epoll后执行,主要挂起其它线程不访问数据
  • scriptingInit初始化lua执行环境
  • functionsInit初始化redis函数功能
  • slowlogInit初始化慢日志统计功能
  • latencyMonitorInit初始化延迟监控(例如io指标、命令执行速度等)功能
  • ACLUpdateDefaultUserPassword更新acl默认用户名密码
  • applyWatchdogPeriod开个看门狗,定期setitimer产生SIGALRM信号喂狗,定期没喂说明程序跑飞了打印执行栈帧
  • initServerClientMemUsageBuckets初始化服务器统计客户端内存用量的数据结构

20.createPidFile创建一个文件写入进程pid号
21.redisSetProcTitle重新设置进程名字(进程名根据模板渲染,默认"{title} {listen-addr} {server-mode}")
22.redisAsciiArt展示启动输出控制台的运行logo这些
23.checkTcpBacklogSettings校验listen的backlog参数设置是否合理(要比内核设置的最大连接数小才行)
24.clusterInit如果是集群模式做一些初始化
25.如果不是哨兵模式,moduleLoadFromQueue加载所有注册的动态库模块
26.ACLLoadUsersAtStartup加载所有acl的用户信息
27.initListeners初始化普通tcp、tls、unix的网络监听器(例如tcp是在socket.c里注册CT_Socket结构体,结构体里都是各种处理事件的回调)
28.如果是集群模式,clusterInitListeners初始化集群模式的tcp监听器
29.InitServerLast:

  • bioInit,创建后台线程(默认3个),竞争bio_jobs队列的任务执行,用于一些不想阻塞主逻辑的异步任务(例如异步关闭使用完的文件、free变量)
  • initThreadedIO,创建io线程(数量由server.io_threads_num配置控制),用来处理连接套接字的收发包
  • set_jemalloc_bg_thread,创建jemalloc线程
  • server.initial_memory_usage记录初始启动后的内存占用

30.linuxMemoryWarnings校验/proc/sys/vm/overcommit_memory以及透明大页(/sys/kernel/mm/transparent_hugepage/enabled)是否设置为redis的推荐值
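31.checkXenClocksource校验当前系统时钟源,如果是xen就打印警告日志(时钟源影响获取系统时间接口的性能)
32.(仅arm64)checkLinuxMadvFreeForkBug检测内核是否存在可能导致后台保存时数据损坏的bug,检测到bug或检测失败时打印警告,如果没有配置ignore-warnings ARM64-COW-BUG就直接退出进程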
33.aofLoadManifestFromDisk从磁盘加载aof清单(manifest)文件,默认位于appendonlydir/appendonly.aof.manifest
34.loadDataFromDisk从磁盘读取数据库数据(来源于aof或者rdb文件)
35.aofOpenIfNeededOnServerStart尝试打开aof功能
36.aofDelHistoryFiles删除aof的历史文件
37.如果是集群模式,verifyClusterConfigWithData校验集群模式配置
38.遍历监听套接字,输出准备监听的日志
39.redisSetCpuAffinity设置主线程cpu亲和性
40.setOOMScoreAdj设置/proc/self/oom_score_adj初始的oom分数值
41.aeMain主线程死循环轮询逻辑,每次循环调用aeProcessEvents:

  • 执行beforesleep逻辑
  • 执行epoll_wait逻辑
  • 执行aftersleep逻辑
  • 遍历处理所有epoll_wait返回的有事件的描述符:
  • 读事件调用readQueryFromClient,此回调内部会判断如果开启多个io线程,就将客户端置为数据可读(并不实际处理计算),交给下一次beforesleep逻辑去多线程读取数据,否则就主线程直接读取数据并处理、响应;
  • 写事件调用sendReplyToClient,此回调会发送数据给客户端,但如果开启多个io线程,此回调只有在beforeSleep:handleClientsWithPendingWritesUsingThreads逻辑发送客户端数据未发送完才重新发送;
  • processTimeEvents处理定时任务serverCron,所有需要定时处理的逻辑都在里面执行

beforeSleep函数

通过上面的分析,beforeSleep函数是个特别重的逻辑,在主线程陷入epoll_wait前执行。里面比较重要的要数handleClientsWithPendingReadsUsingThreads和handleClientsWithPendingWritesUsingThreads。这两个函数就是redis6.0增加的io线程,可以配置多个io线程处理套接字的收发(默认1个也就是主线程处理所有读写,作者觉得单线程足够许多业务了)。

handleClientsWithPendingReadsUsingThreads

/* Serve the clients queued in server.clients_pending_read with the help of
 * the I/O threads.
 *
 * The queued clients are dealt round-robin into the io_threads_list[]
 * buckets, the waiting threads are released by publishing the length of
 * each bucket, the main thread reads bucket 0 itself and then spins until
 * the pending counters of all the other buckets are back to zero. After
 * that the queue is drained on the calling thread: pending commands and
 * input buffers are processed, and clients left with unsent replies are
 * moved to the pending-write queue.
 *
 * Returns 0 without doing anything when threaded I/O is not active, threaded
 * reads are disabled, or the queue is empty; otherwise returns the number of
 * clients that were queued on entry (the same amount is added to
 * server.stat_io_reads_processed). */
int handleClientsWithPendingReadsUsingThreads(void) {
    if (!(server.io_threads_active && server.io_threads_do_reads)) return 0;

    int processed = listLength(server.clients_pending_read);
    if (!processed) return 0;

    listIter iter;
    listNode *node;

    /* Spread the queued clients over the per-thread lists, one list after
     * the other. */
    listRewind(server.clients_pending_read,&iter);
    for (int slot = 0; (node = listNext(&iter)) != NULL; slot++) {
        client *c = listNodeValue(node);
        listAddNodeTail(io_threads_list[slot % server.io_threads_num],c);
    }

    /* Publishing a pending count for each thread is the start signal the
     * waiting threads look at. Index 0 belongs to the main thread. */
    io_threads_op = IO_THREADS_OP_READ;
    for (int tid = 1; tid < server.io_threads_num; tid++) {
        setIOPendingCount(tid, listLength(io_threads_list[tid]));
    }

    /* The main thread takes care of its own share of clients. */
    listRewind(io_threads_list[0],&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        client *c = listNodeValue(node);
        readQueryFromClient(c->conn);
    }
    listEmpty(io_threads_list[0]);

    /* Spin until no other thread reports pending work. */
    unsigned long remaining;
    do {
        remaining = 0;
        for (int tid = 1; tid < server.io_threads_num; tid++)
            remaining += getIOPendingCount(tid);
    } while (remaining != 0);

    io_threads_op = IO_THREADS_OP_IDLE;

    /* Second pass over the clients, now to process the buffers that were
     * just filled. Every client is removed from the queue as it is visited. */
    while (listLength(server.clients_pending_read)) {
        node = listFirst(server.clients_pending_read);
        client *c = listNodeValue(node);
        listDelNode(server.clients_pending_read,node);
        c->pending_read_list_node = NULL;

        serverAssert(!(c->flags & CLIENT_BLOCKED));

        /* A client that is no longer valid is not processed any further. */
        if (beforeNextClient(c) == C_ERR) continue;

        /* With the I/O threads idle the memory usage bookkeeping of the
         * client can be updated. */
        updateClientMemUsageAndBucket(c);

        /* Same as above: skip the client if it is no longer valid. */
        if (processPendingCommandAndInputBuffer(c) == C_ERR) continue;

        /* readQueryFromClient() running in a thread may have produced
         * replies without being able to put the client in the pending
         * write queue, so do it here when needed. */
        int already_queued = (c->flags & CLIENT_PENDING_WRITE) != 0;
        if (!already_queued && clientHasPendingReplies(c))
            putClientInPendingWriteQueue(c);
    }

    /* Account for the clients handled in the server statistics. */
    server.stat_io_reads_processed += processed;

    return processed;
}

1.遍历clients_pending_read队列获取所有epoll_wait标记有可读事件的客户端,通过item_id % server.io_threads_num取模分配给多个io线程的任务队列
2.setIOPendingCount原子操作设置每个io线程自旋锁解锁(设置变量大于0,io线程会自旋检查变量值)
3.主线程也不闲着,遍历io_threads_list[0]队列,当做io线程用
4.自旋检查所有io线程任务队列是否都空了,并发读完成
5.遍历每个客户端的接收buf,处理命令,并填充响应数据到客户端发送buf,putClientInPendingWriteQueue将客户端加入等待发送队列,由后面的handleClientsWithPendingWritesUsingThreads负责发送

handleClientsWithPendingWritesUsingThreads

int handleClientsWithPendingWritesUsingThreads(void) {
    int processed = listLength(server.clients_pending_write);
    if (processed == 0) return 0; /* Return ASAP if there are no clients. */

    /* If I/O threads are disabled or we have few clients to serve, don't
     * use I/O threads, but the boring synchronous code. */
    if (server.io_threads_num == 1 || stopThreadedIOIfNeeded()) {
        return handleClientsWithPendingWrites();
    }

    /* Start threads if needed. */
    if (!server.io_threads_active) startThreadedIO();

    /* Distribute the clients across N different lists. */
    listIter li;
    listNode *ln;
    listRewind(server.clients_pending_write,&li);
    int item_id = 0;
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        c->flags &= ~CLIENT_PENDING_WRITE;

        /* Remove clients from the list of pending writes since
         * they are going to be closed ASAP. */
        if (c->flags & CLIENT_CLOSE_ASAP) {
            listUnlinkNode(server.clients_pending_write, ln);
            continue;
        }

        /* Since all replicas and replication backlog use global replication
         * buffer, to guarantee data accessing thread safe, we must put all
         * replicas client into io_threads_list[0] i.e. main thread handles
         * sending the output buffer of all replicas. */
        if (getClientType(c) == CLIENT_TYPE_SLAVE) {
            listAddNodeTail(io_threads_list[0],c);
            continue;
        }

        int target_id = item_id % server.io_threads_num;
        listAddNodeTail(io_threads_list[target_id],c);
        item_id++;
    }

    /* Give the start condition to the waiting threads, by setting the
     * start condition atomic var. */
    io_threads_op = IO_THREADS_OP_WRITE;
    for (int j = 1; j < server.io_threads_num; j++) {
        int count = listLength(io_threads_list[j]);
        setIOPendingCount(j, count);
    }

    /* Also use the main thread to process a slice of clients. */
    listRewind(io_threads_list[0],&li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        writeToClient(c,0);
    }
    listEmpty(io_threads_list[0]);

    /* Wait for all the other threads to end their work. */
    while(1) {
        unsigned long pending = 0;
        for (int j = 1; j < server.io_threads_num; j++)
            pending += getIOPendingCount(j);
        if (pending == 0) break;
    }

    io_threads_op = IO_THREADS_OP_IDLE;

    /* Run the list of clients again to install the write handler where
     * needed. */
    listRewind(server.clients_pending_write,&li);
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);

        /* Update the client in the mem usage after we're done processing it in the io-threads */
        updateClientMemUsageAndBucket(c);

        /* Install the write handler if there are pending writes in some
         * of the clients. */
        if (clientHasPendingReplies(c)) {
            installClientWriteHandler(c);
        }
    }
    while(listLength(server.clients_pending_write) > 0) {
        listUnlinkNode(server.clients_pending_write, server.clients_pending_write->head);
    }

    /* Update processed count on server */
    server.stat_io_writes_processed += processed;

    return processed;
}

1.如果io线程数为1或者少量客户端数据待发送,直接主线程调用handleClientsWithPendingWrites处理客户端数据发送;
2.否则startThreadedIO尝试唤醒io线程,其实就是pthread_mutex_unlock解除主线程获取的锁,由io线程获取锁唤醒,io线程再自旋查看任务队列是否存在任务;
3.遍历clients_pending_write队列,依然是取模,把有发送数据的客户端打到不同io线程
4.解除io线程自旋锁,io线程开始发送工作,调用writeToClient发送,每个客户端最大发送64k,如果没超过最大发送量,删除客户端的发送回调connSetWriteHandler(c->conn, NULL)。如果存在这个发送回调,epoll_wait返回该套接字可写时会执行这个回调;如果为空,epoll_wait返回后会跳过这个套接字的可写事件;
5.发送完毕由主线程遍历下所有客户端,查看如果还存在待发送数据(上一步超过64k发送量的),调用installClientWriteHandler设置一个发送回调,就是直接往套接字发送数据,然后等待epoll_wait标记可写,主线程发送数据

总结

redis7.0依然还是单线程为主,单线程循环处理:

1、客户端数据接收、数据处理、数据响应
2、epoll_wait监听
3、定时逻辑判断

当然也存在工作线程异步处理耗时任务,例如大key的删除、大对象的free等。
而如果是用户设置有io多线程模式,则主线程处理客户端数据接收、数据响应变成主线程阻塞通知io线程完成任务,也就是用io线程加速io读写(redis速度提升1倍)。这种多线程处理io和传统的multi-reactor模型不太一样,multi-reactor是io线程处理读、逻辑处理、写,但如果io线程逻辑处理动到公共数据就要涉及多线程安全问题。redis这样做应该是出于打补丁添加io多线程的目的,毕竟前期的各种db模块均是线程不安全,要改为纯粹multi-reactor逻辑,首先就要重构所有db模块,工作量巨大。所以如果这种方式还有性能瓶颈,redis建议运行多个redis进程!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值