1. Entry point: main() in server.c
2. initServerConfig
Not the focus here; skip for now.
3. initServer
/**
 * Initialize the server.
 */
void initServer(void) {
    int j;

    //todo 1: set up signal handling
    signal(SIGHUP, SIG_IGN);
    signal(SIGPIPE, SIG_IGN);
    setupSignalHandlers();

    // open syslog if enabled
    if (server.syslog_enabled) {
        openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
            server.syslog_facility);
    }

    server.hz = server.config_hz;
    server.pid = getpid();
    server.current_client = NULL;

    //todo 2: client lists
    server.clients = listCreate();
    server.clients_index = raxNew();
    server.clients_to_close = listCreate();
    server.slaves = listCreate();
    server.monitors = listCreate();
    server.clients_pending_write = listCreate();
    server.slaveseldb = -1; /* Force to emit the first SELECT command. */
    server.unblocked_clients = listCreate();
    server.ready_keys = listCreate();
    server.clients_waiting_acks = listCreate();
    server.get_ack_from_slaves = 0;
    server.clients_paused = 0;
    server.system_memory_size = zmalloc_get_memory_size();

    // create the global shared objects
    createSharedObjects();
    adjustOpenFilesLimit();

    //todo 3: create the event loop
    server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
    if (server.el == NULL) {
        serverLog(LL_WARNING,
            "Failed creating the event loop. Error message: '%s'",
            strerror(errno));
        exit(1);
    }

    //todo 4: allocate the db array (16 databases by default)
    server.db = zmalloc(sizeof(redisDb)*server.dbnum);

    /* Open the TCP listening socket for the user commands. */
    //todo 5: listen for regular TCP connections
    if (server.port != 0 &&
        listenToPort(server.port,server.ipfd,&server.ipfd_count) == C_ERR)
        exit(1);

    /* Open the listening Unix domain socket. */
    // Unix domain socket
    if (server.unixsocket != NULL) {
        unlink(server.unixsocket); /* don't care if this fails */
        server.sofd = anetUnixServer(server.neterr,server.unixsocket,
            server.unixsocketperm, server.tcp_backlog);
        if (server.sofd == ANET_ERR) {
            serverLog(LL_WARNING, "Opening Unix socket: %s", server.neterr);
            exit(1);
        }
        anetNonBlock(NULL,server.sofd);
    }

    /* Abort if there are no listening sockets at all. */
    if (server.ipfd_count == 0 && server.sofd < 0) {
        serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
        exit(1);
    }

    /* Create the Redis databases, and initialize other internal state. */
    // todo: create and initialize each Redis database (16 by default)
    for (j = 0; j < server.dbnum; j++) {
        // create the keyspace
        server.db[j].dict = dictCreate(&dbDictType,NULL);
        server.db[j].expires = dictCreate(&keyptrDictType,NULL);
        server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
        server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL);
        server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
        server.db[j].id = j;
        server.db[j].avg_ttl = 0;
        server.db[j].defrag_later = listCreate();
    }
    evictionPoolAlloc(); /* Initialize the LRU keys pool. */

    // pub/sub state
    server.pubsub_channels = dictCreate(&keylistDictType,NULL);
    server.pubsub_patterns = listCreate();
    listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
    listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
    server.cronloops = 0;
    server.rdb_child_pid = -1;
    server.aof_child_pid = -1;
    server.rdb_child_type = RDB_CHILD_TYPE_NONE;
    server.rdb_bgsave_scheduled = 0;
    server.child_info_pipe[0] = -1;
    server.child_info_pipe[1] = -1;
    server.child_info_data.magic = 0;
    aofRewriteBufferReset();
    server.aof_buf = sdsempty();
    server.lastsave = time(NULL); /* At startup we consider the DB saved. */
    server.lastbgsave_try = 0;    /* At startup we never tried to BGSAVE. */
    server.rdb_save_time_last = -1;
    server.rdb_save_time_start = -1;
    server.dirty = 0;
    resetServerStats();
    /* A few stats we don't want to reset: server startup time, and peak mem. */
    server.stat_starttime = time(NULL);
    server.stat_peak_memory = 0;
    server.stat_rdb_cow_bytes = 0;
    server.stat_aof_cow_bytes = 0;
    server.cron_malloc_stats.zmalloc_used = 0;
    server.cron_malloc_stats.process_rss = 0;
    server.cron_malloc_stats.allocator_allocated = 0;
    server.cron_malloc_stats.allocator_active = 0;
    server.cron_malloc_stats.allocator_resident = 0;
    server.lastbgsave_status = C_OK;
    server.aof_last_write_status = C_OK;
    server.aof_last_write_errno = 0;
    server.repl_good_slaves_count = 0;

    /* Create the timer callback, this is our way to process many background
     * operations incrementally, like clients timeout, eviction of unaccessed
     * expired keys and so forth. */
    //todo 6: time event, serverCron
    if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
        serverPanic("Can't create event loop timers.");
        exit(1);
    }

    /* Create an event handler for accepting new connections in TCP and Unix
     * domain sockets. */
    // todo 7: file events, register acceptTcpHandler as the AE_READABLE callback
    //         for every listening fd
    for (j = 0; j < server.ipfd_count; j++) {
        if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
            acceptTcpHandler,NULL) == AE_ERR)
        {
            serverPanic(
                "Unrecoverable error creating server.ipfd file event.");
        }
    }
    if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
        acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");

    /* Register a readable event for the pipe used to awake the event loop
     * when a blocked client in a module needs attention. */
    if (aeCreateFileEvent(server.el, server.module_blocked_pipe[0], AE_READABLE,
        moduleBlockedClientPipeReadable,NULL) == AE_ERR) {
            serverPanic(
                "Error registering the readable event for the module "
                "blocked clients subsystem.");
    }

    /* Open the AOF file if needed. */
    // todo 8: if AOF is enabled, open the AOF file
    if (server.aof_state == AOF_ON) {
        server.aof_fd = open(server.aof_filename,
                             O_WRONLY|O_APPEND|O_CREAT,0644);
        if (server.aof_fd == -1) {
            serverLog(LL_WARNING, "Can't open the append-only file: %s",
                strerror(errno));
            exit(1);
        }
    }

    /* 32 bit instances are limited to 4GB of address space, so if there is
     * no explicit limit in the user provided configuration we set a limit
     * at 3 GB using maxmemory with 'noeviction' policy'. This avoids
     * useless crashes of the Redis instance for out of memory. */
    if (server.arch_bits == 32 && server.maxmemory == 0) {
        serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
        server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
        server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
    }

    if (server.cluster_enabled) clusterInit();
    replicationScriptCacheInit();
    scriptingInit(1);
    slowlogInit();
    latencyMonitorInit();
    bioInit();
    server.initial_memory_usage = zmalloc_used_memory();
}
Most of the logic above is not hard to follow; the pieces worth focusing on are:
aeCreateTimeEvent: creates a time event in the event loop. At startup Redis uses it to register a periodically executed event (serverCron). serverCron deserves its own walkthrough later; things like AOF rewrites and background RDB saves are driven from it.
aeCreateFileEvent: creates a file event in the event loop. File events handle network I/O, such as reads and writes on client connections.
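A hedged sketch of how these two registrations look, based on the callback signatures in ae.h; myCronTask, myReadHandler, and listen_fd are hypothetical names, not part of Redis:

/* Hypothetical time-event callback. The return value is the delay in
 * milliseconds before the next invocation (serverCron, for instance,
 * returns 1000/server.hz), or AE_NOMORE to run only once. */
int myCronTask(struct aeEventLoop *eventLoop, long long id, void *clientData) {
    /* ... periodic background work ... */
    return 100; /* run again in roughly 100 ms */
}

/* Hypothetical file-event callback, invoked when fd becomes readable. */
void myReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask) {
    /* ... accept() or read() on fd ... */
}

/* Registration, mirroring what initServer() does with serverCron and
 * acceptTcpHandler (listen_fd is assumed to be a listening socket): */
aeCreateTimeEvent(server.el, 1, myCronTask, NULL, NULL);
aeCreateFileEvent(server.el, listen_fd, AE_READABLE, myReadHandler, NULL);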
Flow diagram:
4. loadDataFromDisk
Restores the Redis databases from the RDB file or the AOF. Not the focus here, so just skim it; a rough sketch follows.
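For orientation only, a simplified sketch of the shape of loadDataFromDisk (paraphrased, not line-for-line accurate): if AOF is enabled the append-only file is replayed, otherwise the RDB dump is loaded.

/* Simplified sketch: AOF takes precedence when enabled, because it usually
 * holds a more complete history than the RDB snapshot. */
void loadDataFromDisk(void) {
    if (server.aof_state == AOF_ON) {
        if (loadAppendOnlyFile(server.aof_filename) == C_OK)
            serverLog(LL_NOTICE, "DB loaded from append only file");
    } else {
        rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
        if (rdbLoad(server.rdb_filename, &rsi) == C_OK)
            serverLog(LL_NOTICE, "DB loaded from disk");
    }
}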
5. aeMain
void aeMain(aeEventLoop *eventLoop) {
    eventLoop->stop = 0;
    // keep looping until stop is set
    while (!eventLoop->stop) {
        // if a beforeSleep callback has been registered
        if (eventLoop->beforesleep != NULL)
            // run it before blocking
            eventLoop->beforesleep(eventLoop);
        // the actual event processing (blocks in epoll_wait/select/kqueue, etc.)
        aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
    }
}
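Before getting to beforeSleep, here is a heavily simplified sketch of what one aeProcessEvents iteration does (condensed from ae.c; the real helpers such as aeApiPoll and processTimeEvents are static there, and the poll timeout is actually derived from the nearest time event rather than hard-coded):

/* Simplified sketch of one aeProcessEvents() iteration (not the real code). */
int aeProcessEvents_sketch(aeEventLoop *eventLoop, int flags) {
    /* 1. Decide how long we may block: in the real code this is the time
     *    until the nearest time event fires; here we just use 100 ms. */
    struct timeval tv = { .tv_sec = 0, .tv_usec = 100000 };

    /* 2. Block in the multiplexing backend (epoll/kqueue/select/evport). */
    int numevents = aeApiPoll(eventLoop, &tv);

    /* 3. Fire the registered read/write handlers for every ready fd. */
    for (int j = 0; j < numevents; j++) {
        aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
        int mask = eventLoop->fired[j].mask;
        if (fe->mask & mask & AE_READABLE)
            fe->rfileProc(eventLoop, eventLoop->fired[j].fd, fe->clientData, mask);
        if (fe->mask & mask & AE_WRITABLE)
            fe->wfileProc(eventLoop, eventLoop->fired[j].fd, fe->clientData, mask);
    }

    /* 4. Finally run the due time events; this is where serverCron executes. */
    if (flags & AE_TIME_EVENTS) processTimeEvents(eventLoop);
    return numevents;
}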
The beforeSleep callback here runs just before the event loop goes to sleep waiting for I/O events; it is mainly used for work that has to be finished before blocking.
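The callback is wired up in main() just before entering the loop; roughly (excerpted, with the surrounding lines trimmed):

/* In main(): register beforeSleep, then spin the event loop until shutdown. */
aeSetBeforeSleepProc(server.el, beforeSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);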
beforeSleep:
void beforeSleep(struct aeEventLoop *eventLoop) {
    UNUSED(eventLoop); // parameter unused; silence the compiler warning

    // if cluster mode is enabled, run the cluster's own beforeSleep hook
    if (server.cluster_enabled) clusterBeforeSleep();

    // if active expiration is enabled and this instance is a master,
    // run a fast expire cycle
    if (server.active_expire_enabled && server.masterhost == NULL)
        activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);

    // if clients blocked in the previous event loop iteration waiting for
    // replication ACKs, ask the slaves for an ACK now
    if (server.get_ack_from_slaves) {
        robj *argv[3];

        argv[0] = createStringObject("REPLCONF", 8);
        argv[1] = createStringObject("GETACK", 6);
        argv[2] = createStringObject("*", 1); /* Not used argument. */
        replicationFeedSlaves(server.slaves, server.slaveseldb, argv, 3);
        decrRefCount(argv[0]);
        decrRefCount(argv[1]);
        decrRefCount(argv[2]);
        server.get_ack_from_slaves = 0;
    }

    // unblock clients that were waiting for replica acknowledgements
    if (listLength(server.clients_waiting_acks))
        processClientsWaitingReplicas();

    // handle clients blocked by modules
    moduleHandleBlockedClients();

    // process the pending commands of clients that were just unblocked
    if (listLength(server.unblocked_clients))
        processUnblockedClients();

    // flush the AOF buffer to disk
    flushAppendOnlyFile(0);

    // handle clients with pending data in their output buffers
    handleClientsWithPendingWrites();

    // release the module GIL so other threads can access the dataset
    if (moduleCount()) moduleReleaseGIL();
}
int handleClientsWithPendingWrites(void) {
    listIter li;
    listNode *ln;
    // number of clients with pending writes
    int processed = listLength(server.clients_pending_write);

    listRewind(server.clients_pending_write,&li);
    // iterate over the pending-write list
    while((ln = listNext(&li))) {
        client *c = listNodeValue(ln);
        c->flags &= ~CLIENT_PENDING_WRITE;
        listDelNode(server.clients_pending_write,ln);

        /* If a client is protected, don't do anything,
         * that may trigger write error or recreate handler. */
        if (c->flags & CLIENT_PROTECTED) continue;

        /* Try to write buffers to the client socket. */
        // write data out to the client
        if (writeToClient(c->fd,c,0) == C_ERR) continue;

        /* If after the synchronous writes above we still have data to
         * output to the client, we need to install the writable handler. */
        // data still left to send
        if (clientHasPendingReplies(c)) {
            int ae_flags = AE_WRITABLE;
            /* For the fsync=always policy, we want that a given FD is never
             * served for reading and writing in the same event loop iteration,
             * so that in the middle of receiving the query, and serving it
             * to the client, we'll call beforeSleep() that will do the
             * actual fsync of AOF to disk. AE_BARRIER ensures that. */
            if (server.aof_state == AOF_ON &&
                server.aof_fsync == AOF_FSYNC_ALWAYS)
            {
                ae_flags |= AE_BARRIER;
            }
            // install sendReplyToClient so the remaining data is sent later
            if (aeCreateFileEvent(server.el, c->fd, ae_flags,
                sendReplyToClient, c) == AE_ERR)
            {
                freeClientAsync(c);
            }
        }
    }
    return processed;
}
The function to focus on is handleClientsWithPendingWrites, which writes reply data out to clients. When Redis calls writeToClient to write to a client's socket, a single write may not manage to flush the whole output buffer. If unsent data remains, Redis has to make sure it gets sent later, so it installs a writable event (AE_WRITABLE) for that client's socket in the event loop. When the socket becomes writable again (i.e. the kernel socket buffer has room for more data), the event loop fires the corresponding handler, sendReplyToClient, which keeps sending the remaining data to the client. A rough sketch of that handler is shown below.
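A minimal sketch of the handler, based on networking.c in Redis 4.x/5.x (details can differ between versions): it simply calls writeToClient again with handler_installed set to 1, and writeToClient removes the AE_WRITABLE event once the output buffers are drained.

void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    UNUSED(el);
    UNUSED(mask);
    /* privdata is the client; 1 means "a write handler is installed",
     * so writeToClient will delete the AE_WRITABLE event when done. */
    writeToClient(fd, privdata, 1);
}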