redis服务器的实现
redis服务器负责与多个客户端建立网络连接,处理客户端发送的命令请求,在数据库中保存客户端执行的命令所产生的数据,并通过资源管理来维护服务器自身的运行。
redis服务器基于Reactor(多reactor)模式开发了自己的网络事件处理器:文件事件处理器(file event handler)
- 文件事件处理器使用IO多路复用程序来同时监听多个套接字,并根据套接字目前执行的任务来为套接字关联不同的事件处理器
- 当被监听的套接字准备好执行连接应答
accept
,读取read
、写入write
、关闭close
等操作时,与操作相对应的文件事件就会产生,这时文件事件处理器就会调用套接字之前关联好的事件处理器来处理这些事件。
在linux操作系统下,redis使用的是基于epoll建立的网络模型。
事件循环(事件处理器)
-
主要结构–事件循环,多路复用的事件处理器的结构
/* State of an event based program */
typedef struct aeEventLoop {
    /* Highest file descriptor currently registered. aeCreateEventLoop()
     * initializes it to -1, and aeProcessEvents() treats -1 as "no file
     * descriptor registered". */
    int maxfd;   /* highest file descriptor currently registered */
    /* Capacity of the events[] and fired[] arrays (both are allocated with
     * setsize elements). The author notes that the size hint given to
     * epoll_create is only advisory to the kernel, so this field is the
     * real limit. */
    int setsize; /* max number of file descriptors tracked */
    /* Starts at 0 in aeCreateEventLoop(); presumably the id handed to the
     * next created time event -- confirm against aeCreateTimeEvent(). */
    long long timeEventNextId;
    /* Registered file events, indexed by file descriptor. */
    aeFileEvent *events; /* Registered events */
    /* Ready events read by aeProcessEvents() after aeApiPoll() returns;
     * the author notes they are filled in from epoll_wait. */
    aeFiredEvent *fired; /* Fired events */
    /* Head of the linked list of time events (aeDeleteEventLoop() walks it
     * through ->next). It is a list, not a heap. */
    aeTimeEvent *timeEventHead;
    /* Non-zero asks aeMain() to leave its loop; set by aeStop(). */
    int stop;
    /* State owned by the underlying multiplexing backend. */
    void *apidata; /* This is used for polling API specific data */
    /* Optional hook run by aeProcessEvents() before polling, only when the
     * caller passes AE_CALL_BEFORE_SLEEP. */
    aeBeforeSleepProc *beforesleep;
    /* Optional hook run by aeProcessEvents() right after aeApiPoll()
     * returns, only when the caller passes AE_CALL_AFTER_SLEEP. */
    aeBeforeSleepProc *aftersleep;
    /* Loop-wide flags; aeSetDontWait() toggles AE_DONT_WAIT here. */
    int flags;
} aeEventLoop;
有关
EventLoop
中的beforesleep
和aftersleep
在下面这篇文章中有详细的讲解。 -
触发事件的结构
这个
FiredEvent
的结构确实有点让我吃惊,因为我之前写的网络模型都是直接在一整个循环中进行处理,执行回调函数等等的,在redis里面是将这些就绪事件集中在一个列表里,交给事件分发器EventDispatch
进行处理
/* A fired event */
typedef struct aeFiredEvent {
    /* File descriptor that became ready; aeProcessEvents() uses it as the
     * index into eventLoop->events[]. */
    int fd;
    /* Mask of the event types that fired on this descriptor. */
    int mask;
} aeFiredEvent;
-
文件事件的结构
/* File event structure */
typedef struct aeFileEvent {
    /* Event types registered for this descriptor. */
    int mask; /* one of AE_(READABLE|WRITABLE|BARRIER) */
    aeFileProc *rfileProc; /* callback invoked for the readable event */
    aeFileProc *wfileProc; /* callback invoked for the writable event */
    /* Opaque caller-supplied pointer. aeProcessEvents() passes it
     * unchanged as the third argument of rfileProc/wfileProc; the event
     * loop code shown here never reads or writes through it. */
    void *clientData;
} aeFileEvent;
-
时间事件(定时事件)的结构
/* Time event structure */
typedef struct aeTimeEvent {
    /* Identifier of this time event. */
    long long id; /* time event identifier. */
    /* Timer value for this event -- presumably the monotonic time at which
     * it should fire (confirm against processTimeEvents()). Author's note:
     * monotime is a typedef of uint64_t. */
    monotime when;
    aeTimeProc *timeProc; /* callback of the time event */
    /* Cleanup callback; per the author's note it is invoked when the time
     * event is deleted. */
    aeEventFinalizerProc *finalizerProc;
    /* Opaque caller-supplied pointer associated with this event --
     * presumably handed back to timeProc/finalizerProc (confirm against
     * processTimeEvents()). */
    void *clientData;
    /* Previous time event in the list. */
    struct aeTimeEvent *prev;
    /* Next time event in the list. */
    struct aeTimeEvent *next;
    /* Reference count only; it does not mark the event as periodic. */
    int refcount; /* refcount to prevent timer events from being
                   * freed in recursive time event calls. */
} aeTimeEvent;
-
api
/* Prototypes */
/* Event loop lifecycle, polling and configuration. */
aeEventLoop *aeCreateEventLoop(int setsize);
void aeDeleteEventLoop(aeEventLoop *eventLoop);
void aeStop(aeEventLoop *eventLoop);
int aeProcessEvents(aeEventLoop *eventLoop, int flags);
int aeWait(int fd, int mask, long long milliseconds);
void aeMain(aeEventLoop *eventLoop);
char *aeGetApiName(void);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
int aeGetSetSize(aeEventLoop *eventLoop);
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
void aeSetDontWait(aeEventLoop *eventLoop, int noWait);
// File events
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
        aeFileProc *proc, void *clientData);
void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
int aeGetFileEvents(aeEventLoop *eventLoop, int fd);
void *aeGetFileClientData(aeEventLoop *eventLoop, int fd);
// Time events
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
        aeTimeProc *proc, void *clientData,
        aeEventFinalizerProc *finalizerProc);
int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id);
实现
-
aeEventLoop *aeCreateEventLoop(int setsize)
创建一个EventLoop
aeEventLoop *aeCreateEventLoop(int setsize) { aeEventLoop *eventLoop; int i; // 防止调用的应用没有被初始化 monotonicInit(); /* just in case the calling app didn't initialize */ if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err; eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize); eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize); if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err; eventLoop->setsize = setsize; eventLoop->timeEventHead = NULL; eventLoop->timeEventNextId = 0; eventLoop->stop = 0; eventLoop->maxfd = -1; eventLoop->beforesleep = NULL; eventLoop->aftersleep = NULL; eventLoop->flags = 0; if (aeApiCreate(eventLoop) == -1) goto err; /* Events with mask == AE_NONE are not set. So let's initialize the * vector with it. */ for (i = 0; i < setsize; i++) eventLoop->events[i].mask = AE_NONE; return eventLoop; err: if (eventLoop) { zfree(eventLoop->events); zfree(eventLoop->fired); zfree(eventLoop); } return NULL; }
-
void aeDeleteEventLoop(aeEventLoop *eventLoop)
/* Tear down an event loop: release the multiplexing backend state, the
 * events/fired arrays, every node of the time event list, and finally the
 * loop structure itself. */
void aeDeleteEventLoop(aeEventLoop *eventLoop) {
    aeTimeEvent *cur, *following;

    aeApiFree(eventLoop);
    zfree(eventLoop->events);
    zfree(eventLoop->fired);

    /* Free the time events list. The successor is saved before the node
     * is released so the walk never touches freed memory. */
    for (cur = eventLoop->timeEventHead; cur != NULL; cur = following) {
        following = cur->next;
        zfree(cur);
    }

    zfree(eventLoop);
}
-
void aeStop(aeEventLoop *eventLoop)
/* Ask the event loop to stop: sets the `stop` flag that aeMain() tests
 * before each iteration. */
void aeStop(aeEventLoop *eventLoop) {
    eventLoop->stop = 1;
}
-
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
事件循环,
reactor
的核心。参数flags
会有几种标记:-
AE_ALL_EVENTS
:处理就绪的socket事件(文件事件)和定时任务 -
AE_FILE_EVENTS
:处理就绪的socket事件 -
AE_TIME_EVENTS
:执行定时任务 -
AE_DONT_WAIT
:不需要阻塞调用io -
AE_CALL_BEFORE_SLEEP
:执行beforeSleep的回调函数 -
AE_CALL_AFTER_SLEEP
:执行afterSleep的回调函数
/* Reference copy of the glibc layout of struct timeval, the timeout type
 * aeProcessEvents() fills in before calling the multiplexing API.
 * In real code include <sys/time.h> instead of redeclaring it. */
struct timeval {
    __time_t tv_sec;        /* Seconds */
    __suseconds_t tv_usec;  /* Microseconds */
};
/* Run one iteration of the event loop: optionally poll for ready file
 * events and dispatch them to their handlers, then process time events.
 *
 * `flags` selects what to do (AE_FILE_EVENTS, AE_TIME_EVENTS,
 * AE_DONT_WAIT, AE_CALL_BEFORE_SLEEP, AE_CALL_AFTER_SLEEP).
 *
 * Returns the number of processed file/time events; 0 when neither
 * AE_TIME_EVENTS nor AE_FILE_EVENTS is requested. */
int aeProcessEvents(aeEventLoop *eventLoop, int flags) {
    /* processed: number of events handled in this call.
     * numevents: number of ready events reported by aeApiPoll(). */
    int processed = 0, numevents;

    /* Neither time events nor file events requested: nothing to do. */
    if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;

    /* Poll when at least one file descriptor is registered
     * (maxfd != -1), or when time events are requested and blocking is
     * allowed (AE_DONT_WAIT not set), so that we sleep until the next
     * timer is due. */
    if (eventLoop->maxfd != -1 ||
        ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
        int j;
        /* Timeout handed to the multiplexing API. */
        struct timeval tv, *tvp = NULL; /* NULL means infinite wait. */
        int64_t usUntilTimer;

        if (eventLoop->beforesleep != NULL && (flags & AE_CALL_BEFORE_SLEEP))
            eventLoop->beforesleep(eventLoop);

        /* eventLoop->flags may have been changed inside beforesleep, so it
         * is only examined here, after the callback. The `flags` argument
         * always has the highest priority: when it carries AE_DONT_WAIT
         * the value of eventLoop->flags is irrelevant. */
        if ((flags & AE_DONT_WAIT) || (eventLoop->flags & AE_DONT_WAIT)) {
            /* Zero timeout: poll without blocking. */
            tv.tv_sec = tv.tv_usec = 0;
            tvp = &tv;
        } else if (flags & AE_TIME_EVENTS) {
            /* Block at most until the earliest timer; a negative result
             * leaves tvp NULL (infinite wait). */
            usUntilTimer = usUntilEarliestTimer(eventLoop);
            if (usUntilTimer >= 0) {
                tv.tv_sec = usUntilTimer / 1000000;
                tv.tv_usec = usUntilTimer % 1000000;
                tvp = &tv;
            }
        }
        /* Call the multiplexing API, will return only on timeout or when
         * some event fires. */
        numevents = aeApiPoll(eventLoop, tvp);

        /* Don't process file events if not requested. */
        if (!(flags & AE_FILE_EVENTS)) {
            numevents = 0;
        }

        /* After sleep callback. */
        if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
            eventLoop->aftersleep(eventLoop);

        /* Dispatch every fired event to its registered handler(s). */
        for (j = 0; j < numevents; j++) {
            int fd = eventLoop->fired[j].fd;
            aeFileEvent *fe = &eventLoop->events[fd];
            int mask = eventLoop->fired[j].mask;
            /* Counts the handlers invoked for this descriptor. */
            int fired = 0; /* Number of events fired for current fd. */

            /* With AE_BARRIER set the order is inverted: the writable
             * handler runs first and the readable handler afterwards. */
            int invert = fe->mask & AE_BARRIER;

            if (!invert && fe->mask & mask & AE_READABLE) {
                fe->rfileProc(eventLoop,fd,fe->clientData,mask);
                fired++;
                fe = &eventLoop->events[fd]; /* Refresh in case of resize. */
            }

            /* Fire the writable event. Skipped when a handler already ran
             * for this fd and the same function is registered for both
             * directions. */
            if (fe->mask & mask & AE_WRITABLE) {
                if (!fired || fe->wfileProc != fe->rfileProc) {
                    fe->wfileProc(eventLoop,fd,fe->clientData,mask);
                    fired++;
                }
            }

            /* If we have to invert the call, fire the readable event now
             * after the writable one. */
            if (invert) {
                fe = &eventLoop->events[fd]; /* Refresh in case of resize. */
                if ((fe->mask & mask & AE_READABLE) &&
                    (!fired || fe->wfileProc != fe->rfileProc)) {
                    fe->rfileProc(eventLoop,fd,fe->clientData,mask);
                    fired++;
                }
            }

            processed++;
        }
    }
    /* Check time events */
    if (flags & AE_TIME_EVENTS)
        /* Run the timers and add their count to the total. */
        processed += processTimeEvents(eventLoop);

    return processed; /* return the number of processed file/time events */
}
-
-
int aeWait(int fd, int mask, long long milliseconds)
用于客户端和集群模式下,暂时不深究
/* Block for up to `milliseconds` waiting for `fd` to become ready for the
 * directions requested in `mask` (AE_READABLE and/or AE_WRITABLE).
 *
 * Returns a mask of AE_READABLE/AE_WRITABLE describing what fired when
 * poll() reports exactly one ready descriptor; error and hang-up
 * conditions are reported as AE_WRITABLE. Otherwise the poll() result is
 * returned unchanged (0 on timeout, -1 on error). */
int aeWait(int fd, int mask, long long milliseconds) {
    struct pollfd target;
    int ready = 0;
    int rc;

    memset(&target, 0, sizeof(target));
    target.fd = fd;
    if (mask & AE_READABLE) target.events |= POLLIN;
    if (mask & AE_WRITABLE) target.events |= POLLOUT;

    rc = poll(&target, 1, milliseconds);
    if (rc != 1) return rc;

    if (target.revents & POLLIN) ready |= AE_READABLE;
    /* POLLOUT, POLLERR and POLLHUP all map onto the writable bit. */
    if (target.revents & (POLLOUT | POLLERR | POLLHUP)) ready |= AE_WRITABLE;
    return ready;
}
-
void aeMain(aeEventLoop *eventLoop)
真正的reator反应器void aeMain(aeEventLoop *eventLoop) { eventLoop->stop = 0; while (!eventLoop->stop) { aeProcessEvents(eventLoop, AE_ALL_EVENTS| AE_CALL_BEFORE_SLEEP| AE_CALL_AFTER_SLEEP); } }
-
char *aeGetApiName(void)
获取底层多路复用api的名字void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
int aeGetSetSize(aeEventLoop *eventLoop)
/* Return the name reported by the multiplexing backend (aeApiName()). */
char *aeGetApiName(void) {
    return aeApiName();
}

/* Install the callback that aeProcessEvents() runs before polling when
 * AE_CALL_BEFORE_SLEEP is requested. */
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
    eventLoop->beforesleep = beforesleep;
}

/* Install the callback that aeProcessEvents() runs after polling when
 * AE_CALL_AFTER_SLEEP is requested. */
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) {
    eventLoop->aftersleep = aftersleep;
}

/* Return the current set size. */
int aeGetSetSize(aeEventLoop *eventLoop) {
    return eventLoop->setsize;
}
-
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize)
:重新设置事件循环的大小,
/* Change the number of file descriptors the event loop can track.
 *
 * Returns AE_OK when the size is already `setsize` or the resize
 * succeeded. Returns AE_ERR, leaving the loop untouched, when a registered
 * descriptor would no longer fit (maxfd >= setsize) or when the backend
 * resize (aeApiResize) fails. */
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
    int slot;

    if (eventLoop->setsize == setsize) return AE_OK;
    /* The new size must be strictly greater than the highest fd in use. */
    if (setsize <= eventLoop->maxfd) return AE_ERR;
    if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR;

    eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
    eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
    eventLoop->setsize = setsize;

    /* Slots above maxfd may be newly created: mark them unregistered. */
    slot = eventLoop->maxfd + 1;
    while (slot < setsize) {
        eventLoop->events[slot].mask = AE_NONE;
        slot++;
    }
    return AE_OK;
}
-
void aeSetDontWait(aeEventLoop *eventLoop, int noWait)
/*
 * Turn the loop-wide AE_DONT_WAIT flag on (noWait != 0) or off
 * (noWait == 0). aeProcessEvents() polls with a zero timeout while the
 * flag is set.
 */
void aeSetDontWait(aeEventLoop *eventLoop, int noWait) {
    if (!noWait) {
        eventLoop->flags &= ~AE_DONT_WAIT;
        return;
    }
    eventLoop->flags |= AE_DONT_WAIT;
}
-