一、epoll模块的介绍
在nginx中,epoll模块是事件模块中的一个重要模块,主要负责对网络IO事件进行处理,默认使用边沿触发(ET)模式。
一般来说,nginx模块都会有一个commands数组,用来解析配置文件中与该模块相关的配置项。epoll模块的commands数组如下:
其中epoll_events配置在events块中,用来指定epoll事件数组的大小,也可以不配置而使用默认值。
/*
 * Configuration directives handled by the epoll module.
 *
 * Both directives are flagged NGX_EVENT_CONF|NGX_CONF_TAKE1 (declared for
 * the events configuration and taking one argument) and are parsed by
 * ngx_conf_set_num_slot into the ngx_epoll_conf_t field selected by
 * offsetof().  The table is terminated by ngx_null_command.
 */
static ngx_command_t  ngx_epoll_commands[] = {

    /* epoll_events -> ngx_epoll_conf_t.events */
    { ngx_string("epoll_events"),
      NGX_EVENT_CONF|NGX_CONF_TAKE1,
      ngx_conf_set_num_slot,
      0,
      offsetof(ngx_epoll_conf_t, events),
      NULL },

    /* worker_aio_requests -> ngx_epoll_conf_t.aio_requests */
    { ngx_string("worker_aio_requests"),
      NGX_EVENT_CONF|NGX_CONF_TAKE1,
      ngx_conf_set_num_slot,
      0,
      offsetof(ngx_epoll_conf_t, aio_requests),
      NULL },

      ngx_null_command
};
二、事件模块的相关配置
/*
 * Common context structure that every event module provides.
 */
typedef struct {
    /* name of the event module */
    ngx_str_t              *name;

    /*
     * called before the configuration is parsed; creates the structure
     * that will hold this module's configuration values
     */
    void                 *(*create_conf)(ngx_cycle_t *cycle);

    /*
     * called after parsing has finished; post-processes all configuration
     * items this event module is interested in
     */
    char                 *(*init_conf)(ngx_cycle_t *cycle, void *conf);

    /* the ten methods each event-driven module implements */
    ngx_event_actions_t     actions;
} ngx_event_module_t;
而其中最为重要的是10个actions函数指针。
下面是ngx_event_actions_t的介绍
typedefstruct {
//添加事件模块
ngx_int_t (*add)(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags);
//删除事件模块
ngx_int_t (*del)(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags);
//启动一个事件
ngx_int_t (*enable)(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags);
//禁用一个事件
ngx_int_t (*disable)(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags);
ngx_int_t (*add_conn)(ngx_connection_t *c);//向事件添加一个新连接
ngx_int_t (*del_conn)(ngx_connection_t *c, ngx_uint_t flags);//向事件移除一个连接
ngx_int_t (*process_changes)(ngx_cycle_t *cycle, ngx_uint_t nowait);
//正常工作循环中调process_events处理事件
ngx_int_t (*process_events)(ngx_cycle_t *cycle, ngx_msec_t timer,
ngx_uint_t flags);
//初始化事件驱动模块
ngx_int_t (*init)(ngx_cycle_t *cycle, ngx_msec_t timer);
//退出事件驱动模块前的方法
void (*done)(ngx_cycle_t *cycle);
} ngx_event_actions_t;
下面是epoll模块上下文结构的实现
/*
 * Event-module context of the epoll module: its name, the configuration
 * callbacks and the action table.  Note that enable/disable reuse the
 * add/del implementations and that process_changes is not provided.
 */
ngx_event_module_t  ngx_epoll_module_ctx = {
    &epoll_name,
    ngx_epoll_create_conf,               /* create configuration */
    ngx_epoll_init_conf,                 /* init configuration */

    {
        ngx_epoll_add_event,             /* add an event */
        ngx_epoll_del_event,             /* delete an event */
        ngx_epoll_add_event,             /* enable an event */
        ngx_epoll_del_event,             /* disable an event */
        ngx_epoll_add_connection,        /* add a connection */
        ngx_epoll_del_connection,        /* delete a connection */
        NULL,                            /* process the changes */
        ngx_epoll_process_events,        /* process the events */
        ngx_epoll_init,                  /* init the events */
        ngx_epoll_done,                  /* done the events */
    }
};
当然,模块最重要的部分是nginx的ngx_module_t的实现,下面是其实现
/*
 * Module descriptor of the epoll module.  It ties together the module
 * context and the directive table and marks the module as an event
 * module; none of the lifecycle hooks are used.
 */
ngx_module_t  ngx_epoll_module = {
    NGX_MODULE_V1,
    &ngx_epoll_module_ctx,               /* module context */
    ngx_epoll_commands,                  /* module directives */
    NGX_EVENT_MODULE,                    /* module type */
    NULL,                                /* init master */
    NULL,                                /* init module */
    NULL,                                /* init process */
    NULL,                                /* init thread */
    NULL,                                /* exit thread */
    NULL,                                /* exit process */
    NULL,                                /* exit master */
    NGX_MODULE_V1_PADDING
};
其实,通过ngx_epoll_module就可以把epoll模块的上下文(ngx_epoll_module_ctx)和配置指令(ngx_epoll_commands)统一组织起来。
三、函数描述
ngx_epoll_init比较简单,逻辑如下
/*
 * Initialize the epoll event module.
 *
 * Creates the epoll object if it does not exist yet, (re)allocates the
 * event_list array when the configured epoll_events value is larger than
 * the current capacity, and installs the epoll I/O methods, action table
 * and event flags.
 *
 * cycle - current cycle; supplies conf_ctx, connection_n and the log
 * timer - not used by this function
 *
 * Returns NGX_OK on success, NGX_ERROR if epoll_create() or the
 * allocation of event_list fails.
 */
static ngx_int_t
ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer)
{
    ngx_epoll_conf_t  *epcf;

    /* configuration structure of the epoll module for this cycle */
    epcf = ngx_event_get_conf(cycle->conf_ctx, ngx_epoll_module);

    if (ep == -1) {
        /* no epoll object yet: create one in the kernel */
        ep = epoll_create(cycle->connection_n / 2);

        if (ep == -1) {
            ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                          "epoll_create() failed");
            return NGX_ERROR;
        }
    }

    if (nevents < epcf->events) {
        /* the current array is too small for the configured size */
        if (event_list) {
            ngx_free(event_list);
        }

        /* one struct epoll_event per configured epoll_events entry */
        event_list = ngx_alloc(sizeof(struct epoll_event) * epcf->events,
                               cycle->log);
        if (event_list == NULL) {
            return NGX_ERROR;
        }
    }

    /* nevents mirrors the configured epoll_events value */
    nevents = epcf->events;

    /* select the read/write I/O methods */
    ngx_io = ngx_os_io;

    /* install the epoll implementation of the event actions */
    ngx_event_actions = ngx_epoll_module_ctx.actions;

#if (NGX_HAVE_CLEAR_EVENT)
    /* NGX_USE_CLEAR_EVENT tells nginx to use epoll in edge-triggered mode */
    ngx_event_flags = NGX_USE_CLEAR_EVENT
#else
    ngx_event_flags = NGX_USE_LEVEL_EVENT
#endif
                      |NGX_USE_GREEDY_EVENT
                      |NGX_USE_EPOLL_EVENT;

    return NGX_OK;
}
函数ngx_epoll_add_event
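大体逻辑如下:先根据event参数确定要添加的是读事件还是写事件;再检查同一连接上的另一个事件是否已经是active状态,若是则使用EPOLL_CTL_MOD并保留另一个事件的标志,否则使用EPOLL_CTL_ADD;最后调用epoll_ctl,成功后把该事件的active标志置为1。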
对应代码:
/*
 * Register a read or write event of a connection with epoll.
 *
 * ev    - event to add; ev->data holds the owning ngx_connection_t
 * event - NGX_READ_EVENT selects the read side, anything else the write side
 * flags - extra epoll flags OR-ed into the event mask
 *
 * If the connection's other event is already active, the descriptor is
 * already in the epoll set, so the registration is modified
 * (EPOLL_CTL_MOD) and the other event's flag is kept; otherwise the
 * descriptor is added (EPOLL_CTL_ADD).
 *
 * Returns NGX_OK on success, NGX_ERROR if epoll_ctl() fails.
 */
static ngx_int_t
ngx_epoll_add_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags)
{
    int                  op;
    uint32_t             events, prev;
    ngx_event_t         *e;
    ngx_connection_t    *c;
    struct epoll_event   ee;

    /* the data member of an event holds its connection */
    c = ev->data;

    events = (uint32_t) event;

    if (event == NGX_READ_EVENT) {
        /* adding the read side: the "other" event is the write event */
        e = c->write;
        prev = EPOLLOUT;
#if (NGX_READ_EVENT != EPOLLIN)
        events = EPOLLIN;
#endif

    } else {
        /* adding the write side: the "other" event is the read event */
        e = c->read;
        prev = EPOLLIN;
#if (NGX_WRITE_EVENT != EPOLLOUT)
        events = EPOLLOUT;
#endif
    }

    if (e->active) {
        /* the other event is already registered: modify, keep its flag */
        op = EPOLL_CTL_MOD;
        events |= prev;

    } else {
        op = EPOLL_CTL_ADD;
    }

    /* merge the caller's flags into the event mask */
    ee.events = events | (uint32_t) flags;

    /* store the connection pointer with the event's instance flag OR-ed in */
    ee.data.ptr = (void *) ((uintptr_t) c | ev->instance);

    ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
                   "epoll add event: fd:%d op:%d ev:%08XD",
                   c->fd, op, ee.events);

    if (epoll_ctl(ep, op, c->fd, &ee) == -1) {
        ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
                      "epoll_ctl(%d, %d) failed", op, c->fd);
        return NGX_ERROR;
    }

    /* mark the event as active */
    ev->active = 1;
#if 0
    ev->oneshot = (flags & NGX_ONESHOT_EVENT) ? 1 : 0;
#endif

    return NGX_OK;
}
函数ngx_epoll_del_event
相关代码
/*
 * Remove a read or write event of a connection from epoll.
 *
 * ev    - event to delete; ev->data holds the owning ngx_connection_t
 * event - NGX_READ_EVENT selects the read side, anything else the write side
 * flags - NGX_CLOSE_EVENT skips the epoll_ctl() call; otherwise the flags
 *         are OR-ed into the mask kept for the other event
 *
 * If the connection's other event is still active, the registration is
 * modified (EPOLL_CTL_MOD) so that only the other event's flag remains;
 * otherwise the descriptor is removed (EPOLL_CTL_DEL).
 *
 * Returns NGX_OK on success, NGX_ERROR if epoll_ctl() fails.
 */
static ngx_int_t
ngx_epoll_del_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags)
{
    int                  op;
    uint32_t             prev;
    ngx_event_t         *e;
    ngx_connection_t    *c;
    struct epoll_event   ee;

    /*
     * when the file descriptor is closed, the epoll automatically deletes
     * it from its queue, so we do not need to delete explicitly the event
     * before the closing the file descriptor
     */

    if (flags & NGX_CLOSE_EVENT) {
        ev->active = 0;
        return NGX_OK;
    }

    c = ev->data;

    if (event == NGX_READ_EVENT) {
        /* deleting the read side: the "other" event is the write event */
        e = c->write;
        prev = EPOLLOUT;

    } else {
        /* deleting the write side: the "other" event is the read event */
        e = c->read;
        prev = EPOLLIN;
    }

    if (e->active) {
        /* keep the other event registered */
        op = EPOLL_CTL_MOD;
        ee.events = prev | (uint32_t) flags;
        ee.data.ptr = (void *) ((uintptr_t) c | ev->instance);

    } else {
        op = EPOLL_CTL_DEL;
        ee.events = 0;
        ee.data.ptr = NULL;
    }

    ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0,
                   "epoll del event: fd:%d op:%d ev:%08XD",
                   c->fd, op, ee.events);

    if (epoll_ctl(ep, op, c->fd, &ee) == -1) {
        ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_errno,
                      "epoll_ctl(%d, %d) failed", op, c->fd);
        return NGX_ERROR;
    }

    ev->active = 0;

    return NGX_OK;
}
函数ngx_epoll_add_connection
具体函数
staticngx_int_t
ngx_epoll_add_connection(ngx_connection_t*c)
{
struct epoll_event ee;
ee.events = EPOLLIN|EPOLLOUT|EPOLLET;
ee.data.ptr = (void *) ((uintptr_t) c |c->read->instance);
ngx_log_debug2(NGX_LOG_DEBUG_EVENT,c->log, 0,
"epoll add connection:fd:%d ev:%08XD", c->fd, ee.events);
if (epoll_ctl(ep, EPOLL_CTL_ADD, c->fd,&ee) == -1) {
ngx_log_error(NGX_LOG_ALERT, c->log,ngx_errno,
"epoll_ctl(EPOLL_CTL_ADD, %d) failed", c->fd);
return NGX_ERROR;
}
c->read->active = 1;
c->write->active = 1;
return NGX_OK;
}
函数ngx_epoll_del_connection
代码如下
/*
 * Remove a connection from epoll and mark both of its events inactive.
 *
 * c     - connection to remove
 * flags - when NGX_CLOSE_EVENT is set, epoll_ctl() is not called and only
 *         the active flags are cleared
 *
 * Returns NGX_OK on success, NGX_ERROR if epoll_ctl() fails.
 */
static ngx_int_t
ngx_epoll_del_connection(ngx_connection_t *c, ngx_uint_t flags)
{
    int                 op;
    struct epoll_event  ee;

    /*
     * when the file descriptor is closed the epoll automatically deletes
     * it from its queue so we do not need to delete explicitly the event
     * before the closing the file descriptor
     */

    if (flags & NGX_CLOSE_EVENT) {
        c->read->active = 0;
        c->write->active = 0;
        return NGX_OK;
    }

    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
                   "epoll del connection: fd:%d", c->fd);

    op = EPOLL_CTL_DEL;
    ee.events = 0;
    ee.data.ptr = NULL;

    if (epoll_ctl(ep, op, c->fd, &ee) == -1) {
        ngx_log_error(NGX_LOG_ALERT, c->log, ngx_errno,
                      "epoll_ctl(%d, %d) failed", op, c->fd);
        return NGX_ERROR;
    }

    c->read->active = 0;
    c->write->active = 0;

    return NGX_OK;
}
函数ngx_epoll_process_events
/*
 * Wait for events with epoll_wait() and dispatch them.
 *
 * cycle - current cycle; supplies the log
 * timer - timeout passed to epoll_wait(); NGX_TIMER_INFINITE means no timeout
 * flags - NGX_UPDATE_TIME refreshes the cached time after the wait;
 *         NGX_POST_EVENTS queues ready events instead of calling their
 *         handlers directly; NGX_POST_THREAD_EVENTS selects posted_ready
 *         instead of ready as the flag that is set
 *
 * Returns NGX_OK when events were processed, when the wait timed out, or
 * when it was interrupted by the timer alarm; NGX_ERROR when epoll_wait()
 * fails otherwise or returns no events although no timeout was given.
 */
static ngx_int_t
ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags)
{
    int                events;
    uint32_t           revents;
    ngx_int_t          instance, i;
    ngx_uint_t         level;
    ngx_err_t          err;
    ngx_event_t       *rev, *wev, **queue;
    ngx_connection_t  *c;

    /* NGX_TIMER_INFINITE == INFTIM */

    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                   "epoll timer: %M", timer);

    /* collect ready events */
    events = epoll_wait(ep, event_list, (int) nevents, timer);

    /* capture errno right away, before any other call can overwrite it */
    err = (events == -1) ? ngx_errno : 0;

    /* refresh the cached time when requested or when the alarm fired */
    if (flags & NGX_UPDATE_TIME || ngx_event_timer_alarm) {
        ngx_time_update();
    }

    if (err) {
        if (err == NGX_EINTR) {

            if (ngx_event_timer_alarm) {
                /* interrupted by the timer alarm: not an error */
                ngx_event_timer_alarm = 0;
                return NGX_OK;
            }

            level = NGX_LOG_INFO;

        } else {
            level = NGX_LOG_ALERT;
        }

        ngx_log_error(level, cycle->log, err, "epoll_wait() failed");
        return NGX_ERROR;
    }

    if (events == 0) {
        if (timer != NGX_TIMER_INFINITE) {
            /* plain timeout */
            return NGX_OK;
        }

        ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
                      "epoll_wait() returned no events without timeout");
        return NGX_ERROR;
    }

    ngx_mutex_lock(ngx_posted_events_mutex);

    /* walk through every event returned by this epoll_wait() call */
    for (i = 0; i < events; i++) {
        /* pointer stored by ngx_epoll_add_event()/ngx_epoll_add_connection() */
        c = event_list[i].data.ptr;

        /* the lowest bit of the stored pointer carries the instance flag */
        instance = (uintptr_t) c & 1;

        /* clear that bit to recover the real connection address */
        c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1);

        rev = c->read;

        if (c->fd == -1 || rev->instance != instance) {

            /*
             * the stale event from a file descriptor
             * that was just closed in this iteration
             */

            ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                           "epoll: stale event %p", c);
            continue;
        }

        /* event mask reported by epoll */
        revents = event_list[i].events;

        ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                       "epoll: fd:%d ev:%04XD d:%p",
                       c->fd, revents, event_list[i].data.ptr);

        if (revents & (EPOLLERR|EPOLLHUP)) {
            ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                           "epoll_wait() error on fd:%d ev:%04XD",
                           c->fd, revents);
        }

#if 0
        if (revents & ~(EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP)) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
                          "strange epoll_wait() events fd:%d ev:%04XD",
                          c->fd, revents);
        }
#endif

        if ((revents & (EPOLLERR|EPOLLHUP))
             && (revents & (EPOLLIN|EPOLLOUT)) == 0)
        {
            /*
             * if the error events were returned without EPOLLIN or EPOLLOUT,
             * then add these flags to handle the events at least in one
             * active handler
             */

            revents |= EPOLLIN|EPOLLOUT;
        }

        if ((revents & EPOLLIN) && rev->active) {

            /* non-accept events under NGX_POST_THREAD_EVENTS use posted_ready */
            if ((flags & NGX_POST_THREAD_EVENTS) && !rev->accept) {
                rev->posted_ready = 1;

            } else {
                rev->ready = 1;
            }

            if (flags & NGX_POST_EVENTS) {
                /* deferred handling: accept events go to their own queue */
                queue = (ngx_event_t **) (rev->accept ?
                               &ngx_posted_accept_events : &ngx_posted_events);

                ngx_locked_post_event(rev, queue);

            } else {
                /* handle the read event immediately */
                rev->handler(rev);
            }
        }

        wev = c->write;

        if ((revents & EPOLLOUT) && wev->active) {

            /* re-check staleness using the write event's instance flag */
            if (c->fd == -1 || wev->instance != instance) {

                /*
                 * the stale event from a file descriptor
                 * that was just closed in this iteration
                 */

                ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                               "epoll: stale event %p", c);
                continue;
            }

            if (flags & NGX_POST_THREAD_EVENTS) {
                wev->posted_ready = 1;

            } else {
                wev->ready = 1;
            }

            if (flags & NGX_POST_EVENTS) {
                /* deferred handling: queue the write event */
                ngx_locked_post_event(wev, &ngx_posted_events);

            } else {
                /* handle the write event immediately */
                wev->handler(wev);
            }
        }
    }

    ngx_mutex_unlock(ngx_posted_events_mutex);

    return NGX_OK;
}
最后做几个问题的探讨:
一、在ngx_epoll_add_event中,有部分代码如下:
/*
 * Excerpt: pick the connection's OTHER event.  When a read event is being
 * added, e is the write event and prev is the write flag (EPOLLOUT).
 */
if (event == NGX_READ_EVENT) {
e = c->write;
prev = EPOLLOUT;
#if (NGX_READ_EVENT != EPOLLIN)
events = EPOLLIN;
#endif
} else {
/* otherwise e is the read event and prev is the read flag (EPOLLIN) */
e = c->read;
prev = EPOLLIN;
#if (NGX_WRITE_EVENT != EPOLLOUT)
events = EPOLLOUT;
#endif
}
if分支判断的是读事件,但分支里取出的却是写事件(c->write)及其对应的标志EPOLLOUT;同样,else分支处理的是写事件,取出的却是读事件(c->read)及标志EPOLLIN。为什么会这样呢?
/*
 * Excerpt: if e is already active, modify the existing registration and
 * OR prev into events; otherwise add a new registration.
 */
if (e->active) {
op = EPOLL_CTL_MOD;
events |= prev;
} else {
op = EPOLL_CTL_ADD;
}
这是因为调用epoll_ctl注册事件时有两种操作:add和mod。如果file descriptor是第一次加入epoll,则操作为add;如果file descriptor已经在epoll中,则只能通过mod来改变原来的监控方式。
根据man手册中的epoll部分介绍,
Q1 What happens if you register the same file descriptor on an epoll instance twice?
A1 You will probably get EEXIST. However, it is possible to add a duplicate (dup(2), dup2(2), fcntl(2) F_DUPFD) descriptor to the same
epoll instance. This can be a useful technique for filtering events, if the duplicate file descriptors are registered with different
events masks.
即如果fd已经存在于epoll中,则errno是EEXIST
nginx解决此问题就是采用上述方法
比如,如果event是读事件,就用变量e指向同一连接的写事件,用prev记录写事件对应的标志EPOLLOUT,然后判断e->active:如果写事件已经是活跃的,说明该fd已经存在于epoll中,此时必须使用EPOLL_CTL_MOD,并把prev合并进events,以免丢掉原来对写事件的监控。如果没有这一步判断而直接使用EPOLL_CTL_ADD,则在调用epoll_ctl时会产生错误,错误码是EEXIST
二、主要的处理函数ngx_epoll_process_events
用语言描述其逻辑
调用epoll_wait获取就绪事件的数目
更新时间
for循环遍历处理每个就绪事件
如果是读事件则调用读事件回调函数
如果是写事件则调用写事件回调函数
以上便是epoll模块的大体处理流程,第一次写博客,错误之处或者疑问之处,请多指正。
参考资料:
1、《深入理解nginx》
2、https://blog.csdn.net/brainkick/article/details/9080789
3、man epoll