linux free函数源码,linuxepoll模型源码分析一函数实现

最新推荐文章于 2021-05-14 14:17:01 发布

泰坦V

最新推荐文章于 2021-05-14 14:17:01 发布

阅读量180

点赞数

文章标签： linux free函数源码

一。模块的加载

linux把EPOLL当做一个模块，模块入口函数的代码如下：

/* epoll module entry point */

/*
 * eventpoll_init - one-time initialisation of the epoll subsystem.
 *
 * Derives the per-user limit on epoll watches from the amount of low
 * memory, initialises the three nested-call trackers and creates the two
 * slab caches used for "struct epitem" and "struct eppoll_entry".
 *
 * Always returns 0. Both caches are created with SLAB_PANIC, so a failed
 * cache creation panics instead of returning NULL; that is why the results
 * of kmem_cache_create() are not checked here.
 */
static int __init eventpoll_init(void)
{
	struct sysinfo si;

	si_meminfo(&si);

	/*
	 * Allows top 4% of lomem to be allocated for epoll watches (per user).
	 * (totalram - totalhigh) is the low-memory page count; 1/25 of it is
	 * converted to bytes and divided by the cost of a single watch.
	 */
	max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
		EP_ITEM_COST;
	BUG_ON(max_user_watches < 0);

	/*
	 * Initialize the structure used to perform epoll file descriptor
	 * inclusion loops checks.
	 */
	ep_nested_calls_init(&poll_loop_ncalls);

	/* Initialize the structure used to perform safe poll wait head wake ups */
	ep_nested_calls_init(&poll_safewake_ncalls);

	/* Initialize the structure used to perform file's f_op->poll() calls */
	ep_nested_calls_init(&poll_readywalk_ncalls);

	/* Allocates slab cache used to allocate "struct epitem" items */
	epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* Allocates slab cache used to allocate "struct eppoll_entry" */
	pwq_cache = kmem_cache_create("eventpoll_pwq",
			sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);

	return 0;
}
fs_initcall(eventpoll_init);

/*
 * fs_initcall() registers eventpoll_init as a boot-time init call at the
 * "fs" initcall level. It plays the same role module_init() plays for other
 * code, but built-in module_init() functions run at the later "device" level.
 */

这个函数主要是进行一些初始化配置，同时创建了2个内核cache用于存放epitem和epoll_entry。

二 epoll_create函数的实现

epoll_create 创建一个epoll实例，即一个epoll的文件(epfd)，同时创建并初始化一个struct eventpoll，其中epfd所对应的file的private_data指针即指向了eventpoll变量，因此，知道epfd就可以拿到file，即拿到了eventpoll变量。

下面我们来看具体实现：

/*
 * epoll_create - legacy entry point taking a single integer argument.
 *
 * @size is only validated: a value <= 0 yields -EINVAL. Any positive value
 * is otherwise unused and the work is delegated to epoll_create1 with no
 * flags set.
 */
SYSCALL_DEFINE1(epoll_create, int, size)
{
	/* Positive size: forward to epoll_create1(0); otherwise reject. */
	return size > 0 ? sys_epoll_create1(0) : -EINVAL;
}

* Open an eventpoll file descriptor.

SYSCALL_DEFINE1(epoll_create1, int, flags)

{

int error;

struct eventpoll *ep = NULL;

/* Check the EPOLL_* constant for consistency. */

BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);

if (flags & ~EPOLL_CLOEXEC)

return -EINVAL;

* Create the internal data structure ("struct eventpoll").

error = ep_alloc(&ep);//分配eventpoll结构体

if (error

return error;

* Creates all the items needed to setup an eventpoll file. That is,

* a file structure and a free file descriptor.

error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,

O_RDWR | (flags & O_CLOEXEC)); //创建与eventpoll结构体相对应的file结构，ep保存在file->private_data结构中

//eventpoll_fops 为该文件所对应的操作函数

if (error

ep_free(ep); //如果出错则释放该eventpoll结构体

return error;

}

epoll_create1用到了一对关联函数：ep_alloc和ep_free，分别负责eventpoll结构体的内存分配和释放

函数anon_inode_getfd创建与eventpoll结构体相对应的file结构，ep保存在file->private_data结构中，同时为该新文件定义操作函数

从这几行代码可以看出，epoll_create主要做了两件事：

* 创建并初始化一个eventpoll结构体变量

* 创建epoll的file结构，并指定file的private_data指针指向刚创建的eventpoll变量，这样，只要根据epoll文件描述符epfd就可以拿到file进而就拿到了eventpoll变量了，该eventpoll就是epoll_ctl和epoll_wait工作的场所

对外看来，epoll_create就做了一件事，那就是创建一个epoll文件，事实上，更关键的是，它创建了一个eventpoll结构体变量，该变量为epoll_ctl和epoll_wait的工作打下了基础。

ep_alloc,ep_free以及anon_inode_getfd的具体实现可以查看源代码。

三 epoll_ctl的实现

epoll_ctl系统调用主要是针对epfd所对应的epoll实例进行增、删、改fd的操作，一个新创建的epoll文件带有一个struct eventpoll结构,同时struct eventpoll这个结构上再挂一个红黑树,红黑树上的每个节点挂的都是struct epitem,这个红黑树就是每次epoll_ctl时fd存放的地方!对应该红黑树上节点的操作，有ep_find，ep_insert,ep_remove,ep_modify四个函数，它们都将epoll文件实例的eventpoll结构作为参数传递。

下面来看看该函数的具体实现：

* The following function implements the controller interface for

* the eventpoll file that enables the insertion/removal/change of

* file descriptors inside the interest set.

* epfd为该epoll套接字实例,op表示对应的操作，fd表示新加入的套接字，

* 结构体epoll_event 用于注册fd所感兴趣的事件和回传在fd上所发生待处理的事件

SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,

struct epoll_event __user *, event)

{

int error;

int did_lock_epmutex = 0;

struct file *file, *tfile;

struct eventpoll *ep;

struct epitem *epi;

struct epoll_event epds;

error = -EFAULT;

if (ep_op_has_event(op) &&

copy_from_user(&epds, event, sizeof(struct epoll_event))) //copy_from_user将用户传入的event_poll拷贝到epds中，以供自己使用

goto error_return;

/* Get the "struct file *" for the eventpoll file */

error = -EBADF;

file = fget(epfd); //获取该epoll套接字实例所对应的文件

if (!file)

goto error_return;

/* Get the "struct file *" for the target file */

tfile = fget(fd);

if (!tfile)

goto error_fput;

/* The target file descriptor must support poll */

error = -EPERM;

if (!tfile->f_op || !tfile->f_op->poll)

goto error_tgt_fput;

* We have to check that the file structure underneath the file descriptor

* the user passed to us _is_ an eventpoll file. And also we do not permit

* adding an epoll file descriptor inside itself.

error = -EINVAL;

if (file == tfile || !is_file_epoll(file))

goto error_tgt_fput;

* At this point it is safe to assume that the "private_data" contains

* our own data structure.

ep = file->private_data;//获取epoll实例所对应的eventpoll结构体

* When we insert an epoll file descriptor, inside another epoll file

* descriptor, there is the change of creating closed loops, which are

* better be handled here, than in more critical paths.

* We hold epmutex across the loop check and the insert in this case, in

* order to prevent two separate inserts from racing and each doing the

* insert "at the same time" such that ep_loop_check passes on both

* before either one does the insert, thereby creating a cycle.

if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {

mutex_lock(&epmutex);

did_lock_epmutex = 1;

error = -ELOOP;

if (ep_loop_check(ep, tfile) != 0)

goto error_tgt_fput;

}

mutex_lock(&ep->mtx);

* Try to lookup the file inside our RB tree, Since we grabbed "mtx"

* above, we can be sure to be able to use the item looked up by

* ep_find() till we release the mutex.

* ep_find即从ep中的红黑树中根据tfile和fd来查找epitem

epi = ep_find(ep, tfile, fd);

error = -EINVAL;

switch (op) {

case EPOLL_CTL_ADD: //对应于socket上事件注册

if (!epi) { //红黑树中不存在这个节点

epds.events |= POLLERR | POLLHUP; //或操作，确保“出错、连接挂起”被当做感兴趣事件，因为底层有义务将出错信息返回给应用

error = ep_insert(ep, &epds, tfile, fd);

} else

error = -EEXIST;

break;

case EPOLL_CTL_DEL: //删除

if (epi) //存在则删除这个节点，不存在则报错

error = ep_remove(ep, epi);

else

error = -ENOENT;

break;

case EPOLL_CTL_MOD: //修改

if (epi) { //存在则修改该fd所对应的事件，不存在则报错

epds.events |= POLLERR | POLLHUP;

error = ep_modify(ep, epi, &epds);

} else

error = -ENOENT;

break;

}

mutex_unlock(&ep->mtx);

error_tgt_fput:

if (unlikely(did_lock_epmutex))

mutex_unlock(&epmutex);

fput(tfile);

error_fput:

fput(file);

error_return:

return error;

}

结合上一篇博文的内容，对于往epoll实例中添加新的套接字，其实现主要通过函数ep_insert来完成，本文先分析epoll_wait再回过头来分析ep_insert

四 epoll_wait的实现

epoll_wait等待epoll文件上的I/O事件发生，其代码如下：

* Implement the event wait interface for the eventpoll file. It is the kernel

* part of the user space epoll_wait(2).

SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,

int, maxevents, int, timeout)

{

int error;

struct file *file;

struct eventpoll *ep;

/* The maximum number of event must be greater than zero */

if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)

return -EINVAL;

/* Verify that the area passed by the user is writeable */

if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) {

error = -EFAULT;

goto error_return;

}

/* Get the "struct file *" for the eventpoll file */

error = -EBADF;

file = fget(epfd);

if (!file)

goto error_return;

* We have to check that the file structure underneath the fd

* the user passed to us _is_ an eventpoll file.

error = -EINVAL;

if (!is_file_epoll(file))

goto error_fput;

* At this point it is safe to assume that the "private_data" contains

* our own data structure.

ep = file->private_data;//获取struct eventpoll结构

/* Time to fish for events ... */

error = ep_poll(ep, events, maxevents, timeout);//核心代码

error_fput:

fput(file);

error_return:

return error;

}可以看出该函数主要时通过epfd获取对应的struct eventpoll结构，然后调用ep_poll函数，下面来看ep_poll函数的实现：

/**

* ep_poll - Retrieves ready events, and delivers them to the caller supplied

* event buffer.

* @ep: Pointer to the eventpoll context.

* @events: Pointer to the userspace buffer where the ready events should be

* stored.

* @maxevents: Size (in terms of number of events) of the caller event buffer.

* @timeout: Maximum timeout for the ready events fetch operation, in

* milliseconds. If the @timeout is zero, the function will not block,

* while if the @timeout is less than zero, the function will block

* until at least one event has been retrieved (or an error

* occurred).

* Returns: Returns the number of ready events which have been fetched, or an

* error code, in case of error.

static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,

int maxevents, long timeout)

{

int res = 0, eavail, timed_out = 0;

unsigned long flags;

long slack = 0;

wait_queue_t wait;

ktime_t expires, *to = NULL;

/* The call waits for a maximum time of timeout milliseconds. Specifying a timeout of -1 makes epoll_wait()

wait indefinitely,while specifying a timeout equal to zero makes epoll_wait() to return immediately even if no events

are available (return code equal to zero).*/

if (timeout > 0) {

struct timespec end_time = ep_set_mstimeout(timeout);

slack = select_estimate_accuracy(&end_time);

to = &expires;

*to = timespec_to_ktime(end_time);

} else if (timeout == 0) {

* Avoid the unnecessary trip to the wait queue loop, if the

* caller specified a non blocking operation.

timed_out = 1;

spin_lock_irqsave(&ep->lock, flags);

goto check_events;

}

fetch_events:

spin_lock_irqsave(&ep->lock, flags);

// 如果rdllist中还没有epitem时，就开始等待了

if (!ep_events_available(ep)) {

* We don't have any available event to return to the caller.

* We need to sleep here, and we will be wake up by

* ep_poll_callback() when events will become available.

// 初始化等待队列，等待队列项对应的线程即为当前线程

init_waitqueue_entry(&wait, current);

// 不用多说，先将当前线程挂到等待队列上，之后在调用schedule_timeout

// 时，就开始了超时等待了

__add_wait_queue_exclusive(&ep->wq, &wait);

for (;;) {

* We don't want to sleep if the ep_poll_callback() sends us

* a wakeup in between. That's why we set the task state

* to TASK_INTERRUPTIBLE before doing the checks.

// 因为会被阻塞，这里先设置线程状态为可中断

set_current_state(TASK_INTERRUPTIBLE);

// 整个循环的核心，其实就在看rdllist中是否有数据，或者等待超时

// 应征了前面的说明，epoll_wait只需要等着收集数据即可

if (ep_events_available(ep) || timed_out)

break;

if (signal_pending(current)) {

res = -EINTR;

break;

}

spin_unlock_irqrestore(&ep->lock, flags);

if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))

timed_out = 1;

spin_lock_irqsave(&ep->lock, flags);

}

__remove_wait_queue(&ep->wq, &wait);

set_current_state(TASK_RUNNING);

}

check_events:

/* Is it worth to try to dig for events ? */

eavail = ep_events_available(ep);

spin_unlock_irqrestore(&ep->lock, flags);

* Try to transfer events to user space. In case we get 0 events and

* there's still timeout left over, we go trying again in search of

* more luck.

if (!res && eavail &&

!(res = ep_send_events(ep, events, maxevents)) && !timed_out)

goto fetch_events;

return res;

}

29-43行：主要是超时时间的处理，若超时时间为0，则直接检查有没有准备好的I/O事件，有则立即发送给用户空间去处理；若超时时间大于0，计算好精确的超时时间后，等待事件的发生，45-86行等待指定的时间直到有I/O事件出现；

54-58行：如果还没有I/O事件出现，则准备休眠。先初始化等待队列项并将其与当前线程关联，再把该等待队列项挂到eventpoll结构的等待队列wq上，

60-82行：在指定的超时时间内循环检测有没有I/O事件发生，有事件发生、超时或者收到信号都会跳出循环。

83行：运行到此处说明已有I/O事件发生、等待超时或者收到了信号，不用再等待，于是把等待队列项从wq上移除

linux free函数 源码,linuxepoll模型源码分析一函数实现

linux free函数源码,linuxepoll模型源码分析一函数实现