poll是IO复用函数之一(select、poll、epoll)。理解了其中一个IO复用函数的原理,其余的IO复用函数也就大同小异了。
注:基于linux-4.14.67进行分析梳理
目录
1.poll机制内核框架
2.poll相关结构体
3.poll机制和等待队列的关联体现
用户空间调用poll时,会进入内核的sys_poll,内核的sys_poll最终会调用到我们在驱动中编写的.poll函数。所以先从内核函数sys_poll开始分析。
摘自:8.中断按键驱动程序之poll机制(详解)-CSDN博客
sys_poll代码
在内核sys_poll里调用函数do_sys_poll
/*
 * poll system-call entry point (abridged excerpt; the "..." lines stand for
 * code the article leaves out).
 * The visible part forwards the user-space pollfd array (ufds), its length
 * (nfds) and the timeout `to` to do_sys_poll() and stores the result in ret.
 * NOTE(review): `to` is prepared in the omitted lines, presumably from
 * timeout_msecs - confirm against fs/select.c.
 */
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
int, timeout_msecs)
{
...
ret = do_sys_poll(ufds, nfds, to);
...
}
2.poll相关结构体
函数do_sys_poll 就会涉及很多poll的结构体:poll_wqueues、poll_table、poll_table_entry
/*
 * Core of the poll system call (abridged excerpt; "..." marks omitted code).
 * Visible steps: declare a struct poll_wqueues on the stack, initialise it
 * with poll_initwait(), run do_poll() over the poll_list, then release the
 * wait state with poll_freewait().
 */
static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
struct timespec64 *end_time)
{
/* Per-call wait state; its embedded poll_table is what do_poll() hands on. */
struct poll_wqueues table;
...
/* On-stack buffer that is reinterpreted as the first poll_list node. */
long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
struct poll_list *const head = (struct poll_list *)stack_pps;
struct poll_list *walk = head;
...
/* Installs __pollwait as the queueing callback and records current. */
poll_initwait(&table);
fdcount = do_poll(head, &table, end_time);
poll_freewait(&table);
...
}
结构体定义以及之间的联系
/*
 * Per-call wait state for poll. One instance is declared on the stack of
 * do_sys_poll() and initialised by poll_initwait().
 */
struct poll_wqueues {
poll_table pt; /* embedded table; __pollwait() gets back to this struct from it via container_of() */
struct poll_table_page *table; /* set to NULL by poll_initwait() */
struct task_struct *polling_task; /* set to current by poll_initwait(); the task __pollwake() wakes */
int triggered; /* cleared to 0 by poll_initwait() */
int error; /* cleared to 0 by poll_initwait() */
int inline_index; /* cleared to 0 by poll_initwait(); presumably the next free slot in inline_entries - confirm */
struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; /* entries stored inside the struct itself */
};
/*
 * Handle passed to a file's .poll method. poll_wait() uses it to invoke the
 * queueing callback.
 */
typedef struct poll_table_struct {
poll_queue_proc _qproc; /* callback run by poll_wait(); poll_initwait() sets it to __pollwait */
unsigned long _key; /* event mask; init_poll_funcptr() sets it to ~0UL, __pollwait() copies it into entry->key */
} poll_table;
/*
 * One registration of the polling task on a wait queue; filled in by
 * __pollwait().
 */
struct poll_table_entry {
struct file *filp; /* file being polled; __pollwait() stores get_file(filp) here */
unsigned long key; /* event mask copied from poll_table::_key; checked by pollwake() */
wait_queue_entry_t wait; /* the element actually linked into the wait queue */
wait_queue_head_t *wait_address; /* the wait queue head that `wait` was added to */
};
“poll_wqueues:用于在select/poll时,如果需要阻塞进程,将进程添加到描述符表标识的所有文件的poll等待队列中,以便任意一个文件可进行非阻塞I/O操作时唤醒进程。
poll_table:对每个文件进行poll操作时,判断是否能够非阻塞地进行key值(poll事件组成)标识的I/O操作;如果不能,调用回调函数_qproc将进程添加到文件的poll等待队列中。
poll_table_entry:用于阻塞进程并将进程添加到文件的poll等待队列中,一个文件对应一个poll_table_entry。”
摘自:I/O复用 poll的内核源码剖析_n_inline_poll_entries-CSDN博客
将结构体之间的联系代入函数场景分析:调用函数 do_sys_poll会定义结构体变量 struct poll_wqueues table,紧接着在函数do_sys_poll内调用函数poll_initwait对结构体变量table成员(重点关注成员pwq->pt、pwq->polling_task)初始化,初始化的成员参考函数poll_initwait。(从这里可以看到结构体struct poll_wqueues和poll_table的联系)
/*
 * Initialise a poll_wqueues before polling starts: install __pollwait as the
 * poll_table callback, record the calling task as the one to wake, and reset
 * the remaining bookkeeping fields.
 */
void poll_initwait(struct poll_wqueues *pwq)
{
init_poll_funcptr(&pwq->pt, __pollwait);
/* The task running this call is the one __pollwake() will later wake. */
pwq->polling_task = current;
pwq->triggered = 0;
pwq->error = 0;
pwq->table = NULL;
pwq->inline_index = 0;
}
/*
 * Set up a poll_table: store the queueing callback and enable every event
 * bit in the key.
 */
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
{
pt->_qproc = qproc;
pt->_key = ~0UL; /* all events enabled */
}
接着在函数do_sys_poll内调用函数do_poll,在函数do_poll内调用函数do_pollfd,在函数do_pollfd会调用到我们驱动写的.poll函数,IO复用驱动对应的.poll函数都会执行函数poll_wait
/*
 * Polling loop (heavily abridged excerpt; "..." marks omitted code, so the
 * braces shown here do not balance).
 * Visible structure: inside an endless for (;;) loop, walk every poll_list
 * node, and for each pollfd in the node's entries array call do_pollfd()
 * with the poll_table embedded in `wait`.
 */
static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
struct timespec64 *end_time)
{
/* The poll_table handed to each file's .poll method via do_pollfd(). */
poll_table* pt = &wait->pt;
...
for (;;) {
struct poll_list *walk;
bool can_busy_loop = false;
for (walk = list; walk != NULL; walk = walk->next) {
struct pollfd * pfd, * pfd_end;
pfd = walk->entries;
pfd_end = pfd + walk->len;
for (; pfd != pfd_end; pfd++) {
...
if (do_pollfd(pfd, pt, &can_busy_loop,
busy_flag)) {
...
}
/*
 * Poll a single pollfd (heavily abridged excerpt; "..." marks omitted code,
 * so the braces shown here do not balance).
 * Visible behaviour: when the descriptor resolves to a file whose f_op
 * provides a .poll method, that method is called with the file and the
 * poll_table, and its result is stored in mask.
 */
static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
bool *can_busy_poll,
unsigned int busy_flag)
{
...
fd = pollfd->fd;
...
if (f.file) {
...
if (f.file->f_op->poll) {
...
/* This is where the driver's own .poll implementation runs. */
mask = f.file->f_op->poll(f.file, pwait);
...
}
/*
 * Helper called from a driver's .poll method. It invokes the poll_table's
 * queueing callback with the file and the wait queue head, and is a no-op
 * when p, p->_qproc or wait_address is NULL.
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
if (p && p->_qproc && wait_address)
p->_qproc(filp, wait_address, p);
}
3.poll机制和等待队列的关联体现
上节提到若驱动的.poll函数执行函数poll_wait,可以看到终于和等待队列牵扯上了,poll_wait会执行回调函数,这里的回调函数在初始化结构体变量 struct poll_wqueues table 的成员pt时进行了设置,pt->_qproc = qproc; 即函数__pollwait
/*
 * Queueing callback installed by poll_initwait() and reached through
 * poll_wait(). It obtains a poll_table_entry, fills it in for this
 * file/wait-queue pair and adds it to the wait queue.
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
poll_table *p)
{
/* p is the pt member of a poll_wqueues, so step back to the container. */
struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
struct poll_table_entry *entry = poll_get_entry(pwq);
/* No entry available: return without queueing anything. */
if (!entry)
return;
entry->filp = get_file(filp);
entry->wait_address = wait_address;
entry->key = p->_key;
/* Use pollwake() as the wake-up function for this wait queue element. */
init_waitqueue_func_entry(&entry->wait, pollwake);
/* private holds the poll_wqueues, not a task; __pollwake() reads it back. */
entry->wait.private = pwq;
add_wait_queue(wait_address, &entry->wait);
}
在该函数里,用结构体struct poll_table_entry对用户空间传入的文件描述符进行实例化,从这里可以看到结构体struct poll_wqueues和struct poll_table_entry的联系。这里需要注意等待队列链表元素初始化的内容,等待队列链表元素结构体如下:
/* One element linked into a wait queue. */
struct wait_queue_entry {
unsigned int flags;
void *private; /* opaque pointer for the wake function; __pollwait() stores the poll_wqueues here */
wait_queue_func_t func; /* wake-up callback; presumably what init_waitqueue_func_entry() sets - confirm */
struct list_head entry; /* list linkage */
};
通常都是将private设置为待唤醒的进程,但这里却设置成了结构体struct poll_wqueues的指针(entry->wait.private = pwq),可以查看函数pollwake一探究竟
/*
 * Wake-up function registered by __pollwait() for each queued entry.
 * Returns 0 without waking anything when a non-NULL key shares no bits with
 * the entry's key; otherwise passes the call on to __pollwake().
 */
static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct poll_table_entry *entry;
/* wait is the `wait` member of a poll_table_entry; recover the entry. */
entry = container_of(wait, struct poll_table_entry, wait);
/* A NULL key skips the filter entirely. */
if (key && !((unsigned long)key & entry->key))
return 0;
return __pollwake(wait, mode, sync, key);
}
/*
 * Performs the actual wake-up (abridged excerpt; "..." marks omitted code).
 * Reads the poll_wqueues back from wait->private, declares a temporary wait
 * queue element for pwq->polling_task, and passes that element to
 * default_wake_function().
 */
static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
/* private was set to the poll_wqueues in __pollwait(). */
struct poll_wqueues *pwq = wait->private;
/* Temporary element declared with the polling task recorded by poll_initwait(). */
DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
...
return default_wake_function(&dummy_wait, mode, sync, key);
}
可以看到最终还是借助一个private为待唤醒进程的等待队列元素来唤醒:__pollwake用pwq->polling_task构造了临时的dummy_wait(pwq在前面已经初始化成员polling_task为待唤醒进程:pwq->polling_task = current),再把它交给default_wake_function,剩下的就是等待队列那一套处理逻辑。