内核驱动的 poll 函数是支撑 poll、epoll 和 select 系统调用的底层机制,这些系统调用可以查询一个或多个文件描述符的读写状态。驱动的 poll 函数返回一个按位表示的 mask 值,用来表明当前是否可以无阻塞地读或写,并且能够在设备变为可读或可写时唤醒相关的休眠进程。如果将 poll 函数置为 NULL,则表示该设备始终可以无阻塞地读和写。
文件对象的操作结构体
Linux 中文件对象的操作由 file_operations 结构体表示:
include/linux/fs.h
/*
 * Table of operations for a file object. Apart from owner, every member
 * shown is a function pointer. The "..." line marks members that this
 * excerpt leaves out.
 */
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
/* Poll hook: returns a bit mask telling which operations would not block. */
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*open) (struct inode *, struct file *);
...
};
基于特定设备的 poll 函数实现
不同的设备可以只实现内核文件操作中的一部分,也可以实现全部操作。例如 dtlk 设备 (DoubleTalk PC driver for Linux) 支持的文件操作如下:
drivers/char/dtlk.c
/*
 * File operations implemented by the dtlk driver. Only the members listed
 * below are set explicitly; the poll hook is wired to dtlk_poll.
 */
static const struct file_operations dtlk_fops =
{
.owner = THIS_MODULE,
.read = dtlk_read,
.write = dtlk_write,
.poll = dtlk_poll,
.unlocked_ioctl = dtlk_ioctl,
.open = dtlk_open,
.release = dtlk_release,
};
具体的 poll 函数实现因设备而异,但一般都包含如下两个部分:
- 调用 poll_wait 将包含当前文件描述符的 poll_table_entry 加入等待队列
- 返回一个 mask 值,表明目前哪些操作是可以无阻塞进行的
/*
 * Poll handler for the dtlk device.
 *
 * Registers the caller on dtlk_process_list via poll_wait(), then builds the
 * mask of operations that can proceed right now:
 *   - POLLIN | POLLRDNORM  when indexing is available and the device is readable
 *   - POLLOUT | POLLWRNORM when the device is writeable
 * The driver timer is re-armed on every call.
 *
 * Returns the accumulated event mask (0 when nothing is ready).
 */
static unsigned int dtlk_poll(struct file *file, poll_table * wait)
{
	int mask = 0;

	TRACE_TEXT(" dtlk_poll");

	poll_wait(file, &dtlk_process_list, wait);

	if (dtlk_has_indexing && dtlk_readable()) {
		del_timer(&dtlk_timer);
		mask |= POLLIN | POLLRDNORM;
	}

	if (dtlk_writeable()) {
		del_timer(&dtlk_timer);
		mask |= POLLOUT | POLLWRNORM;
	}

	/* there are no exception conditions */

	/* There won't be any interrupts, so we set a timer instead. */
	mod_timer(&dtlk_timer, jiffies + (3 * HZ) / 100);

	return mask;
}
以 select 为例说明 poll 的调用过程及原理
以 select 为例:
- poll_initwait() 会将 poll_table 的操作函数绑定到 __pollwait()
- (*f_op->poll)(file, wait) 调用设备驱动的 poll 函数,驱动在其中通过 poll_wait() 间接调用 __pollwait(),从而将 poll_table_entry 加入等待队列
- poll_freewait() 释放相关资源
fs/select.c
/*
 * Abridged outline of do_select(); the "..." lines stand for code omitted
 * from this excerpt (including the declarations of mask, f_op, file, wait
 * and retval).
 */
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
struct poll_wqueues table;
/* Set up the poll table before entering the loop. */
poll_initwait(&table);
for (;;) {
...
/* Invoke the file's poll hook and collect its event mask. */
mask = (*f_op->poll)(file, wait);
...
}
/* Tear the poll table down again once the loop is left. */
poll_freewait(&table);
return retval;
}
poll_initwait 实现
fs/select.c(其中 poll_table 的类型定义和 init_poll_funcptr() 位于 include/linux/poll.h)
/*
 * Callback type stored in a poll_table: receives the file, the wait queue
 * head and the poll_table itself.
 */
typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
/* Poll table: a queueing callback plus a key value. */
typedef struct poll_table_struct {
poll_queue_proc qproc;
unsigned long key;
} poll_table;
/*
 * Prepare a poll_wqueues for use.
 *
 * Installs __pollwait as the table's queueing callback, records the current
 * task as the poller, and resets the bookkeeping fields to their empty state.
 */
void poll_initwait(struct poll_wqueues *pwq)
{
	/* Callback that will be invoked through the embedded poll_table. */
	init_poll_funcptr(&pwq->pt, __pollwait);

	pwq->polling_task = current;

	/* No entries allocated yet, neither inline nor in a page list. */
	pwq->table = NULL;
	pwq->inline_index = 0;

	/* No wakeup and no error recorded so far. */
	pwq->triggered = 0;
	pwq->error = 0;
}
EXPORT_SYMBOL(poll_initwait);
/*
 * Initialise a poll_table: set every bit of the key and store the queueing
 * callback that will be called through pt->qproc.
 */
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
{
	/* all events enabled */
	pt->key = ~0UL;
	pt->qproc = qproc;
}
/*
 * Add a new entry.
 *
 * Recovers the poll_wqueues that embeds @p, obtains a poll_table_entry from
 * it, fills the entry in and hooks it onto @wait_address with pollwake as the
 * wake function.  Does nothing when no entry can be obtained.
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
		       poll_table *p)
{
	struct poll_wqueues *queues = container_of(p, struct poll_wqueues, pt);
	struct poll_table_entry *slot = poll_get_entry(queues);

	if (slot) {
		/* Take a reference on the file for as long as the entry exists. */
		get_file(filp);

		slot->filp = filp;
		slot->wait_address = wait_address;
		slot->key = p->key;

		/* private is set after the wait entry has been initialised. */
		init_waitqueue_func_entry(&slot->wait, pollwake);
		slot->wait.private = queues;

		add_wait_queue(wait_address, &slot->wait);
	}
}
poll_wait 实现
include/linux/poll.h
/*
 * Forward a queueing request to the poll table's callback.
 *
 * A NULL poll table or a NULL wait queue head makes this a no-op.
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (!p || !wait_address)
		return;

	p->qproc(filp, wait_address, p);
}
poll_freewait 实现
fs/select.c
/*
 * Release everything a poll_wqueues has accumulated.
 *
 * First frees the entries stored inline in @pwq, then walks the linked list
 * of entry pages: on each page the entries are freed from the last used one
 * back down to the first, after which the page itself is freed.
 */
void poll_freewait(struct poll_wqueues *pwq)
{
	struct poll_table_page *page = pwq->table;
	int idx;

	for (idx = 0; idx < pwq->inline_index; idx++)
		free_poll_entry(&pwq->inline_entries[idx]);

	while (page) {
		struct poll_table_page *following = page->next;
		struct poll_table_entry *cursor = page->entry;

		/* page->entry points one past the last used slot; walk backwards. */
		do {
			cursor--;
			free_poll_entry(cursor);
		} while (cursor > page->entries);

		free_page((unsigned long) page);
		page = following;
	}
}
EXPORT_SYMBOL(poll_freewait);