文章目录
一、系统调用接口sys_poll
应用层调用poll函数,会执行此函数。
fs/select.c
// Entry point of the poll system call. How this macro expands step by step
// into sys_poll is traced in the macro definitions that follow.
// A non-negative timeout_msecs is handed to poll_select_set_timeout() to fill
// in end_time; a negative value leaves `to` NULL, and that NULL is passed on.
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,int, timeout_msecs)
{
struct timespec64 end_time, *to = NULL;
int ret;
if (timeout_msecs >= 0) {
to = &end_time;
// Split the millisecond timeout into whole seconds plus leftover nanoseconds.
poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
}
// Key step: the real work happens in do_sys_poll().
ret = do_sys_poll(ufds, nfds, to);
...
return ret;
}
1、SYSCALL_DEFINE3()宏
// Three-argument variant: prefixes the syscall name with '_' and forwards to SYSCALL_DEFINEx with x = 3.
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
2、SYSCALL_DEFINEx()宏
// Expands to SYSCALL_METADATA(...) followed by the actual definition via __SYSCALL_DEFINEx(...).
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
3、__SYSCALL_DEFINEx()宏
#ifndef __SYSCALL_DEFINEx
// Declares sys<name> as an alias of __se_sys<name>; with name = _poll this
// yields sys_poll. The rest of the macro body is elided in this excerpt.
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name)))); \
...
#endif /* __SYSCALL_DEFINEx */
4、timespec64结构体
include/linux/time64.h
// Time value used by the timeout handling (e.g. end_time in the poll syscall).
struct timespec64
{
// seconds; time64_t is typedef'd to a signed 64-bit integer in the kernel,
// so it is 64 bits wide on every platform, including 32-bit ones
time64_t tv_sec;
// nanoseconds
long tv_nsec;
};
二、do_sys_poll()函数
fs/select.c
- 复制用户空间pollfd数组到内核空间
  - 先使用栈上的固定大小数组(第一个poll_list结构体)
  - 不够时再动态分配内存(一组poll_list结构体)
- 调用do_poll函数
- 返回修改后的pollfd数组到用户空间
  - 主要是为了返回修改后的revents值
// Copies the user's pollfd array into a chain of poll_list chunks, runs
// do_poll() on that chain, then writes each entry's revents back to user space.
// Parts of the function are elided ("...") in this excerpt.
static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct timespec64 *end_time)
{
struct poll_wqueues table;
int err = -EFAULT, fdcount, len, size;
// On-stack buffer backing the first poll_list; POLL_STACK_ALLOC is defined below.
long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
struct poll_list *const head = (struct poll_list *)stack_pps;
struct poll_list *walk = head;
// Entries still to copy; starts at nfds, the array length passed as poll()'s second argument.
unsigned long todo = nfds;
...
// The first chunk lives in the stack buffer: take the smaller of nfds and
// the number of pollfd entries that buffer can hold.
len = min_t(unsigned int, nfds, N_STACK_PPS);
for (;;) {
walk->next = NULL;
walk->len = len;
if (!len)
break;
// ufds + nfds-todo is the first user entry that has not been copied yet.
if (copy_from_user(walk->entries, ufds + nfds-todo,
sizeof(struct pollfd) * walk->len))
goto out_fds;
todo -= walk->len;
if (!todo)
break;
// Size the next chunk for the remaining descriptors, capped at POLLFD_PER_PAGE entries.
len = min(todo, POLLFD_PER_PAGE);
size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
walk = walk->next = kmalloc(size, GFP_KERNEL);
if (!walk) {
err = -ENOMEM;
goto out_fds;
}
}
// Initialise the poll_wqueues; this installs __pollwait as the queueing callback.
poll_initwait(&table);
// fdcount is do_poll()'s result: the number of files that reported events,
// or a negative error code. The polled entries are copied back to user space below.
fdcount = do_poll(head, &table, end_time);
poll_freewait(&table);
// Walk the poll_list chain (linked list).
for (walk = head; walk; walk = walk->next) {
struct pollfd *fds = walk->entries;
int j;
// Walk the pollfd array of this chunk; ufds advances in step with j.
for (j = 0; j < walk->len; j++, ufds++)
// Hand the resulting event mask back to user space.
if (__put_user(fds[j].revents, &ufds->revents))
goto out_fds;
}
...
}
1、相关宏定义
// Size in bytes of the on-stack buffer (stack_pps) used by do_sys_poll().
#define FRONTEND_STACK_ALLOC 256
#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC
// Number of struct pollfd entries that fit in stack_pps after the poll_list header.
#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
sizeof(struct pollfd))
2、poll_initwait()函数
fs/select.c
// Initialises a poll_wqueues; the remainder of the body is elided in this excerpt.
void poll_initwait(struct poll_wqueues *pwq)
{
// The second argument is a function: __pollwait becomes the queueing callback of pwq->pt.
init_poll_funcptr(&pwq->pt, __pollwait);
...
}
3、init_poll_funcptr()函数
include/linux/poll.h
// Plain member assignment: stores the queueing callback in the poll_table
// and sets its key so that all events are enabled.
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
{
// When called from poll_initwait(), poll_wqueues->pt._qproc becomes __pollwait.
pt->_qproc = qproc;
pt->_key = ~(__poll_t)0; /* all events enabled */
}
4、__pollwait()函数
fs/select.c
此函数会在 poll_wait 函数中被调用
// Queueing callback installed by poll_initwait(): obtains a poll_table_entry
// from the enclosing poll_wqueues, fills it in and adds it to the wait queue
// at wait_address. Returns silently if no entry could be obtained.
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
poll_table *p)
{
// Recover the poll_wqueues that embeds this poll_table.
struct poll_wqueues *pwq = container_of(p, struct poll_wqueues,pt);
// Each call to poll_get_entry() consumes one poll_table_entry belonging to
// the poll_wqueues (inline entries first, then on-demand pages).
struct poll_table_entry *entry = poll_get_entry(pwq);
if (!entry)
return;
entry->filp = get_file(filp);
entry->wait_address = wait_address;
entry->key = p->_key;
// pollwake is registered as the wake-up function of this wait entry.
init_waitqueue_func_entry(&entry->wait, pollwake);
entry->wait.private = pwq;
// Add the entry to the wait queue.
add_wait_queue(wait_address, &entry->wait);
}
拓展:poll_table_entry结构体
struct poll_table_entry {
struct file *filp;
unsigned long key;
// 等待队列元素
wait_queue_t wait;
wait_queue_head_t *wait_address;
};
(1)poll_get_entry()函数
fs/select.c
// Returns the next free poll_table_entry: the inline entries of the
// poll_wqueues are used first, then entries from page-sized tables that are
// allocated on demand. On allocation failure sets p->error to -ENOMEM and
// returns NULL.
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
struct poll_table_page *table = p->table;
// Inline entries are still available: hand out the next one.
if (p->inline_index < N_INLINE_POLL_ENTRIES)
return p->inline_entries + p->inline_index++;
if (!table || POLL_TABLE_FULL(table)) {
struct poll_table_page *new_table;
// No table yet, or the current one is full: allocate a new page.
new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
if (!new_table) {
p->error = -ENOMEM;
return NULL;
}
// Start handing out entries from the beginning of the new page and
// push the page onto the front of the table list.
new_table->entry = new_table->entries;
new_table->next = table;
p->table = new_table;
table = new_table;
}
return table->entry++;
}
三、do_poll()函数
fs/select.c
- 三重循环
第一重for:确保线程/进程被唤醒后,继续执行一次循环体内容
第二重for:遍历一组poll_list
第三重for:遍历每个poll_list中的一组pollfd
// Scans every pollfd in the list, repeating until some file reports events,
// an error or pending signal is found, or the timeout expires. Returns the
// number of files with events, or a negative value (wait->error or -EINTR).
// Parts of the function are elided ("...") in this excerpt.
static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
struct timespec64 *end_time)
{
poll_table* pt = &wait->pt;
ktime_t expire, *to = NULL;
int timed_out = 0, count = 0;
u64 slack = 0;
__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
unsigned long busy_start = 0;
...
for (;;) {
struct poll_list *walk;
bool can_busy_loop = false;
// Second level: walk the poll_list chain (linked list).
for (walk = list; walk != NULL; walk = walk->next) {
struct pollfd * pfd, * pfd_end;
pfd = walk->entries;
pfd_end = pfd + walk->len;
// Third level: walk the pollfd array of this chunk.
for (; pfd != pfd_end; pfd++) {
if (do_pollfd(pfd, pt, &can_busy_loop,
busy_flag)) {
// Non-zero return value: this file has event(s) to report, so
// count++ (count is the number of files with returned events).
// A zero return value means nothing happened on this file.
count++;
// Once something is found, stop queueing on further wait queues.
pt->_qproc = NULL;
/* found something, stop busy polling */
busy_flag = 0;
can_busy_loop = false;
}
}
}
// After the first full scan the queueing callback is cleared, so
// poll_wait() does nothing on later passes.
pt->_qproc = NULL;
if (!count) {
count = wait->error;
// Check whether the current process or thread has a pending signal.
if (signal_pending(current))
count = -EINTR;
}
// The break below leaves the outermost loop.
if (count || timed_out)
break;
...
// This is the call that actually puts the current process or thread to sleep.
// It blocks; when it returns, execution continues from here.
// If it returns 0, timed_out is set to 1, so the next pass of the
// outermost loop exits through the break above.
if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
timed_out = 1;
}
return count;
}
四、do_pollfd()函数
fs/select.c
// Polls a single pollfd entry. Most of the body is elided ("...") in this
// excerpt; only the descriptor check and the vfs_poll() call are shown.
static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
bool *can_busy_poll,
__poll_t busy_flag)
{
int fd = pollfd->fd;
__poll_t mask = 0, filter;
struct fd f;
// Negative descriptors jump straight to the out label (not shown here).
if (fd < 0)
goto out;
...
// Key step: ask the file itself for its current event mask.
mask = vfs_poll(f.file, pwait);
...
}
五、vfs_poll()函数
include/linux/poll.h
// Dispatches to the file's own poll operation; a file without one reports
// DEFAULT_POLLMASK.
static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
{
if (unlikely(!file->f_op->poll))
return DEFAULT_POLLMASK;
// Finally call the poll pointer of the file operations table.
return file->f_op->poll(file, pt);
}
六、驱动层poll底层接口
include/linux/fs.h
// poll member of the file operations table (called through file->f_op->poll).
// poll_table is the typedef (alias) of struct poll_table_struct, so the second
// parameter is the same object that poll_wait() receives as a poll_table pointer.
__poll_t (*poll) (struct file *, struct poll_table_struct *);
参数:
- filp :被轮询的设备文件(此时文件已经打开)
- wait :结构体 poll_table_struct 类型指针
返回值:
- 文件可用事件类型
1、poll驱动模板
static __poll_t xxx_poll(struct file *filp, struct poll_table_struct *wait)
{
unsigned int mask = 0;
// 详见下,注意第二个参数就是等待队列头
poll_wait(filp, &yyy, wait);
// 判断驱动程序里面发生了哪些条件,这些条件对应哪些文件的事件,设置好事件后,return回去
if(...)
{
mask |= POLLOUT | ...;
}
return mask;
}
2、poll_wait()函数
include/linux/poll.h
// Invokes the poll_table's queueing callback, if one is set, for the given
// wait queue head. Does nothing when p, p->_qproc or wait_address is NULL.
// Parameter 2 (wait_address): the wait queue head.
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
if (p && p->_qproc && wait_address)
// _qproc is the callback stored by init_poll_funcptr(), i.e. __pollwait
// when set up through poll_initwait().
// The second argument specifies the wait queue head.
p->_qproc(filp, wait_address, p);
}
七、相关的结构体框图