在应用程序中,我们经常会用到select和poll函数。这当然也是需要我们底层来实现才行。底层对应的函数是struct file_operations中的unsigned int (*poll) (struct file *, struct poll_table_struct *);
第一个参数为file结构体指针,第2个参数为轮询表指针
在poll函数中我们做两件事情:
1、使用poll_wait()函数,将对应的等待队列头部添加到poll_table中去,这里要注意的一点是,poll_wait函数本身不会因此阻塞,它只是将相应的等待队列加入到轮询表当中。阻塞过程由内核来完成。
2、返回一个掩码,来表示当前设备是否可读、可写等
POLLIN // 读事件
POLLRDNORM // 正常的读数据
数据可读,此时read函数可无阻塞使用,将mask设置为上述两个值
POLLOUT // 写事件
POLLWRNORM // 正常的写数据
数据可写,此时write函数可无阻塞使用,将mask设置为上述两个值
POLLERR // 出错
poll_wait()原型是:static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
功能:将等待队列头添加到poll_table(轮询表)中,内核借助这张表来监视对应文件描述符的状态变化。
参数:
filp:文件指针
wait_address:等待队列头指针
p:轮询表(poll_table)指针
我们来看一个例程,驱动层
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <asm/atomic.h>
#include <linux/spinlock.h>
#include <linux/semaphore.h>
#include <asm/uaccess.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/poll.h>
MODULE_LICENSE("GPL");
/* Device number; filled in by alloc_chrdev_region() in demo_init(). */
dev_t devno;
/* Not referenced by the code shown in this listing. */
int major = 0;
/* First minor number requested from alloc_chrdev_region(). */
int minor = 0;
/* Number of consecutive device numbers registered/unregistered. */
int count = 1;
/* Capacity, in bytes, of the kernel-side buffer kbuf. */
#define KMAX 1024
/* Single data buffer shared by demo_write() (producer) and demo_read() (consumer). */
char kbuf[KMAX] = {};
int counter = 0; /* number of bytes actually stored in kbuf */
/* Character device object obtained from cdev_alloc() in demo_init(). */
struct cdev *pdev;
/* Class and device handles returned by class_create()/device_create(). */
struct class * pclass;
struct device * pdevice;
/* Initialised to 0 in demo_init(); posted by demo_write(), taken by demo_read(). */
struct semaphore sem_r;
/* Initialised to 1 in demo_init(); taken by demo_write(), posted by demo_read(). */
struct semaphore sem_w;
/* Wait queue used by demo_read() to sleep and by demo_poll() via poll_wait(). */
wait_queue_head_t wq;
/*
 * demo_open - open() handler of the demo character device.
 *
 * Only logs the function name and source line; no per-open state is
 * created. Always returns 0.
 */
int demo_open(struct inode *inodep, struct file *filep)
{
	printk("%s,%d\n", __func__, __LINE__);

	return 0;
}
/*
 * demo_release - release() handler of the demo character device.
 *
 * Only logs the function name and source line; there is nothing to
 * tear down. Always returns 0.
 */
int demo_release(struct inode *inodep, struct file *filep)
{
	printk("%s,%d\n", __func__, __LINE__);

	return 0;
}
/*
 * demo_read - read() handler: hand the bytes currently held in kbuf to
 * user space and mark the buffer empty again.
 *
 * Call path: read(fd, buff, N) --> ... --> demo_read()
 *
 * @filep:  open file; O_NONBLOCK in f_flags selects non-blocking mode
 * @buffer: user-space destination buffer
 * @size:   maximum number of bytes the caller wants
 * @offlen: file position pointer (not used by this driver)
 *
 * Returns the number of bytes copied, -EAGAIN if the file is
 * non-blocking and no data can be taken right now, -ERESTARTSYS if a
 * signal interrupted the wait, or -EFAULT if the copy to user space
 * failed.
 */
ssize_t demo_read(struct file * filep, char __user * buffer, size_t size, loff_t * offlen)
{
	int nonblock = filep->f_flags & O_NONBLOCK;

	if (counter == 0) {
		if (nonblock)
			return -EAGAIN;

		/* Sleep until demo_write() stores data and wakes wq. */
		if (wait_event_interruptible(wq, counter != 0))
			return -ERESTARTSYS;
	}

	/*
	 * sem_r is posted by demo_write() once kbuf holds data. Another
	 * reader may have taken it first, so a non-blocking caller must
	 * not sleep here, and a blocking caller must notice a signal
	 * instead of continuing without holding the semaphore.
	 */
	if (nonblock) {
		if (down_trylock(&sem_r))
			return -EAGAIN;
	} else if (down_interruptible(&sem_r)) {
		return -ERESTARTSYS;
	}

	if (size > counter)
		size = counter;

	if (copy_to_user(buffer, kbuf, size) != 0) {
		printk("Failed to copy_to_user.\n");
		/*
		 * The data was not consumed: give sem_r back so a later
		 * read can retry; otherwise both readers and writers would
		 * block forever.
		 */
		up(&sem_r);
		return -EFAULT;
	}

	counter = 0;
	/* Buffer is empty again: let the next writer in. */
	up(&sem_w);
	return size;
}
/*
 * demo_write - write() handler: copy up to KMAX bytes from user space
 * into kbuf and wake any reader or poller waiting on wq.
 *
 * Call path: write(fd, buff, n) --> ... --> demo_write()
 *
 * @filep:  open file; O_NONBLOCK in f_flags selects non-blocking mode
 * @buffer: user-space source buffer
 * @size:   number of bytes to store
 * @offlen: file position pointer (not used by this driver)
 *
 * Returns the number of bytes stored, 0 for an empty write, -ENOMEM if
 * size exceeds KMAX, -EAGAIN if the file is non-blocking and the buffer
 * is still occupied, -ERESTARTSYS if a signal interrupted the wait, or
 * -EFAULT if the copy from user space failed.
 */
ssize_t demo_write(struct file *filep, const char __user *buffer, size_t size, loff_t * offlen)
{
	/* Validate before taking sem_w so an early return cannot leave it held. */
	if (size > KMAX)
		return -ENOMEM;

	/*
	 * An empty write must not post sem_r: counter would stay 0, readers
	 * would never be woken and sem_w would never be released again.
	 */
	if (size == 0)
		return 0;

	/* sem_w is posted by demo_read() once the previous data was consumed. */
	if (filep->f_flags & O_NONBLOCK) {
		if (down_trylock(&sem_w))
			return -EAGAIN;
	} else if (down_interruptible(&sem_w)) {
		return -ERESTARTSYS;
	}

	if (copy_from_user(kbuf, buffer, size) != 0) {
		printk("Failed to copy_from_user.\n");
		/* Nothing was published: release the buffer for the next writer. */
		up(&sem_w);
		return -EFAULT;
	}

	/* kbuf is not guaranteed to be NUL-terminated, so bound the print. */
	printk("kbuf:%.*s\n", (int)size, kbuf);
	counter = size;
	up(&sem_r);
	/* Wake sleepers in demo_read() and tasks polling through demo_poll(). */
	wake_up(&wq);
	return size;
}
/*
 * demo_poll - poll() handler of the demo character device.
 *
 * Call path: select() --> sys_select() --> ... --> demo_poll()
 *
 * @filep: open file being polled
 * @table: poll table supplied by the caller
 *
 * Adds wq to the poll table, then reports POLLIN | POLLRDNORM when kbuf
 * holds data (counter != 0) and 0 otherwise. No other event bits are
 * ever reported.
 */
unsigned int demo_poll(struct file *filep, struct poll_table_struct *table)
{
	/* Register wq in the poll table. */
	poll_wait(filep, &wq, table);

	/* Data present means the device is readable. */
	return (counter != 0) ? (POLLIN | POLLRDNORM) : 0;
}
/*
 * File operations of the demo device; handed to cdev_init() in
 * demo_init(). Maps open/release/read/write/poll onto the demo_*
 * handlers defined above.
 */
struct file_operations fops = {
.owner =THIS_MODULE,
.open = demo_open,
.release = demo_release,
.read = demo_read,
.write = demo_write,
.poll = demo_poll,
};
/*
 * demo_init - module entry point.
 *
 * Initialises the semaphores and the wait queue, reserves a device
 * number, registers the character device and creates the "myclass"
 * class and the "hello" device node.
 *
 * Returns 0 on success or a negative errno; on failure everything that
 * was set up so far is undone in reverse order.
 */
static int __init demo_init(void)
{
	int ret = 0;

	printk("%s,%d\n", __func__, __LINE__);

	/*
	 * Initialise the synchronisation objects first: once cdev_add() and
	 * device_create() have run, user space can reach demo_read(),
	 * demo_write() and demo_poll(), all of which use them.
	 */
	sema_init(&sem_r, 0);	/* nothing to read yet */
	sema_init(&sem_w, 1);	/* buffer is free for one writer */
	/* Wait queue initialisation. */
	init_waitqueue_head(&wq);

	ret = alloc_chrdev_region(&devno, minor, count, "xxx");
	if (ret) {
		printk("Failed to alloc_chrdev_region.\n");
		return ret;
	}
	printk("devno:%d , major:%d minor:%d\n", devno, MAJOR(devno), MINOR(devno));

	pdev = cdev_alloc();
	if (pdev == NULL) {
		printk("Failed to cdev_alloc.\n");
		/* ret is still 0 here; without this the failed load would report success. */
		ret = -ENOMEM;
		goto err1;
	}
	cdev_init(pdev, &fops);

	ret = cdev_add(pdev, devno, count);
	if (ret < 0) {
		printk("Failed to cdev_add.");
		goto err2;
	}

	pclass = class_create(THIS_MODULE, "myclass");
	if (IS_ERR(pclass)) {
		printk("Failed to class_create.\n");
		ret = PTR_ERR(pclass);
		goto err3;
	}

	pdevice = device_create(pclass, NULL, devno, NULL, "hello");
	if (IS_ERR(pdevice)) {
		printk("Failed to device_create.\n");
		ret = PTR_ERR(pdevice);
		goto err4;
	}

	return 0;

	/* Unwind in reverse order of setup. */
err4:
	class_destroy(pclass);
err3:
	cdev_del(pdev);
err2:
	kfree(pdev);
err1:
	unregister_chrdev_region(devno, count);
	return ret;
}
/*
 * demo_exit - module exit point.
 *
 * Undoes demo_init() in reverse order: device node, class, character
 * device, its memory, and finally the reserved device number.
 */
static void __exit demo_exit(void)
{
printk("%s,%d\n", __func__, __LINE__);
/* Remove the "hello" device created by device_create(). */
device_destroy(pclass, devno);
/* Remove the "myclass" class created by class_create(). */
class_destroy(pclass);
/* Unregister the character device added by cdev_add(). */
cdev_del(pdev);
/*
 * Free the object obtained from cdev_alloc().
 * NOTE(review): confirm that cdev_del() does not already free this
 * object on the target kernel; if it does, this is a double free.
 */
kfree(pdev);
/* Give back the device number range from alloc_chrdev_region(). */
unregister_chrdev_region(devno, count);
}
/* Register demo_init()/demo_exit() as the module's load and unload hooks. */
module_init(demo_init);
module_exit(demo_exit);
应用层:使用select同时监听标准输入(终端)和设备文件/dev/hello的可读事件
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
/* Size of the line buffer and of the device read buffer. */
#define N 128

/*
 * Test program for the demo driver: waits with select() on standard
 * input and on /dev/hello and prints whatever becomes readable.
 *
 * Returns 0 when standard input reaches end-of-file, -1 if open(),
 * select() or read() fails.
 */
int main(int argc, const char *argv[])
{
	int fd;
	char buf[N] = {0};
	char rbuf[N] = {0};
	fd_set readfds;
	int maxfd;

	fd = open("/dev/hello", O_RDWR);
	if (fd < 0) {
		perror("Failed to open.");
		return -1;
	}
	printf("open success.\n");

	/* fd is always greater than 0 (stdin), so it is the highest descriptor. */
	maxfd = fd;

	while (1) {
		/* select() overwrites the set, so rebuild it on every pass. */
		FD_ZERO(&readfds);
		FD_SET(0, &readfds);
		FD_SET(fd, &readfds);

		if (select(maxfd + 1, &readfds, NULL, NULL, NULL) < 0) {
			perror("Failed to select.");
			close(fd);
			return -1;
		}

		if (FD_ISSET(0, &readfds)) {
			/* EOF or error on stdin: stop instead of spinning in select(). */
			if (fgets(buf, N, stdin) == NULL)
				break;
			printf("buf:%s", buf);
		}

		if (FD_ISSET(fd, &readfds)) {
			/* Leave room for the terminator that printf("%s") needs. */
			ssize_t len = read(fd, rbuf, N - 1);

			if (len < 0) {
				perror("Failed to read.");
				close(fd);
				return -1;
			}
			rbuf[len] = '\0';
			printf("rbuf:%s\n", rbuf);
		}
	}

	close(fd);
	return 0;
}
光这么写呢,会看着太简单了点,我们来看看内核究竟是如何实现的。
poll_wait函数并不会引起阻塞。它只是把当前等待队列头加入到poll_table中。真正的阻塞是在调用select函数时实现的。其实从结果倒推也能明白,如果在poll_wait就阻塞了,那还如何实现select监测多个设备呢?
select系统调用的过程是sys_select() -> core_sys_select() -> do_select()。
select的底层机制用的是我们上一节讲的等待队列(参见《Linux驱动(六)设备驱动中的阻塞与非阻塞IO》)。do_select函数的基本流程如下,我们先用文字来表述一下。do_select会遍历文件描述符集合中的所有文件描述符,并调用每个文件对应的底层驱动poll函数。我们在驱动的poll函数中做了什么呢?再提一遍:
1、使用poll_wait()函数,将对应的等待队列头部添加到poll_table中去。这样,只要文件描述符集合中任意一个文件描述符对应的等待队列被wake_up系列函数唤醒,select函数就会被唤醒。
2、返回一个掩码,来表示当前设备是否可读、可写等,有这个我们才能真正的跳出do_select函数。
查看poll返回的掩码,如果其中包含应用关心的事件,就置上相应的结果标志位(res_in/res_out/res_ex)并累加retval,直到将所有文件描述符的poll函数都遍历完毕。
检查retval,如果不为0,说明有资源可用(具体哪个可用,交给应用通过FD_ISSET去判断),退出循环。
如果没有资源可用,则休眠等待,直到被唤醒、超时或收到信号。
我们再来看看代码实现
/*
 * do_select - core loop behind the select() system call (quoted kernel code).
 *
 * Repeatedly walks the in/out/ex descriptor bitmaps in fds, calls each
 * file's ->poll() handler and records matching events in the res_in,
 * res_out and res_ex bitmaps. The loop ends when at least one event was
 * found, the timeout expired, a signal is pending, or the poll table
 * reports an error; otherwise the task sleeps in poll_schedule_timeout()
 * and scans again.
 *
 * @n:        number of descriptors to examine
 * @fds:      request and result bitmaps
 * @end_time: timeout; NULL means none, an all-zero value means the
 *            descriptors are scanned once without sleeping
 *
 * Returns the number of event bits set, or a negative value taken from
 * max_select_fd() or table.error.
 */
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
ktime_t expire, *to = NULL;
struct poll_wqueues table;
poll_table *wait;
int retval, i, timed_out = 0;
unsigned long slack = 0;
rcu_read_lock();
retval = max_select_fd(n, fds);// get the upper bound of descriptors to scan
rcu_read_unlock();
if (retval < 0)
return retval;
n = retval;
poll_initwait(&table); // initialise the poll table
wait = &table.pt;
/* A zero timeout: scan once with wait == NULL and never sleep. */
if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
wait = NULL;
timed_out = 1;
}
if (end_time && !timed_out)
slack = select_estimate_accuracy(end_time);
retval = 0;
for (;;) {
unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
inp = fds->in; outp = fds->out; exp = fds->ex;
rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
// start walking all file descriptors, one bitmap word at a time
for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
unsigned long in, out, ex, all_bits, bit = 1, mask, j;
unsigned long res_in = 0, res_out = 0, res_ex = 0;
const struct file_operations *f_op = NULL;
struct file *file = NULL;
in = *inp++; out = *outp++; ex = *exp++;
all_bits = in | out | ex;
/* No descriptor requested in this word: skip all of its bits. */
if (all_bits == 0) {
i += __NFDBITS;
continue;
}
for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
int fput_needed;
if (i >= n)
break;
if (!(bit & all_bits))
continue;
file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {// call into the driver's poll handler
wait_key_set(wait, in, out, bit);
mask = (*f_op->poll)(file, wait);
}
fput_light(file, fput_needed);
/* check the returned mask against the requested events */ if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
retval++;
/* From here on ->poll() is called with a NULL wait pointer. */
wait = NULL;
}
if ((mask & POLLOUT_SET) && (out & bit)) {
res_out |= bit;
retval++;
wait = NULL;
}
if ((mask & POLLEX_SET) && (ex & bit)) {
res_ex |= bit;
retval++;
wait = NULL;
}
}
}
/* Publish the results collected for this bitmap word. */
if (res_in)
*rinp = res_in;
if (res_out)
*routp = res_out;
if (res_ex)
*rexp = res_ex;
cond_resched();
}
/* After the first full pass ->poll() is always called with wait == NULL. */
wait = NULL;
if (retval || timed_out || signal_pending(current))// exit conditions: 1) an event bit was set 2) timeout 3) a signal is pending
break;
if (table.error) {
retval = table.error;
break;
}
/*
* If this is the first loop and we have a timeout
* given, then we convert to ktime_t and set the to
* pointer to the expiry value.
*/
if (end_time && !to) {
expire = timespec_to_ktime(*end_time);
to = &expire;
}
// nothing is ready: put the current task to sleep
if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
to, slack))
timed_out = 1;
}
poll_freewait(&table);
return retval;
}