本文介绍的异步IO是POSIX的异步IO,它为对不同类型的文件进行异步IO提供了一套一致的方法。
它在提交完IO操作请求后就立即返回,程序不需要等到IO操作完成再去做别的事情,具有非阻塞的特性,当然你也可以等待它的结果,不过这就失去了异步的意义。
当底层把IO操作完成后,可以给提交者发送信号,或者调用注册的回调函数,告知请求提交者IO操作已完成。
在信号处理函数或者回调函数中,可以使用异步IO接口来获得IO的完成情况,比如获取读写操作返回的字节数或错误码、读取的数据等。
用户态程序
app.c,仅以异步读为例,异步写类似。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <aio.h>
#define FILE_NAME "/dev/mydev"
#define MAX_LEN 64
/*
 * Completion callback for the asynchronous write request.
 *
 * Installed as the SIGEV_THREAD notify function; sigval.sival_ptr carries
 * the struct aiocb that was submitted.  Prints the number of bytes written
 * on success and reports the error on failure.
 */
void aiow_completion_handler(union sigval sigval)
{
    struct aiocb *req = (struct aiocb *)sigval.sival_ptr;
    int err = aio_error(req);

    if (err != 0) {
        /* A failed write used to be dropped silently; make it visible. */
        fprintf(stderr, "aio write failed: %s\n", strerror(err));
        return;
    }

    /* aio_return() yields ssize_t; keep the full width instead of int. */
    ssize_t ret = aio_return(req);
    printf("aio write %zd bytes\n", ret);
}
/*
 * Completion callback for the asynchronous read request.
 *
 * Installed as the SIGEV_THREAD notify function; sigval.sival_ptr carries
 * the struct aiocb that was submitted.  Prints the data that was read, or
 * reports the error when the request failed.
 */
void aior_completion_handler(union sigval sigval)
{
    struct aiocb *req = (struct aiocb *)sigval.sival_ptr;
    int err = aio_error(req);

    /*
     * aio_error() returns 0 only on success; any other value is an error
     * number.  The previous ">= 0" test treated every failure as success
     * and then printed a buffer that had never been filled.
     */
    if (err != 0) {
        fprintf(stderr, "aio read failed: %s\n", strerror(err));
        return;
    }

    ssize_t ret = aio_return(req);
    if (ret > 0) {
        /*
         * Bound the print by the byte count actually read: the buffer is
         * not guaranteed to contain a terminating NUL.
         */
        printf("aio read: %.*s\n", (int)ret, (const char *)req->aio_buf);
    }
}
/*
 * Demo driver: opens FILE_NAME and, once per second, submits one
 * asynchronous write ("aio test") and one asynchronous read of MAX_LEN
 * bytes.  Completion is reported through the SIGEV_THREAD callbacks
 * aiow_completion_handler() / aior_completion_handler().
 *
 * Returns -1 if the device cannot be opened or the buffers cannot be
 * allocated; exits with EXIT_FAILURE if a request cannot be queued.
 * Otherwise it loops forever.
 */
int main(int argc, char *argv[])
{
    int fd;
    char *wbuf;
    char *rbuf;
    struct aiocb aior, aiow;

    (void)argc;
    (void)argv;

    fd = open(FILE_NAME, O_RDWR);
    if (0 > fd)
    {
        printf("Open failed.\n");
        return -1;
    }

    /*
     * Check both allocations before use.  The read buffer gets one extra,
     * zeroed byte so that a full MAX_LEN-byte read is still NUL-terminated
     * when it is printed as a string.
     */
    wbuf = malloc(32);
    rbuf = calloc(1, MAX_LEN + 1);
    if (wbuf == NULL || rbuf == NULL)
    {
        perror("malloc");
        free(wbuf);
        free(rbuf);
        close(fd);
        return -1;
    }
    strcpy(wbuf, "aio test");

    memset(&aior, 0, sizeof(aior));
    memset(&aiow, 0, sizeof(aiow));

    /* Write request: the string including its terminating NUL. */
    aiow.aio_fildes = fd;
    aiow.aio_buf = wbuf;
    aiow.aio_nbytes = strlen(wbuf) + 1;
    aiow.aio_offset = 0;
    aiow.aio_sigevent.sigev_notify = SIGEV_THREAD;
    aiow.aio_sigevent.sigev_notify_function = aiow_completion_handler;
    aiow.aio_sigevent.sigev_notify_attributes = NULL;
    aiow.aio_sigevent.sigev_value.sival_ptr = &aiow;

    /* Read request: up to MAX_LEN bytes. */
    aior.aio_fildes = fd;
    aior.aio_buf = rbuf;
    aior.aio_nbytes = MAX_LEN;
    aior.aio_offset = 0;
    aior.aio_sigevent.sigev_notify = SIGEV_THREAD;
    aior.aio_sigevent.sigev_notify_function = aior_completion_handler;
    aior.aio_sigevent.sigev_notify_attributes = NULL;
    aior.aio_sigevent.sigev_value.sival_ptr = &aior;

    /*
     * NOTE(review): the same two control blocks are resubmitted on every
     * iteration; this relies on the one-second sleep being long enough
     * for the previous requests to have completed.
     */
    while (1) {
        if (aio_write(&aiow) == -1)
        {
            perror("aio write");
            exit(EXIT_FAILURE);
        }
        if (aio_read(&aior) == -1)
        {
            perror("aio read");
            exit(EXIT_FAILURE);
        }
        sleep(1);
        printf("aaaa\n");
    }
}
驱动程序关键点
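aio_read、aio_write对应内核态驱动的操作分别是read_iter和write_iter(需要注意:glibc的POSIX AIO是在用户态用线程实现的,见aio(7),请求最终通过普通的读写系统调用进入内核;read_iter/write_iter同时也是内核原生AIO即io_submit使用的入口):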
/*
 * Excerpt of struct file_operations: only the two iterator-based hooks
 * discussed in this article are shown; the other members are omitted.
 */
struct file_operations {
/* Read hook: receives the request control block and the destination iterator. */
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
/* Write hook: receives the request control block and the source iterator. */
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
}
/*
 * Kernel I/O control block handed to read_iter/write_iter as the first
 * argument.  Layout quoted from kernel 5.15.0 (per the article text).
 */
struct kiocb {
/* The struct file this request operates on. */
struct file *ki_filp;
/* The 'ki_filp' pointer is shared in a union for aio */
randomized_struct_fields_start
/* File offset of the request; the drivers below read it into a local 'pos'. */
loff_t ki_pos;
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
union {
unsigned int ki_cookie; /* for ->iopoll */
struct wait_page_queue *ki_waitq; /* for async buffered IO */
};
randomized_struct_fields_end
};
/*
 * Iterator describing the data buffers of a read_iter/write_iter request
 * (second argument of those hooks).  Layout quoted from kernel 5.15.0
 * (per the article text).
 */
struct iov_iter {
u8 iter_type;
bool nofault;
bool data_source;
size_t iov_offset;
size_t count;
union {
/* Segment array; each entry supplies iov_base and iov_len to the drivers below. */
const struct iovec *iov;
const struct kvec *kvec;
const struct bio_vec *bvec;
struct xarray *xarray;
struct pipe_inode_info *pipe;
};
union {
/* Number of segments; the drivers below loop over iov[0..nr_segs-1]. */
unsigned long nr_segs;
struct {
unsigned int head;
unsigned int start_head;
};
loff_t xarray_start;
};
};
第一个参数struct kiocb的成员ki_filp指向内核中表示该文件的struct file结构体,ki_pos是偏移量,来自用户态异步IO请求传入的参数。
第二个参数struct iov_iter迭代器包含了以前版本的成员iov,iov成员包含了数据缓存及长度。
上面是内核5.15.0版本的结构体,新内核又进行了更新。
关键程序如下:
/*
 * read_iter hook: serves the request by calling my_read() once for each
 * segment of the iterator and summing the bytes transferred.
 *
 * Returns the total number of bytes read.  If nothing was read, returns
 * the error reported by my_read() (or 0 for an empty request) instead of
 * unconditionally reporting -EFAULT.
 *
 * NOTE(review): to->iov[] is indexed directly, which assumes the iterator
 * is iovec-backed and that iov_offset is 0 - confirm for every caller.
 */
static ssize_t my_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total = 0;
	unsigned long i;
	ssize_t ret = 0;	/* stays 0 when there are no segments */
	loff_t pos = iocb->ki_pos;
	unsigned long nr_segs = to->nr_segs;

	/* Format specifiers now match the argument types. */
	printk("my %lu, %lld\n", nr_segs, (long long)pos);

	for (i = 0; i < nr_segs; i++) {
		ret = my_read(iocb->ki_filp, to->iov[i].iov_base, to->iov[i].iov_len, &pos);
		if (ret < 0)
			break;
		total += ret;
		/* A short transfer means no more data is available right now. */
		if ((size_t)ret < to->iov[i].iov_len)
			break;
	}

	/* Report partial progress if any; otherwise pass the real status on. */
	return total ? (ssize_t)total : ret;
}
/*
 * write_iter hook: serves the request by calling my_write() once for each
 * segment of the iterator and summing the bytes transferred.
 *
 * Returns the total number of bytes written.  If nothing was written,
 * returns the error reported by my_write() (or 0 for an empty request)
 * instead of unconditionally reporting -EFAULT.
 *
 * NOTE(review): from->iov[] is indexed directly, which assumes the
 * iterator is iovec-backed and that iov_offset is 0 - confirm for every
 * caller.
 */
static ssize_t my_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	size_t total = 0;
	unsigned long i;
	ssize_t ret = 0;	/* stays 0 when there are no segments */
	loff_t pos = iocb->ki_pos;
	unsigned long nr_segs = from->nr_segs;

	for (i = 0; i < nr_segs; i++) {
		ret = my_write(iocb->ki_filp, from->iov[i].iov_base, from->iov[i].iov_len, &pos);
		if (ret < 0)
			break;
		total += ret;
		/* A short transfer means the device cannot take more right now. */
		if ((size_t)ret < from->iov[i].iov_len)
			break;
	}

	/* Report partial progress if any; otherwise pass the real status on. */
	return total ? (ssize_t)total : ret;
}
/*
 * Abbreviated illustration: hook the iterator-based handlers into the
 * driver's file_operations table.  The "......" line stands for the other
 * handlers, which are elided here.
 */
static struct file_operations cdev_ops = {
......
.read_iter = my_read_iter,
.write_iter = my_write_iter,
}
完整驱动程序
//head
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/string.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/uio.h>
#define MAJOR_CHAR 100
#define MINOR_CHAR 0
#define MAX_LEN 64
// Asynchronous-notification state: passed to fasync_helper() in my_fasync()
// and to kill_fasync(..., SIGIO, ...) in my_read()/my_write().
struct fasync_struct *my_fapp;
// Forward declaration: my_close() calls my_fasync() before its definition.
static int my_fasync(int fd, struct file *flip, int mode);
/*
 * open hook: performs no per-open setup; it only logs the call.
 * Always returns 0.
 */
static int my_open(struct inode *pnode, struct file *pfile)
{
	printk("Open cdev.\n");

	return 0;
}
/*
 * release hook: logs the call and invokes my_fasync() with fd = -1 and
 * mode = 0 for this file.  Always returns 0.
 */
static int my_close(struct inode *pnode, struct file *pfile)
{
	printk("Close cdev.\n");

	/*
	 * Presumably this drops the file from the SIGIO notification list -
	 * confirm against fasync_helper().  The result is deliberately
	 * ignored.
	 */
	(void)my_fasync(-1, pfile, 0);

	return 0;
}
/*
 * Device buffer shared by my_read() and my_write().
 * NOTE(review): kbuf_len starts at 0, so my_read() treats the device as
 * empty and the initial string below is not handed out as readable data;
 * the first write overwrites it from offset 0.
 */
static char kbuf[MAX_LEN] = "my aio test!";
/* Number of bytes currently stored in kbuf (0 = empty, MAX_LEN = full). */
static int kbuf_len = 0;
/* Readers sleep here while the buffer is empty; my_write() wakes them. */
DECLARE_WAIT_QUEUE_HEAD(my_read_queue);
/* Writers sleep here while the buffer is full; my_read() wakes them. */
DECLARE_WAIT_QUEUE_HEAD(my_write_queue);
/*
 * read hook: hands out up to len bytes from the front of the device
 * buffer and shifts the remaining data down.
 *
 * pf   - open file; its f_flags decide between blocking and O_NONBLOCK.
 * ubuf - user-space destination.
 * len  - number of bytes requested; must not exceed MAX_LEN.
 * pl   - file position (unused: the device behaves like a FIFO).
 *
 * Returns the number of bytes copied (at most the number stored),
 * -EINVAL if len > MAX_LEN, -EAGAIN if the buffer is empty and the file is
 * non-blocking, -ERESTARTSYS if the wait was interrupted by a signal, or
 * -EFAULT if the copy to user space failed.
 *
 * NOTE(review): kbuf/kbuf_len are accessed without any lock here.
 */
ssize_t my_read(struct file *pf, char __user *ubuf, size_t len, loff_t *pl)
{
	if (len > MAX_LEN) {
		printk("len is large than %d.\n", MAX_LEN);
		return -EINVAL;
	}

	/* Empty buffer: fail fast for non-blocking files, otherwise sleep. */
	if (kbuf_len == 0) {
		if (pf->f_flags & O_NONBLOCK)
			return -EAGAIN;
		if (wait_event_interruptible(my_read_queue, kbuf_len != 0))
			return -ERESTARTSYS;	/* woken by a signal */
	}

	/* Never hand out more than what is actually stored. */
	if (len > (size_t)kbuf_len)
		len = kbuf_len;

	if (copy_to_user(ubuf, kbuf, len)) {
		printk("Copy to user failed.\n");
		return -EFAULT;
	}

	/*
	 * Drop the consumed bytes and keep the unread tail.  Source and
	 * destination overlap, so this must be memmove(), not memcpy().
	 */
	memmove(kbuf, kbuf + len, MAX_LEN - len);
	memset(kbuf + MAX_LEN - len, 0, len);
	kbuf_len -= len;

	/* There is room again: wake blocked writers and signal "writable". */
	if (kbuf_len != MAX_LEN) {
		wake_up_interruptible(&my_write_queue);
		kill_fasync(&my_fapp, SIGIO, POLL_OUT);
	}

	return len;
}
/*
 * write hook: appends up to len bytes from user space to the device
 * buffer.
 *
 * pf   - open file; its f_flags decide between blocking and O_NONBLOCK.
 * ubuf - user-space source.
 * len  - number of bytes offered; must not exceed MAX_LEN.
 * pl   - file position (unused: the device behaves like a FIFO).
 *
 * Returns the number of bytes stored (a short count when less room is
 * left than was offered), -EINVAL if len > MAX_LEN, -EAGAIN if the buffer
 * is full and the file is non-blocking, -ERESTARTSYS if the wait was
 * interrupted by a signal, or -EFAULT if the copy from user space failed.
 *
 * The previous "MAX_LEN < len + kbuf_len" test could wrap for a huge len,
 * and it rejected every write to a full buffer before the blocking branch
 * could run.  All size checks below are subtraction-based and the full
 * check comes first.
 *
 * NOTE(review): kbuf/kbuf_len are accessed without any lock here.
 */
ssize_t my_write(struct file *pf, const char __user *ubuf, size_t len, loff_t *pl)
{
	size_t room;

	/* A request larger than the whole buffer can never be valid. */
	if (len > MAX_LEN) {
		printk("len is large than %d.\n", MAX_LEN);
		return -EINVAL;
	}

	/* Full buffer: fail fast for non-blocking files, otherwise sleep. */
	if (kbuf_len == MAX_LEN) {
		if (pf->f_flags & O_NONBLOCK)
			return -EAGAIN;
		if (wait_event_interruptible(my_write_queue, MAX_LEN != kbuf_len))
			return -ERESTARTSYS;	/* woken by a signal */
	}

	/* Store only what fits; the caller sees a short write. */
	room = MAX_LEN - kbuf_len;
	if (len > room)
		len = room;

	if (copy_from_user(kbuf + kbuf_len, ubuf, len)) {
		printk("Copy from user failed.\n");
		return -EFAULT;
	}
	kbuf_len += len;

	/* Data is available: wake blocked readers and signal "readable". */
	if (kbuf_len != 0) {
		wake_up_interruptible(&my_read_queue);
		kill_fasync(&my_fapp, SIGIO, POLL_IN);
	}

	printk("bbb\n");
	return len;
}
/*
 * poll hook: registers the caller on both wait queues, then reports the
 * device as readable when the buffer holds data and as writable when it
 * is not full.
 */
static unsigned int my_poll(struct file *filp, struct poll_table_struct *wait)
{
	unsigned int events;

	/* Register on the write queue first, then on the read queue. */
	poll_wait(filp, &my_write_queue, wait);
	poll_wait(filp, &my_read_queue, wait);

	/* Readable: there is data to fetch. */
	events = kbuf_len ? (POLLIN | POLLRDNORM) : 0;

	/* Writable: there is still room in the buffer. */
	if (kbuf_len != MAX_LEN)
		events |= POLLOUT | POLLWRNORM;

	return events;
}
/*
 * fasync hook: forwards the request to fasync_helper() using the
 * driver-wide my_fapp pointer and returns its result unchanged.
 */
static int my_fasync(int fd, struct file *flip, int mode)
{
	int rc = fasync_helper(fd, flip, mode, &my_fapp);

	return rc;
}
/*
 * read_iter hook: serves the request by calling my_read() once for each
 * segment of the iterator and summing the bytes transferred.
 *
 * Returns the total number of bytes read.  If nothing was read, returns
 * the error reported by my_read() (or 0 for an empty request) instead of
 * unconditionally reporting -EFAULT.
 *
 * NOTE(review): to->iov[] is indexed directly, which assumes the iterator
 * is iovec-backed and that iov_offset is 0 - confirm for every caller.
 */
static ssize_t my_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total = 0;
	unsigned long i;
	ssize_t ret = 0;	/* stays 0 when there are no segments */
	loff_t pos = iocb->ki_pos;
	unsigned long nr_segs = to->nr_segs;

	/* Format specifiers now match the argument types. */
	printk("my %lu, %lld\n", nr_segs, (long long)pos);

	for (i = 0; i < nr_segs; i++) {
		ret = my_read(iocb->ki_filp, to->iov[i].iov_base, to->iov[i].iov_len, &pos);
		if (ret < 0)
			break;
		total += ret;
		/* A short transfer means no more data is available right now. */
		if ((size_t)ret < to->iov[i].iov_len)
			break;
	}

	/* Report partial progress if any; otherwise pass the real status on. */
	return total ? (ssize_t)total : ret;
}
/*
 * write_iter hook: serves the request by calling my_write() once for each
 * segment of the iterator and summing the bytes transferred.
 *
 * Returns the total number of bytes written.  If nothing was written,
 * returns the error reported by my_write() (or 0 for an empty request)
 * instead of unconditionally reporting -EFAULT.
 *
 * NOTE(review): from->iov[] is indexed directly, which assumes the
 * iterator is iovec-backed and that iov_offset is 0 - confirm for every
 * caller.
 */
static ssize_t my_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	size_t total = 0;
	unsigned long i;
	ssize_t ret = 0;	/* stays 0 when there are no segments */
	loff_t pos = iocb->ki_pos;
	unsigned long nr_segs = from->nr_segs;

	for (i = 0; i < nr_segs; i++) {
		ret = my_write(iocb->ki_filp, from->iov[i].iov_base, from->iov[i].iov_len, &pos);
		if (ret < 0)
			break;
		total += ret;
		/* A short transfer means the device cannot take more right now. */
		if ((size_t)ret < from->iov[i].iov_len)
			break;
	}

	/* Report partial progress if any; otherwise pass the real status on. */
	return total ? (ssize_t)total : ret;
}
/* Character-device object; initialised and added in hello_init(). */
struct cdev cdevice;

/*
 * Handler table for the device: lifecycle hooks, the plain and
 * iterator-based read/write entry points, and the poll/fasync
 * notification hooks.
 */
struct file_operations cdev_ops = {
	/* lifecycle */
	.open       = my_open,
	.release    = my_close,
	/* data transfer */
	.read       = my_read,
	.read_iter  = my_read_iter,
	.write      = my_write,
	.write_iter = my_write_iter,
	/* readiness notification */
	.poll       = my_poll,
	.fasync     = my_fasync,
};
/*
 * Module load function: reserves the device number, initialises the cdev
 * with cdev_ops and adds it to the kernel.
 *
 * Returns 0 on success, otherwise the error returned by
 * register_chrdev_region() or cdev_add().
 */
static int hello_init(void)
{
	const dev_t devno = MKDEV(MAJOR_CHAR, MINOR_CHAR);
	int ret;

	printk(KERN_ALERT "Hello World.\n");

	/* Step 1: register the device number. */
	ret = register_chrdev_region(devno, 1, "hello");
	if (ret != 0) {
		printk("Register char device failed.\n");
		return ret;
	}

	/* Step 2: initialise the character-device structure. */
	cdev_init(&cdevice, &cdev_ops);
	cdevice.owner = THIS_MODULE;

	/* Step 3: hand the character device over to the kernel. */
	ret = cdev_add(&cdevice, devno, 1);
	if (ret == 0) {
		printk("Register char device success.\n");
		return 0;
	}

	/* cdev_add() failed: give the device number back before bailing out. */
	unregister_chrdev_region(devno, 1);
	printk("Unregister char device.\n");
	return ret;
}
/*
 * Module unload function (required): undoes hello_init() by removing the
 * cdev and then releasing the device number.
 */
static void hello_exit(void)
{
	printk(KERN_ALERT "Goodbye World.\n");

	/* Step 1: remove the character device from the kernel. */
	cdev_del(&cdevice);

	/* Step 2: release the device number. */
	unregister_chrdev_region(MKDEV(MAJOR_CHAR, MINOR_CHAR), 1);
}
// Register the module's load and unload functions (required)
module_init(hello_init);
module_exit(hello_exit);
// License (required)
MODULE_LICENSE("GPL");
// Author and description (optional)
MODULE_AUTHOR("Ono Zhang");
MODULE_DESCRIPTION("A simple Hello World Module");
运行结果
$ sudo ./a.out
aio read: aio test
aio write 9 bytes
aaaa
aio read: aio test
aio write 9 bytes
aaaa
aio write 9 bytes
aio read: aio test
aaaa
aio read: aio test
aio write 9 bytes
aaaa
aio write 9 bytes
aio read: aio test
未完待续(to be continued)
每周三、周六更新