提升文件IO性能的技巧

最新推荐文章于 2024-07-11 21:47:44 发布

xiaosanxian

最新推荐文章于 2024-07-11 21:47:44 发布

阅读量7

点赞数

分类专栏：嵌入式Linux C/C++ 文章标签： windows 服务器 linux

原文链接：https://mp.weixin.qq.com/s/ABt0F0i5tXnnGEjLQSqe5w

版权

C/C++ 同时被 2 个专栏收录

9 篇文章 2 订阅

订阅专栏

嵌入式Linux

8 篇文章 0 订阅

订阅专栏

Linux下的AIO基本概念

在异步I/O中，你可以同时发起多个I/O操作。这样每个I/O操作都需要保存一个唯一的上下文，以便当I/O操作完成后你能够识别是哪一个操作完成了。在AIO中这个上下文就是 aiocb（AIO Control Block，异步I/O控制块）结构体。这个结构体保存了每个I/O操作的所有信息，包括用于缓存数据的用户空间缓冲区。当I/O操作完成时，内核会提供这个I/O操作对应的aiocb结构体。

AIO模型

异步IO则采用订阅-通知模式：即应用程序向操作系统注册IO监听，然后继续做自己的事情。当操作系统发生IO事件，并且准备好数据后，再主动通知应用程序，触发相应的函数。过程如下图所示：

AIO的API

上面的每个API函数都是通过aiocb结构体来初始化或者查询状态的。结构体struct aiocb如下所示：

应用示例

aio_read.c 异步读操作

#include<stdio.h>
#include<sys/socket.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<assert.h>
#include<unistd.h>
#include<stdlib.h>
#include<errno.h>
#include<string.h>
#include<sys/types.h>
#include<fcntl.h>
#include<aio.h>
  
#define BUFFER_SIZE 1024

/*
 * Asynchronous read demo.
 *
 * Queues one aio_read() of up to BUFFER_SIZE bytes from the start of
 * "test.txt", polls aio_error() until the request leaves EINPROGRESS, then
 * prints the data and the byte count reported by aio_return().
 *
 * Returns 0 on success and 1 if the file cannot be opened, the buffer cannot
 * be allocated, or the read fails.
 */
int main(int argc,char **argv)
{
    struct aiocb rd;        /* control block describing the read request */
    char *buf = NULL;
    ssize_t nread;
    int fd, status, couter;
    int rc = 1;

    (void)argc;
    (void)argv;

    fd = open("test.txt", O_RDONLY);
    if (fd < 0)
    {
        perror("test.txt");
        return 1;
    }

    /* One spare byte so the received data can be NUL-terminated for printing. */
    buf = malloc(BUFFER_SIZE + 1);
    if (buf == NULL)
    {
        perror("malloc");
        goto out;
    }

    /* Unused aiocb fields must be zero before the request is submitted. */
    memset(&rd, 0, sizeof rd);
    rd.aio_buf = buf;
    rd.aio_fildes = fd;
    rd.aio_nbytes = BUFFER_SIZE;
    rd.aio_offset = 0;

    /* Queue the read; the call returns as soon as the request is accepted. */
    if (aio_read(&rd) < 0)
    {
        perror("aio_read");
        goto out;
    }

    /*
     * Poll until the request completes.  The buffer is deliberately not
     * touched here: its contents are indeterminate while the I/O is in flight.
     */
    couter = 0;
    while ((status = aio_error(&rd)) == EINPROGRESS)
    {
        printf("第%d次\n", ++couter);
    }

    /* aio_return() may be called exactly once, after completion. */
    nread = aio_return(&rd);
    if (status != 0)
    {
        fprintf(stderr, "aio_read: %s\n", strerror(status > 0 ? status : errno));
        goto out;
    }

    buf[nread] = '\0';
    printf("%s\n", buf);
    printf("\n\n返回值为:%zd", nread);
    rc = 0;

out:
    free(buf);
    close(fd);
    return rc;
}

编译gcc -o aio_read aio_read.c -lrt

aio_write.c 异步写操作

#include<stdio.h>
#include<sys/socket.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<assert.h>
#include<unistd.h>
#include<stdlib.h>
#include<errno.h>
#include<string.h>
#include<sys/types.h>
#include<fcntl.h>
#include<aio.h>
 
/*
 * Asynchronous append demo.
 *
 * Queues one aio_write() that appends the text held in `str` to "test.txt",
 * polls aio_error() until the request completes, then prints the byte count
 * reported by aio_return().
 *
 * Returns 0 on success and 1 if the file cannot be opened or the write fails.
 */
int main(int argc,char **argv)
{
    struct aiocb wr;                    /* control block describing the write */
    char str[20] = {"hello,world"};
    ssize_t nwritten;
    int fd, status;

    (void)argc;
    (void)argv;

    /* O_APPEND: the data always lands at the end of the file. */
    fd = open("test.txt", O_WRONLY | O_APPEND);
    if (fd < 0)
    {
        perror("test.txt");
        return 1;
    }

    /* Unused aiocb fields must be zero before the request is submitted. */
    memset(&wr, 0, sizeof wr);

    /*
     * Write straight from `str`, and only as many bytes as it holds.  `str`
     * lives until main() returns, so it outlives the request.
     */
    wr.aio_buf = str;
    wr.aio_fildes = fd;
    wr.aio_nbytes = strlen(str);

    /* Queue the write; the call returns as soon as the request is accepted. */
    if (aio_write(&wr) < 0)
    {
        perror("aio_write");
        close(fd);
        return 1;
    }

    /* Poll until the request completes. */
    while ((status = aio_error(&wr)) == EINPROGRESS)
    {
        printf("hello,world\n");
    }

    /* aio_return() may be called exactly once, after completion. */
    nwritten = aio_return(&wr);
    if (status != 0)
    {
        fprintf(stderr, "aio_write: %s\n", strerror(status > 0 ? status : errno));
        close(fd);
        return 1;
    }

    printf("\n\n\n返回值为:%zd\n", nwritten);

    close(fd);
    return 0;
}

编译gcc -o aio_write aio_write.c -lrt

aio_suspend.c 异步阻塞IO

aio_suspend 函数会挂起（阻塞）调用进程，直到列表中至少有一个异步请求完成、进程收到信号或者等待超时为止。调用者提供一个 aiocb 指针列表，其中任何一个请求完成都会使 aio_suspend 返回。

#include<stdio.h>
#include<sys/socket.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<assert.h>
#include<unistd.h>
#include<stdlib.h>
#include<errno.h>
#include<string.h>
#include<sys/types.h>
#include<fcntl.h>
#include<aio.h> 
 
#define BUFFER_SIZE 1024

/* Capacity of the aiocb list handed to aio_suspend(). */
enum { MAX_LIST = 2 };

/*
 * aio_suspend demo.
 *
 * Queues one aio_read() of up to BUFFER_SIZE bytes from the start of
 * "test.txt", blocks in aio_suspend() until the request completes, then
 * prints the data and the byte count reported by aio_return().
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc,char **argv)
{
    struct aiocb rd;        /* control block describing the read request */

    /*
     * Every slot starts out NULL.  aio_suspend() skips NULL entries, so the
     * list may be larger than the number of requests actually registered.
     */
    const struct aiocb *aiocb_list[MAX_LIST] = { NULL };

    char *buf = NULL;
    ssize_t nread;
    int fd, status;
    int rc = 1;

    (void)argc;
    (void)argv;

    fd = open("test.txt", O_RDONLY);
    if (fd < 0)
    {
        perror("test.txt");
        return 1;
    }

    /* One spare byte so the received data can be NUL-terminated for printing. */
    buf = malloc(BUFFER_SIZE + 1);
    if (buf == NULL)
    {
        perror("malloc");
        goto out;
    }

    /* Unused aiocb fields must be zero before the request is submitted. */
    memset(&rd, 0, sizeof rd);
    rd.aio_buf = buf;
    rd.aio_fildes = fd;
    rd.aio_nbytes = BUFFER_SIZE;
    rd.aio_offset = 0;

    /* Register the read request in the list aio_suspend() will wait on. */
    aiocb_list[0] = &rd;

    if (aio_read(&rd) < 0)
    {
        perror("aio_read");
        goto out;
    }

    printf("我要开始等待异步读事件完成\n");

    /* Sleep until the request finishes; restart if a signal interrupts us. */
    while (aio_suspend(aiocb_list, MAX_LIST, NULL) < 0)
    {
        if (errno != EINTR)
        {
            perror("aio_suspend");
            /*
             * The request may still be in flight, so the buffer is
             * intentionally not freed on this path.
             */
            return 1;
        }
    }

    status = aio_error(&rd);

    /* aio_return() may be called exactly once, after completion. */
    nread = aio_return(&rd);
    if (status != 0)
    {
        fprintf(stderr, "aio_read: %s\n", strerror(status > 0 ? status : errno));
        goto out;
    }

    buf[nread] = '\0';
    printf("%s\n", buf);
    printf("\n\n返回值为:%zd\n", nread);
    rc = 0;

out:
    free(buf);
    close(fd);
    return rc;
}

编译gcc -o aio_suspend aio_suspend.c -lrt

lio_listio 同时发起多个异步IO请求

意味着我们可以在一个系统调用（一次内核上下文切换）中启动大量的 I/O 操作。从性能的角度来看，这非常重要

int lio_listio(int mode, struct aiocb *const aiocb_list[],
                      int nitems, struct sigevent *sevp);

• 第一个参数 mode ：LIO_WAIT(阻塞直到所有的IO都完成) 或 LIO_NOWAIT(不阻塞，在进入排队队列后就返回)
• 第二个参数 list ：异步IO请求队列．
• 第三个参数nitems ：异步IO请求队列长度
• 第四个参数sevp ：定义了在所有 I/O 操作都完成时产生信号的方法。

#include<stdio.h>
#include<sys/socket.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<assert.h>
#include<unistd.h>
#include<stdlib.h>
#include<errno.h>
#include<string.h>
#include<sys/types.h>
#include<fcntl.h>
#include<aio.h>
 
#define BUFFER_SIZE 1024

/* Number of requests submitted in one lio_listio() call. */
enum { MAX_LIST = 2 };

/*
 * Wait until `cb` is no longer in progress and collect its result.
 *
 * Returns the value of aio_return(); when the request failed, errno is set
 * to the request's error status and the return value is -1.
 */
static ssize_t finish_request(struct aiocb *cb)
{
    const struct aiocb *pending[1] = { cb };
    ssize_t result;
    int status;

    /*
     * aio_suspend() only avoids spinning; if it returns early (for example
     * after a signal) the loop simply re-checks the status.
     */
    while ((status = aio_error(cb)) == EINPROGRESS)
    {
        (void)aio_suspend(pending, 1, NULL);
    }

    result = aio_return(cb);
    if (status > 0)
    {
        errno = status;
    }
    return result;
}

/*
 * lio_listio demo.
 *
 * Submits one read (from "test1.txt") and one write (appending to
 * "test2.txt") in a single lio_listio() call, waits for both, and prints
 * each request's return value.
 *
 * Returns 0 when both requests succeed and 1 otherwise.
 */
int main(int argc,char **argv)
{
    struct aiocb *listio[MAX_LIST];
    struct aiocb rd, wr;
    char msg[] = "hello,world\n";   /* payload of the write request */
    char *rbuf = NULL;
    ssize_t ret;
    int rfd = -1, wfd = -1;
    int rc = 0;

    (void)argc;
    (void)argv;

    /* Each request gets its own descriptor so both can be closed later. */
    rfd = open("test1.txt", O_RDONLY);
    if (rfd < 0)
    {
        perror("test1.txt");
        rc = 1;
        goto out;
    }

    wfd = open("test2.txt", O_WRONLY | O_APPEND);
    if (wfd < 0)
    {
        perror("test2.txt");
        rc = 1;
        goto out;
    }

    rbuf = malloc(BUFFER_SIZE);
    if (rbuf == NULL)
    {
        perror("aio_buf");
        rc = 1;
        goto out;
    }

    /* Asynchronous read request. */
    memset(&rd, 0, sizeof rd);
    rd.aio_buf = rbuf;
    rd.aio_fildes = rfd;
    rd.aio_nbytes = BUFFER_SIZE;
    rd.aio_offset = 0;
    rd.aio_lio_opcode = LIO_READ;
    listio[0] = &rd;

    /*
     * Asynchronous write request: send only the bytes `msg` actually holds,
     * not a block of uninitialised memory.
     */
    memset(&wr, 0, sizeof wr);
    wr.aio_buf = msg;
    wr.aio_fildes = wfd;
    wr.aio_nbytes = strlen(msg);
    wr.aio_lio_opcode = LIO_WRITE;
    listio[1] = &wr;

    /* LIO_WAIT: block until every request in the list has completed. */
    if (lio_listio(LIO_WAIT, listio, MAX_LIST, NULL) < 0)
    {
        /*
         * Some requests may still have been started or may have failed
         * individually, so each one is inspected below.
         */
        perror("lio_listio");
        rc = 1;
    }

    ret = finish_request(&rd);
    printf("\n读返回值:%zd", ret);
    if (ret < 0)
    {
        rc = 1;
    }

    ret = finish_request(&wr);
    printf("\n写返回值:%zd", ret);
    if (ret < 0)
    {
        rc = 1;
    }

out:
    free(rbuf);
    if (wfd >= 0)
    {
        close(wfd);
    }
    if (rfd >= 0)
    {
        close(rfd);
    }
    return rc;
}

编译gcc -o lio_listio lio_listio.c -lrt

AIO通知机制

异步与同步的区别就是我们不需要等待异步操作返回就可以继续干其他的事情，当异步操作完成时可以通知我们去处理它。

主要有两种通知方式：

• 信号
• 函数回调

(1)信号通知
在发起异步请求时，可以指定异步操作完成时向调用进程发送哪个信号，进程收到该信号后就会执行相应的信号处理函数。这种方式的缺点很明显：系统中可能同时存在很多信号，而且信号处理函数会打断进程的正常执行流程，因此其中一定不能有任何阻塞操作，也只能调用异步信号安全（async-signal-safe）的函数，否则可能导致整个进程阻塞甚至死锁。

#include <aio.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>

#define BUFFER_SIZE 1024

/*
 * Set to 1 by the signal handler once the completion signal has arrived.
 * volatile sig_atomic_t is the only object type a handler may safely write.
 */
static volatile sig_atomic_t aio_done = 0;

/*
 * SIGUSR1 handler (SA_SIGINFO form).
 *
 * It only records that an AIO completion signal arrived.  printf(), free()
 * and similar calls are not async-signal-safe, so all result processing is
 * left to main().
 */
void aio_completion_handler(int signo, siginfo_t *info, void *context) {
    (void)signo;
    (void)context;

    /* Ignore SIGUSR1 that did not originate from AIO completion. */
    if (info->si_code == SI_ASYNCIO) {
        aio_done = 1;
    }
}

/*
 * Signal-notification demo.
 *
 * Queues one aio_read() of up to BUFFER_SIZE bytes from "testfile.txt" and
 * asks for SIGUSR1 on completion, sleeps in sigsuspend() until the handler
 * reports that signal, then prints the result.
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(void) {
    struct aiocb cb;
    struct sigaction sa;
    sigset_t block_mask, wait_mask;
    char *buffer = NULL;
    ssize_t nread;
    int fd, status;
    int rc = 1;

    fd = open("testfile.txt", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    buffer = malloc(BUFFER_SIZE);
    if (!buffer) {
        perror("malloc");
        goto out;
    }

    /*
     * Keep SIGUSR1 blocked except while inside sigsuspend(), so the
     * completion signal cannot slip in between testing aio_done and going
     * to sleep.
     */
    sigemptyset(&block_mask);
    sigaddset(&block_mask, SIGUSR1);
    if (sigprocmask(SIG_BLOCK, &block_mask, &wait_mask) == -1) {
        perror("sigprocmask");
        goto out;
    }
    sigdelset(&wait_mask, SIGUSR1);

    /* Install the handler. */
    memset(&sa, 0, sizeof sa);
    sa.sa_flags = SA_SIGINFO;
    sa.sa_sigaction = aio_completion_handler;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGUSR1, &sa, NULL) == -1) {
        perror("sigaction");
        goto out;
    }

    /* Describe the read request. */
    memset(&cb, 0, sizeof cb);
    cb.aio_fildes = fd;
    cb.aio_buf = buffer;
    cb.aio_nbytes = BUFFER_SIZE;
    cb.aio_offset = 0;

    /* Ask for SIGUSR1 when the request completes. */
    cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
    cb.aio_sigevent.sigev_signo = SIGUSR1;
    cb.aio_sigevent.sigev_value.sival_ptr = &cb;

    if (aio_read(&cb) == -1) {
        perror("aio_read");
        goto out;
    }

    /* Other work could be done here; this demo just sleeps until notified. */
    while (!aio_done) {
        sigsuspend(&wait_mask);
    }

    /* Collect the result in normal (non-signal) context. */
    status = aio_error(&cb);
    nread = aio_return(&cb);
    if (status == 0) {
        printf("AIO operation completed successfully, read %zd bytes.\n", nread);
        printf("Data: %.*s\n", (int)nread, buffer);
        rc = 0;
    } else {
        printf("AIO operation failed: %s\n", strerror(status > 0 ? status : errno));
    }

out:
    free(buffer);
    close(fd);
    return rc;
}

编译 gcc -o aio_sigaction aio_sigaction.c -lrt

(2)线程函数回调
顾名思义．．．就是我们经常写的回调函数（会另外开一个线程去处理，所以不会阻塞当前进程）

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <semaphore.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BUFFER_SIZE 1024

/*
 * State shared between main() and the completion callback: the control block
 * of the request plus a semaphore the callback posts when it has finished.
 */
struct read_request {
    struct aiocb cb;
    sem_t done;
};

/*
 * Completion callback, invoked in a separate thread (SIGEV_THREAD).
 *
 * sigval.sival_ptr must point to the struct read_request that main() set up.
 * The callback reports the outcome of the read and then posts `done` so that
 * main() knows it may release the buffer and exit.  The buffer is owned by
 * main() and is not freed here.
 */
void aio_completion_handler(union sigval sigval) {
    struct read_request *req = sigval.sival_ptr;
    int status = aio_error(&req->cb);

    if (status == 0) {
        /* aio_return() may be called exactly once per request. */
        ssize_t nread = aio_return(&req->cb);
        printf("AIO operation completed successfully, read %zd bytes.\n", nread);
        printf("Data: %.*s\n", (int)nread, (char *)req->cb.aio_buf);
    } else {
        printf("AIO operation failed: %s\n", strerror(status > 0 ? status : errno));
    }

    /* Must be last: main() may tear everything down once this is posted. */
    sem_post(&req->done);
}

/*
 * Thread-callback notification demo.
 *
 * Queues one aio_read() of up to BUFFER_SIZE bytes from "testfile.txt" with
 * SIGEV_THREAD notification and waits until the callback has finished before
 * releasing resources.  Older glibc versions may need -pthread in addition
 * to -lrt when linking.
 *
 * Returns 0 once the callback has run and 1 on any setup failure.
 */
int main(void) {
    struct read_request req;
    char *buffer = NULL;
    int fd;
    int rc = 1;

    fd = open("testfile.txt", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    buffer = malloc(BUFFER_SIZE);
    if (!buffer) {
        perror("malloc");
        close(fd);
        return 1;
    }

    if (sem_init(&req.done, 0, 0) == -1) {
        perror("sem_init");
        free(buffer);
        close(fd);
        return 1;
    }

    /* Describe the read request. */
    memset(&req.cb, 0, sizeof req.cb);
    req.cb.aio_fildes = fd;
    req.cb.aio_buf = buffer;
    req.cb.aio_nbytes = BUFFER_SIZE;
    req.cb.aio_offset = 0;

    /* Ask for a callback in a new thread when the request completes. */
    req.cb.aio_sigevent.sigev_notify = SIGEV_THREAD;
    req.cb.aio_sigevent.sigev_notify_function = aio_completion_handler;
    req.cb.aio_sigevent.sigev_notify_attributes = NULL;
    req.cb.aio_sigevent.sigev_value.sival_ptr = &req;

    if (aio_read(&req.cb) == -1) {
        perror("aio_read");
        goto out;
    }

    /*
     * Other work could be done here.  Wait for the callback itself, not just
     * for the I/O: returning from main() earlier would end the process before
     * the callback thread has reported the result.
     */
    while (sem_wait(&req.done) == -1) {
        if (errno != EINTR) {
            perror("sem_wait");
            /* The callback may still be running; do not free anything. */
            return 1;
        }
    }
    rc = 0;

out:
    sem_destroy(&req.done);
    free(buffer);
    close(fd);
    return rc;
}

总结

Linux的异步I/O操作提供了一种高效的I/O处理机制，允许应用程序在不阻塞的情况下发起和处理I/O操作，使用异步 I/O 可以帮助我们构建 I/O 速度更快、效率更高的应用程序。如果我们的应用程序可以对处理和 I/O 操作重叠进行，那么 AIO 就可以帮助我们构建可以更高效地使用可用 CPU 资源的应用程序。尽管这种 I/O 模型与在大部分 Linux 应用程序中使用的传统阻塞模式都不同，但是异步通知模型在概念上来说却非常简单，可以简化我们的设计。