aiofiles、aiohttp、asyncio是Python异步生态的三件套。其中,asyncio是异步运行时框架;aiofiles主要是异步文件IO读写库;aiohttp主要是异步HTTP客户端库(sanic是异步Web服务端框架)。
一、真假异步
import asyncio
import time
import math
import pandas as pd
## cpu compute type
async def async_main(n):
    """Pretend-async CPU work: return the sum of sin(i) for i in range(n).

    Contains no ``await``, so it blocks the event loop for its whole run —
    declaring it ``async`` buys nothing for CPU-bound code.
    """
    return sum(math.sin(i) for i in range(n))
async def async_main_k_times(n, k):
    """Schedule k concurrent async_main(n) tasks and gather their results into a list."""
    pending = [asyncio.create_task(async_main(n)) for _ in range(k)]
    return await asyncio.gather(*pending)
def sync_main(n):
    """Synchronous baseline: sum of math.sin(i) for i in range(n)."""
    return sum(map(math.sin, range(n)))
## IO type
# Path to the sample CSV read by the I/O benchmarks below (Windows-specific; adjust locally).
file = r"C:\Users\songroom\Desktop\scores.csv"
async def async_io():
    """Read the module-level CSV path with pandas inside an ``async def``.

    NOTE: pd.read_csv is a synchronous, blocking call — wrapping it in an
    async function does NOT make the I/O asynchronous. This is the
    "fake async" the article demonstrates.
    """
    df = pd.read_csv(file,sep =",",encoding="ANSI")
    return df
async def async_io_k_times(counts):
    """Launch `counts` concurrent async_io tasks and gather the resulting DataFrames."""
    jobs = [asyncio.create_task(async_io()) for _ in range(counts)]
    return await asyncio.gather(*jobs)
def sync_io():
    """Synchronous CSV-read baseline (the same blocking call async_io makes)."""
    return pd.read_csv(file, sep=",", encoding="ANSI")
# Benchmark parameters.
n = 1000000   # iterations per CPU-bound task
k = 10        # number of concurrent CPU-bound tasks
# --- CPU-bound timings: single async run vs k gathered runs vs plain sync ---
t0 = time.time()
asyncio.run(async_main(n))
t1 = time.time()
asyncio.run(async_main_k_times(n,k))
t2 = time.time()
sync_main(n)
t3 = time.time()
print(f"cpu compute type: n :{n} k:{k}")
print(f"async_main: cost time {t1-t0} ")
print(f"async_main_k_times : cost time {t2-t1} ")
print(f"sync_main: cost time {t3-t2} ")
# --- I/O-bound timings: note async_io still blocks (pd.read_csv is synchronous) ---
counts = 100  # number of repeated CSV reads
t4 = time.time()
asyncio.run(async_io())
t5 = time.time()
sync_io()
t6 = time.time()
asyncio.run(async_io_k_times(counts))
t7 = time.time()
print(f"io type : io counts :{counts}")
print(f"async_io: cost time {t5-t4} ")
print(f"async_io_k_times : cost time {t7-t6} ")
# NOTE(review): the sync timing (t6-t5) is printed after the k-times line;
# ordering kept as in the original article output.
print(f"sync_io: cost time {t6-t5} ")
为什么会出现异步后,效率反而下降了?其实,最主要的原因是,上面的核心代码并没有实现真正的异步。比如,pd.read_csv是一个同步阻塞的IO操作,即便包在async函数里也无法让出事件循环。
二、如何真异步io
aiofiles是Python的异步文件IO库,具体详细资料可见:
https://github.com/Tinche/aiofiles
主要使用方法如下:
async def asyc_wirte(file):
    """Asynchronously write a short string to *file* via aiofiles.

    (Function name spelling kept as in the original source.)
    """
    # `async with` replaces the plain `with`; the write itself is awaited.
    async with aiofiles.open(file, "w", encoding="utf-8") as f:
        await f.write("hello world ")
    print("数据写入成功")
async def asyc_read(file):
    """Asynchronously read the whole of *file* and print its contents."""
    async with aiofiles.open(file, "r", encoding="utf-8") as f:
        text = await f.read()
    print(text)
async def asyn_read_line(file):
    """Asynchronously iterate *file* line by line, printing each line."""
    async with aiofiles.open(file, "r", encoding="utf-8") as f:
        # `async for` awaits each line read instead of blocking on it.
        async for ln in f:
            print(ln)
可以把上面的代码重新改写一下,把csv读取部分变成真异步(同时换了一个更大的csv文件):
import asyncio
import time
import math
import pandas as pd
import aiofiles
import io
## cpu compute type
async def async_main(n):
    """CPU-bound accumulation of sin values.

    No ``await`` inside, so the coroutine monopolizes the event loop —
    it is not truly asynchronous.
    """
    acc = 0
    for idx in range(n):
        acc += math.sin(idx)
    return acc
async def async_main_k_times(n, k):
    """Run async_main(n) k times concurrently and return the gathered results."""
    batch = [asyncio.create_task(async_main(n)) for _ in range(k)]
    results = await asyncio.gather(*batch)
    return results
def sync_main(n):
    """Plain synchronous version of the sin-sum benchmark."""
    result = 0
    idx = 0
    while idx < n:
        result = result + math.sin(idx)
        idx += 1
    return result
## pandas read_csv
async def async_pandas_io(file):
    """Read *file* with pandas inside an ``async def``.

    NOTE: pd.read_csv blocks; the ``async`` keyword does not make this
    I/O asynchronous — kept deliberately as the "fake async" comparison case.
    """
    df = pd.read_csv(file,sep =",",encoding="ANSI")
    return df
async def async_pandas_io_k_times(counts, file):
    """Launch `counts` concurrent (fake-async) pandas reads of *file* and gather them."""
    readers = [asyncio.create_task(async_pandas_io(file)) for _ in range(counts)]
    return await asyncio.gather(*readers)
## pandas 同步read_csv
def sync_pandas_io(file):
df = pd.read_csv(file,sep =",",encoding="ANSI")
return df
## aiofiles read_csv
async def async_aio_write(file):
    """Truly-async write of a short string to *file* via aiofiles."""
    # `async with` yields an async file handle; the write is awaited.
    async with aiofiles.open(file, "w", encoding="utf-8") as f:
        await f.write("hello world ")
    print("数据写入成功")
async def async_aio_read(file):
    """Truly-async CSV read: await the raw text via aiofiles, then parse with pandas.

    Returns the parsed DataFrame.
    """
    async with aiofiles.open(file, "r", encoding="utf-8") as f:
        text = await f.read()
    # Parsing is synchronous, but the disk read above was awaited.
    return pd.read_csv(io.StringIO(text), sep=",")
async def async_aio_read_line(file):
    """Truly-async line-by-line read of *file*, printing each line."""
    async with aiofiles.open(file, "r", encoding="utf-8") as f:
        # `async for` awaits each line instead of blocking the loop.
        async for ln in f:
            print(ln)
async def async_aio_read_k_times(counts, file):
    """Launch `counts` concurrent truly-async reads of *file* and gather the DataFrames."""
    readers = [asyncio.create_task(async_aio_read(file)) for _ in range(counts)]
    return await asyncio.gather(*readers)
# Path to the sample CSV used by the benchmarks (Windows-specific; adjust locally).
file = r"C:\Users\songroom\Desktop\scores.csv"
# Benchmark parameters.
n = 1000000   # iterations per CPU-bound task
k = 10        # number of concurrent CPU-bound tasks
# --- CPU-bound timings ---
t0 = time.time()
asyncio.run(async_main(n))
t1 = time.time()
asyncio.run(async_main_k_times(n,k))
t2 = time.time()
sync_main(n)
t3 = time.time()
print(f"cpu compute type: n :{n} k:{k}")
print(f"async_main: cost time {t1-t0} ")
print(f"async_main_k_times : cost time {t2-t1} ")
print(f"sync_main: cost time {t3-t2} ")
# --- I/O-bound timings: fake-async pandas reads vs truly-async aiofiles reads ---
counts = 100  # number of repeated CSV reads
t4 = time.time()
asyncio.run(async_pandas_io(file))
t5 = time.time()
sync_pandas_io(file)
t6 = time.time()
asyncio.run(async_pandas_io_k_times(counts,file))
t7 = time.time()
asyncio.run(async_aio_read_k_times(counts,file))
t8 = time.time()
print(f"io type : io counts :{counts}")
print(f"async_pandas_io: cost time {t5-t4} ")
print(f"async_pandas_io_k_times : cost time {t7-t6} ")
# NOTE(review): the sync timing (t6-t5) is printed after the k-times line;
# ordering kept as in the original article output.
print(f"sync_pandas_io: cost time {t6-t5} ")
print(f"async_aio_read: cost time {t8-t7} ")
输出结果如下:
cpu compute type: n :1000000 k:10
async_main: cost time 0.1399977207183838
async_main_k_times : cost time 1.7040011882781982
sync_main: cost time 0.16300249099731445
io type : io counts :100
async_pandas_io: cost time 0.02899909019470215
async_pandas_io_k_times : cost time 0.7789931297302246
sync_pandas_io: cost time 0.013007164001464844
async_aio_read: cost time 0.7399976253509521
上面的结果表明:
1、异步的效果:真异步下处理100次读csv文件需要0.73秒,这还包括转成dataframe的时间,如果只返回字符串会更快;和同步方式100次读csv约1.3秒相比,时间大约减少近50%;
2、async_aio_read_k_times和async_pandas_io_k_times两者耗时差不多,原因尚不清楚;是不是同步读取在底层用了线程池后,实际效果和异步方案差不多所致?