An In-Depth Guide to Python Async Generators in Real-World Projects
Async generators are one of the core features of Python's asynchronous programming model, and they show up in a wide range of modern software. This article walks through concrete, practical uses of async generators across several domains, including web development, data processing, system monitoring, and fintech.
1. Web Development and API Services
1.1 Streaming HTTP Responses
Scenario: serving large file downloads or pushing real-time data
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
import aiofiles

app = FastAPI()

async def large_file_generator(file_path, chunk_size=1024 * 1024):
    """Yield a large file in fixed-size chunks, asynchronously."""
    async with aiofiles.open(file_path, 'rb') as f:
        while True:
            chunk = await f.read(chunk_size)
            if not chunk:
                break
            yield chunk

@app.get("/download/{filename}")
async def download_large_file(filename: str):
    file_path = f"/data/{filename}"
    return StreamingResponse(
        large_file_generator(file_path),
        media_type="application/octet-stream",
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
Key points:
- Uses aiofiles for asynchronous file I/O
- Reads the file in fixed-size chunks to avoid exhausting memory
- Can support resumable downloads by honoring the Range header (a sketch follows this list)
- Well suited to video streaming, large log files, and similar scenarios
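The endpoint above always streams the whole file; honoring the Range header takes a little extra plumbing. Below is a minimal sketch that handles a single "bytes=start-end" range. The /download-range/{filename} route and the file_range_generator helper are illustrative names introduced here, not part of the original example.

import os
import aiofiles
from fastapi import FastAPI, Header, HTTPException
from fastapi.responses import StreamingResponse

app = FastAPI()

async def file_range_generator(file_path, start, end, chunk_size=1024 * 1024):
    """Yield only the bytes in [start, end] of the file, chunk by chunk."""
    async with aiofiles.open(file_path, 'rb') as f:
        await f.seek(start)
        remaining = end - start + 1
        while remaining > 0:
            chunk = await f.read(min(chunk_size, remaining))
            if not chunk:
                break
            remaining -= len(chunk)
            yield chunk

@app.get("/download-range/{filename}")
async def download_with_range(filename: str, range: str = Header(default="bytes=0-")):
    file_path = f"/data/{filename}"
    file_size = os.path.getsize(file_path)
    # Expect a header of the form "bytes=start-end"; an open end means "to EOF".
    try:
        start_str, end_str = range.removeprefix("bytes=").split("-")
        start = int(start_str or 0)
        end = int(end_str) if end_str else file_size - 1
    except ValueError:
        raise HTTPException(status_code=416, detail="Invalid Range header")
    return StreamingResponse(
        file_range_generator(file_path, start, end),
        status_code=206,
        media_type="application/octet-stream",
        headers={"Content-Range": f"bytes {start}-{end}/{file_size}"},
    )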
1.2 Real-Time Data Push over WebSocket
import asyncio
import json
import random
from datetime import datetime

from fastapi import WebSocket, WebSocketDisconnect

async def stock_price_generator(symbol):
    """Simulate a real-time stock price feed."""
    price = 100.0
    while True:
        await asyncio.sleep(0.5)
        price += random.uniform(-1, 1)
        yield json.dumps({
            "symbol": symbol,
            "price": round(price, 2),
            "timestamp": datetime.now().isoformat()
        })

@app.websocket("/ws/stocks/{symbol}")
async def websocket_stock_price(websocket: WebSocket, symbol: str):
    await websocket.accept()
    try:
        async for price_update in stock_price_generator(symbol):
            await websocket.send_text(price_update)
    except WebSocketDisconnect:
        print(f"Client disconnected: {symbol}")
Use cases:
- Real-time financial data feeds
- Collaborative editing notifications
- Live game state updates
- IoT device status monitoring
2. Data Processing and ETL Pipelines
2.1 Streaming Export from a Database
import asyncpg
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

async def pg_streaming_export(connection_params, query, batch_size=1000):
    """Stream rows out of PostgreSQL with a server-side cursor."""
    conn = await asyncpg.connect(**connection_params)
    try:
        async with conn.transaction():
            cursor = await conn.cursor(query)
            while True:
                records = await cursor.fetch(batch_size)
                if not records:
                    break
                yield records
    finally:
        await conn.close()

async def export_to_parquet():
    """Export the query results to a Parquet file, batch by batch."""
    query = "SELECT * FROM large_financial_transactions"
    exporter = pg_streaming_export(
        {"host": "db", "user": "user", "password": "pass"},
        query
    )
    writer = None
    async for batch in exporter:
        df = pd.DataFrame([dict(record) for record in batch])
        table = pa.Table.from_pandas(df)
        if writer is None:
            writer = pq.ParquetWriter("output.parquet", table.schema, compression="snappy")
        writer.write_table(table)
    if writer:
        writer.close()
Performance notes:
- A server-side cursor keeps memory pressure off the client
- Batched fetches improve throughput
- The export can be made resumable by recording the last processed ID (see the sketch after this list)
- The pattern extends naturally to distributed ETL jobs
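One way to implement the resume point mentioned above is keyset pagination on a monotonically increasing id column, with the last exported id persisted between runs. The sketch below assumes such a column exists and uses a local JSON file, export_checkpoint.json, as the checkpoint store; both are illustrative choices rather than part of the original pipeline.

import json
import os
import asyncpg

CHECKPOINT_FILE = "export_checkpoint.json"  # hypothetical checkpoint location

def load_last_id():
    """Return the last exported id, or 0 if no checkpoint exists yet."""
    if os.path.exists(CHECKPOINT_FILE):
        with open(CHECKPOINT_FILE) as f:
            return json.load(f)["last_id"]
    return 0

def save_last_id(last_id):
    with open(CHECKPOINT_FILE, "w") as f:
        json.dump({"last_id": last_id}, f)

async def resumable_export(connection_params, batch_size=1000):
    """Keyset-paginated export: each batch starts after the last id already exported."""
    conn = await asyncpg.connect(**connection_params)
    try:
        last_id = load_last_id()
        while True:
            rows = await conn.fetch(
                "SELECT * FROM large_financial_transactions "
                "WHERE id > $1 ORDER BY id LIMIT $2",
                last_id, batch_size,
            )
            if not rows:
                break
            yield rows
            last_id = rows[-1]["id"]
            save_last_id(last_id)  # checkpoint only after the batch has been consumed
    finally:
        await conn.close()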
2.2 Real-Time Data Transformation Pipeline
import io
import json
import uuid

import fastavro
from aiokafka import AIOKafkaConsumer
from aiobotocore.session import get_session

class DataPipeline:
    def __init__(self, extractor, transformers, loader):
        self.extractor = extractor        # async generator yielding records
        self.transformers = transformers  # async callables: record -> record
        self.loader = loader              # async callable: record -> None

    async def process(self):
        async for record in self.extractor:
            for transformer in self.transformers:
                record = await transformer(record)
            await self.loader(record)

async def kafka_extractor(topic):
    """Read records from Kafka as an async generator."""
    consumer = AIOKafkaConsumer(
        topic,
        bootstrap_servers='kafka:9092',
        group_id="etl-group"
    )
    await consumer.start()
    try:
        async for msg in consumer:
            yield json.loads(msg.value)
    finally:
        await consumer.stop()

def json_to_avro_transformer(schema):
    """Build a transformer that serializes a JSON record to Avro bytes."""
    async def transform(record):
        buffer = io.BytesIO()
        fastavro.schemaless_writer(buffer, schema, record)
        return buffer.getvalue()
    return transform

def s3_loader(bucket):
    """Build a loader that writes each Avro payload to S3."""
    session = get_session()

    async def load(data):
        # A client per record keeps the example simple; reuse it in production.
        async with session.create_client('s3') as client:
            await client.put_object(
                Bucket=bucket,
                Key=f"output/{uuid.uuid4()}.avro",
                Body=data
            )
    return load

# Usage example (schema is an Avro schema dict defined elsewhere)
pipeline = DataPipeline(
    extractor=kafka_extractor("transactions"),
    transformers=[json_to_avro_transformer(schema)],
    loader=s3_loader("data-lake")
)
await pipeline.process()
3. System Monitoring and Log Processing
3.1 Distributed Log Aggregation
import asyncio
import aiofiles

async def tail_logfile(file_path):
    """Asynchronously follow a log file as it grows (like `tail -f`)."""
    async with aiofiles.open(file_path, 'r') as f:
        await f.seek(0, 2)  # jump to the end of the file
        while True:
            line = await f.readline()
            if line:
                yield line.strip()
            else:
                await asyncio.sleep(0.1)
from datetime import datetime

async def log_processor():
    """Log processing pipeline: merge several tails, index them, and raise alerts."""
    log_sources = [
        tail_logfile("/var/log/app1.log"),
        tail_logfile("/var/log/app2.log")
    ]
    async for line in merge_async_generators(*log_sources):
        # Parse the log entry
        log_entry = parse_log_line(line)
        # Index it into Elasticsearch (elastic is an async client created elsewhere)
        await elastic.index(
            index="logs-" + datetime.now().strftime("%Y-%m-%d"),
            body=log_entry
        )
        # Alert on errors ("level" field and send_alert helper are assumed here)
        if log_entry.get("level") == "ERROR":
            await send_alert(log_entry)
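merge_async_generators is referenced above but not defined in the article. Below is a minimal sketch of such a helper, fanning each source into a shared asyncio.Queue; the implementation details are an illustration, and a library such as aiostream also provides a ready-made stream.merge.

import asyncio

async def merge_async_generators(*generators):
    """Interleave items from several async generators as they become available."""
    queue = asyncio.Queue()
    finished = object()  # sentinel marking that one source is exhausted

    async def drain(gen):
        try:
            async for item in gen:
                await queue.put(item)
        finally:
            await queue.put(finished)

    tasks = [asyncio.create_task(drain(gen)) for gen in generators]
    remaining = len(tasks)
    try:
        while remaining:
            item = await queue.get()
            if item is finished:
                remaining -= 1
            else:
                yield item
    finally:
        # Stop the remaining producers if the consumer exits early.
        for task in tasks:
            task.cancel()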