An In-Depth Guide to Python Async Generators in Real-World Projects
Async generators are one of the core features of Python's asynchronous programming model, and they show up in a wide range of modern software. This article walks through concrete, practical uses of async generators across several domains, including web development, data processing, system monitoring, and fintech.
1. Web Development and API Services
1.1 Streaming HTTP Responses
Scenario: serving large file downloads or pushing real-time data
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
import aiofiles

app = FastAPI()

async def large_file_generator(file_path, chunk_size=1024 * 1024):
    """Yield a large file in fixed-size chunks, asynchronously."""
    async with aiofiles.open(file_path, 'rb') as f:
        while True:
            chunk = await f.read(chunk_size)
            if not chunk:
                break
            yield chunk

@app.get("/download/{filename}")
async def download_large_file(filename: str):
    file_path = f"/data/{filename}"
    return StreamingResponse(
        large_file_generator(file_path),
        media_type="application/octet-stream",
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
Key points:
- Uses aiofiles for asynchronous file I/O
- Reads the file in fixed-size chunks to avoid exhausting memory
- Can support resumable downloads by honoring the Range header (a sketch follows this list)
- Well suited to video streaming, large log files, and similar scenarios
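The endpoint above always streams the whole file; honoring the Range header takes a little extra plumbing. Below is a minimal sketch that handles a single "bytes=start-end" range. The /download-range/{filename} route and the file_range_generator helper are illustrative names introduced here, not part of the original example.

import os
import aiofiles
from fastapi import FastAPI, Header, HTTPException
from fastapi.responses import StreamingResponse

app = FastAPI()

async def file_range_generator(file_path, start, end, chunk_size=1024 * 1024):
    """Yield only the bytes in [start, end] of the file, chunk by chunk."""
    async with aiofiles.open(file_path, 'rb') as f:
        await f.seek(start)
        remaining = end - start + 1
        while remaining > 0:
            chunk = await f.read(min(chunk_size, remaining))
            if not chunk:
                break
            remaining -= len(chunk)
            yield chunk

@app.get("/download-range/{filename}")
async def download_with_range(filename: str, range: str = Header(default="bytes=0-")):
    file_path = f"/data/{filename}"
    file_size = os.path.getsize(file_path)
    # Expect a header of the form "bytes=start-end"; an open end means "to EOF".
    try:
        start_str, end_str = range.removeprefix("bytes=").split("-")
        start = int(start_str or 0)
        end = int(end_str) if end_str else file_size - 1
    except ValueError:
        raise HTTPException(status_code=416, detail="Invalid Range header")
    return StreamingResponse(
        file_range_generator(file_path, start, end),
        status_code=206,
        media_type="application/octet-stream",
        headers={"Content-Range": f"bytes {start}-{end}/{file_size}"},
    )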
1.2 Real-Time Data Push over WebSocket
import asyncio
import json
import random
from datetime import datetime

from fastapi import WebSocket, WebSocketDisconnect

async def stock_price_generator(symbol):
    """Simulate a real-time stock price feed."""
    price = 100.0
    while True:
        await asyncio.sleep(0.5)
        price += random.uniform(-1, 1)
        yield json.dumps({
            "symbol": symbol,
            "price": round(price, 2),
            "timestamp": datetime.now().isoformat()
        })

@app.websocket("/ws/stocks/{symbol}")
async def websocket_stock_price(websocket: WebSocket, symbol: str):
    await websocket.accept()
    try:
        async for price_update in stock_price_generator(symbol):
            await websocket.send_text(price_update)
    except WebSocketDisconnect:
        print(f"Client disconnected: {symbol}")
Use cases:
- Real-time financial data feeds
- Collaborative editing notifications
- Live game state updates
- IoT device status monitoring
2. Data Processing and ETL Pipelines
2.1 Streaming Export from a Database
import asyncpg
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

async def pg_streaming_export(connection_params, query, batch_size=1000):
    """Stream rows out of PostgreSQL with a server-side cursor."""
    conn = await asyncpg.connect(**connection_params)
    try:
        async with conn.transaction():
            cursor = await conn.cursor(query)
            while True:
                records = await cursor.fetch(batch_size)
                if not records:
                    break
                yield records
    finally:
        await conn.close()

async def export_to_parquet():
    """Export the query results to a Parquet file, batch by batch."""
    query = "SELECT * FROM large_financial_transactions"
    exporter = pg_streaming_export(
        {"host": "db", "user": "user", "password": "pass"},
        query
    )
    writer = None
    async for batch in exporter:
        df = pd.DataFrame([dict(record) for record in batch])
        table = pa.Table.from_pandas(df)
        if writer is None:
            writer = pq.ParquetWriter("output.parquet", table.schema, compression="snappy")
        writer.write_table(table)
    if writer:
        writer.close()
Performance notes:
- A server-side cursor keeps memory pressure off the client
- Batched fetches improve throughput
- The export can be made resumable by recording the last processed ID (see the sketch after this list)
- The pattern extends naturally to distributed ETL jobs
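One way to implement the resume point mentioned above is keyset pagination on a monotonically increasing id column, with the last exported id persisted between runs. The sketch below assumes such a column exists and uses a local JSON file, export_checkpoint.json, as the checkpoint store; both are illustrative choices rather than part of the original pipeline.

import json
import os
import asyncpg

CHECKPOINT_FILE = "export_checkpoint.json"  # hypothetical checkpoint location

def load_last_id():
    """Return the last exported id, or 0 if no checkpoint exists yet."""
    if os.path.exists(CHECKPOINT_FILE):
        with open(CHECKPOINT_FILE) as f:
            return json.load(f)["last_id"]
    return 0

def save_last_id(last_id):
    with open(CHECKPOINT_FILE, "w") as f:
        json.dump({"last_id": last_id}, f)

async def resumable_export(connection_params, batch_size=1000):
    """Keyset-paginated export: each batch starts after the last id already exported."""
    conn = await asyncpg.connect(**connection_params)
    try:
        last_id = load_last_id()
        while True:
            rows = await conn.fetch(
                "SELECT * FROM large_financial_transactions "
                "WHERE id > $1 ORDER BY id LIMIT $2",
                last_id, batch_size,
            )
            if not rows:
                break
            yield rows
            last_id = rows[-1]["id"]
            save_last_id(last_id)  # checkpoint only after the batch has been consumed
    finally:
        await conn.close()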
2.2 Real-Time Data Transformation Pipeline
import io
import json
import uuid

import fastavro
from aiokafka import AIOKafkaConsumer
from aiobotocore.session import get_session

class DataPipeline:
    def __init__(self, extractor, transformers, loader):
        self.extractor = extractor        # async generator yielding records
        self.transformers = transformers  # async callables: record -> record
        self.loader = loader              # async callable: record -> None

    async def process(self):
        async for record in self.extractor:
            for transformer in self.transformers:
                record = await transformer(record)
            await self.loader(record)

async def kafka_extractor(topic):
    """Read records from Kafka as an async generator."""
    consumer = AIOKafkaConsumer(
        topic,
        bootstrap_servers='kafka:9092',
        group_id="etl-group"
    )
    await consumer.start()
    try:
        async for msg in consumer:
            yield json.loads(msg.value)
    finally:
        await consumer.stop()

def json_to_avro_transformer(schema):
    """Build a transformer that serializes a JSON record to Avro bytes."""
    async def transform(record):
        buffer = io.BytesIO()
        fastavro.schemaless_writer(buffer, schema, record)
        return buffer.getvalue()
    return transform

def s3_loader(bucket):
    """Build a loader that writes each Avro payload to S3."""
    session = get_session()

    async def load(data):
        # A client per record keeps the example simple; reuse it in production.
        async with session.create_client('s3') as client:
            await client.put_object(
                Bucket=bucket,
                Key=f"output/{uuid.uuid4()}.avro",
                Body=data
            )
    return load

# Usage example (schema is an Avro schema dict defined elsewhere)
pipeline = DataPipeline(
    extractor=kafka_extractor("transactions"),
    transformers=[json_to_avro_transformer(schema)],
    loader=s3_loader("data-lake")
)
await pipeline.process()
3. System Monitoring and Log Processing
3.1 Distributed Log Aggregation
import asyncio
import aiofiles

async def tail_logfile(file_path):
    """Asynchronously follow a log file as it grows (like `tail -f`)."""
    async with aiofiles.open(file_path, 'r') as f:
        await f.seek(0, 2)  # jump to the end of the file
        while True:
            line = await f.readline()
            if line:
                yield line.strip()
            else:
                await asyncio.sleep(0.1)
from datetime import datetime

async def log_processor():
    """Log processing pipeline: merge several tails, index them, and raise alerts."""
    log_sources = [
        tail_logfile("/var/log/app1.log"),
        tail_logfile("/var/log/app2.log")
    ]
    async for line in merge_async_generators(*log_sources):
        # Parse the log entry
        log_entry = parse_log_line(line)
        # Index it into Elasticsearch (elastic is an async client created elsewhere)
        await elastic.index(
            index="logs-" + datetime.now().strftime("%Y-%m-%d"),
            body=log_entry
        )
        # Alert on errors ("level" field and send_alert helper are assumed here)
        if log_entry.get("level") == "ERROR":
            await send_alert(log_entry)
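merge_async_generators is referenced above but not defined in the article. Below is a minimal sketch of such a helper, fanning each source into a shared asyncio.Queue; the implementation details are an illustration, and a library such as aiostream also provides a ready-made stream.merge.

import asyncio

async def merge_async_generators(*generators):
    """Interleave items from several async generators as they become available."""
    queue = asyncio.Queue()
    finished = object()  # sentinel marking that one source is exhausted

    async def drain(gen):
        try:
            async for item in gen:
                await queue.put(item)
        finally:
            await queue.put(finished)

    tasks = [asyncio.create_task(drain(gen)) for gen in generators]
    remaining = len(tasks)
    try:
        while remaining:
            item = await queue.get()
            if item is finished:
                remaining -= 1
            else:
                yield item
    finally:
        # Stop the remaining producers if the consumer exits early.
        for task in tasks:
            task.cancel()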