# Fetch URLs with coroutines for much better speed; adds a retry mechanism,
# a configurable concurrency limit, and captures the response bodies.
# -*- coding: utf-8 -*-
import aiohttp
import asyncio
from loguru import logger
# Default HTTP headers sent with every request. The user-agent mimics a
# desktop Chrome browser so the target site treats requests as normal
# browser traffic rather than a script.
headers = {
'accept': 'application/json',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'cache-control': 'no-cache',
'content-type': 'application/json; charset=utf-8',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
}
async def fetch(session, sem, url, retries=3, proxy='http://****@http-dyn.abuyun.com:9020'):
    """Fetch *url*, retrying on failure, with concurrency capped by *sem*.

    Args:
        session: shared ``aiohttp.ClientSession``.
        sem: ``asyncio.Semaphore`` limiting in-flight requests.
        url: target URL to GET.
        retries: maximum number of attempts (default 3, matching the
            original hard-coded value).
        proxy: proxy URL forwarded to aiohttp (default matches the
            original hard-coded value).

    Returns:
        ``(url, body_text)`` on success, or ``(url, '')`` once every
        attempt has failed.
    """
    for attempt in range(retries):
        try:
            async with sem:  # semaphore caps concurrent requests
                # Bare-int timeouts are deprecated in modern aiohttp;
                # use an explicit ClientTimeout (same 10 s total budget).
                async with session.get(
                    url=url,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=10),
                    proxy=proxy,
                ) as response:
                    response_text = await response.text()
                    if response.status != 200:
                        raise Exception(f'requests status error:{response.status}')
                    return url, response_text
        except Exception as e:
            logger.error(f"Error on attempt {attempt + 1}:{url} {e}")
            # Back off 1 s before retrying, but not after the final
            # attempt — the original slept pointlessly before giving up.
            if attempt < retries - 1:
                await asyncio.sleep(1)
    return url, ''
async def main(domain_urls, concurrency=5):
    """Fetch all *domain_urls* concurrently and return the responses.

    Args:
        domain_urls: iterable of URLs to fetch.
        concurrency: maximum number of simultaneous requests (default 5).

    Returns:
        List of ``(url, response_text)`` tuples in input order.
        (The original discarded the gathered bodies; they are now
        returned so callers can actually consume them.)
    """
    sem = asyncio.Semaphore(concurrency)  # shared limiter across all tasks
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, sem, url) for url in domain_urls]
        results = await asyncio.gather(*tasks)
        for url, _response_text in results:
            print(url)
        return results
if __name__ == '__main__':
    # Replace with the real URL list. The original contained a literal
    # `...` (Ellipsis) placeholder, which is not a valid URL and would
    # crash inside session.get().
    urls = ['https://www.baidu.com']
    concurrency = 5  # max concurrent requests
    # asyncio.run() creates, runs, and closes the event loop for us;
    # get_event_loop() + run_until_complete() is the deprecated pattern.
    asyncio.run(main(urls, concurrency))