import asyncio
import aiohttp
async def fetch(session, url):
    # Fetch one URL and return the response body as text.
    async with session.get(url) as response:
        return await response.text()

async def fetch_all(urls):
    # Reuse a single ClientSession for every request in the batch.
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in urls:
            task = asyncio.create_task(fetch(session, url))
            tasks.append(task)
        responses = await asyncio.gather(*tasks)
        return responses

async def main():
    urls = [
        'https://www.example.com',
        'https://www.example.com/page1',
        'https://www.example.com/page2',
        # other URLs
    ]
    # Cap the concurrency at 500.
    max_concurrent_requests = 500
    # Send max_concurrent_requests requests at a time until every URL has been fetched.
    results = []
    for i in range(0, len(urls), max_concurrent_requests):
        batch_urls = urls[i:i + max_concurrent_requests]
        batch_results = await fetch_all(batch_urls)
        results += batch_results
    # Process the collected responses.
    for result in results:
        # response-handling code goes here
        pass

if __name__ == '__main__':
    asyncio.run(main())
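
The batching loop above waits for every request in a batch to finish before the next batch starts, so a handful of slow responses can stall all 500 slots. A common alternative is to cap concurrency with an asyncio.Semaphore, so a new request starts as soon as any slot frees up. The sketch below is a minimal illustration of that approach, not part of the original template; the names bounded_fetch and crawl, the TCPConnector limit, and the return_exceptions handling are all assumptions made for the example.

import asyncio
import aiohttp

async def bounded_fetch(semaphore, session, url):
    # The semaphore lets at most max_concurrent_requests coroutines
    # past this point at the same time.
    async with semaphore:
        async with session.get(url) as response:
            return await response.text()

async def crawl(urls, max_concurrent_requests=500):
    semaphore = asyncio.Semaphore(max_concurrent_requests)
    # Keep the connection pool in line with the semaphore limit.
    connector = aiohttp.TCPConnector(limit=max_concurrent_requests)
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [asyncio.create_task(bounded_fetch(semaphore, session, url))
                 for url in urls]
        # return_exceptions=True keeps one failed URL from aborting the rest.
        return await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == '__main__':
    pages = asyncio.run(crawl(['https://www.example.com']))

Because the semaphore releases a slot the moment a response completes, throughput stays close to the configured limit instead of dropping to zero between batches.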