aiohttp + 多任务异步协程 实现异步爬虫
环境安装:
pip install aiohttp
使用:该模块中的ClientSession对象实例化一个session,进行网络请求的发送。
普通的网络请求的发送:
#
import asyncio
import time
import requests
# Record the start time so total elapsed time can be reported at the end.
start = time.time()
# Target URLs to crawl (placeholders — fill in real addresses before running).
urls = ['', '', '']
async def get_page(url):
    """Fetch *url* with the blocking ``requests`` library.

    This is the WRONG way to build an async downloader and exists only to
    demonstrate the problem: requests.get() is synchronous, so each call
    blocks the event loop and the coroutines end up running one after
    another.  A genuinely asynchronous HTTP client (aiohttp) is required
    for real concurrency.
    """
    print('正在下载', url)
    resp = requests.get(url=url)
    print('下载完成', resp.text)
# get_page() is declared async, so calling it only creates a coroutine
# object; ensure_future() wraps each one in a Task scheduled on the loop.
tasks = [asyncio.ensure_future(get_page(url)) for url in urls]

# Drive every task to completion on a single event loop, then report
# the total wall-clock time.
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start)
aiohttp中ClientSession实现网络请求的发送
#首先需要安装pip install aiohttp
import requests
import asyncio
import time
import aiohttp
# Start-of-run timestamp for the elapsed-time report printed at the end.
start = time.time()
# URLs to crawl; empty placeholder list — add real targets before running.
urls = []
async def get_page(url):
    """Fetch *url* asynchronously with aiohttp and print the response body.

    A ClientSession is opened per call.  session.get() returns an async
    context manager, so it is entered directly with ``async with`` — the
    original code wrote ``async with await session.get(...)``, which works
    (awaiting yields the response, which also supports ``async with``) but
    the extra ``await`` is redundant and non-idiomatic.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # text() -> str body, read() -> bytes body, json() -> parsed JSON.
            # Reading the body performs network I/O, so it must be awaited.
            page_text = await response.text()
            print(page_text)
# Wrap one Task per URL and run them concurrently on the event loop.
tasks = []
for url in urls:
    c = get_page(url)  # coroutine object (get_page is async)
    task = asyncio.ensure_future(c)
    tasks.append(task)

loop = asyncio.get_event_loop()
# BUG FIX: asyncio.wait() raises ValueError when given an empty collection,
# and ``urls`` above is an empty placeholder list — guard so the template
# runs cleanly until real URLs are filled in.
if tasks:
    loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print(end - start)