异步介绍
- 异步模型是事件驱动模型的基础
- 异步活动的执行模型可以只有一个单一的主控制流,能在单核心系统和多核心系统中运行
- 在并发执行的异步模型中,许多任务被穿插在同一时间线上,所有的任务都由一个控制流执行(单一线程)。任务的执行可能被暂停或恢复,中间的这段时间线程将会去执行其他任务
协程初步
- 协程就是一个函数,只是它满足以下几个特征:
- 有I/O依赖的操作
- 可以在进行I/O操作时暂停
- 无法直接执行
- 它的作用就是对有大量I/O操作的程序进行加速
import asyncio


async def net():
    """Toy coroutine standing in for an I/O-bound operation."""
    return 11


async def main():
    # Calling net() bare does NOT run it: it only builds a coroutine object,
    # and CPython warns "RuntimeWarning: coroutine 'net' was never awaited".
    # A coroutine must be awaited -- or wrapped first, e.g.
    #   task = asyncio.create_task(net()); await task
    return await net()


asyncio.run(main())
'''
直接net()会报错
需要用await获取返回结果或者用
task=asyncio.create_task(net())
await task
'''
import asyncio


# Define an async function: four copies run interleaved on one event loop,
# so all the "hello" lines appear before any "world" line (order among the
# four is scheduler-dependent).
async def hello(i):
    """Print a greeting, pause 3 s of simulated I/O, then print a farewell."""
    print('hello', i)
    await asyncio.sleep(3)
    print('world', i)


if __name__ == '__main__':
    # Bug fix: asyncio.wait() no longer accepts bare coroutine objects
    # (DeprecationWarning in 3.8, TypeError since 3.11), and
    # get_event_loop() is deprecated outside a running loop.
    # asyncio.gather() schedules the coroutines directly and asyncio.run()
    # manages the loop lifecycle.
    async def _main():
        await asyncio.gather(*(hello(i) for i in range(4)))

    asyncio.run(_main())
'''
hello 2
hello 0
hello 3
hello 1
world 2
world 3
world 0
world 1
'''
实例用协程爬取小说
- 有问题
import asyncio
import aiohttp
from lxml import etree
# Request headers for www.biquge.info: a session Cookie plus a desktop
# Chrome User-Agent and Referer so the scraper is served normal pages.
# NOTE(review): the hard-coded Cookie is session-specific and will expire;
# requests may start failing once it does -- confirm whether it is required.
headers={
'Cookie': '__guid=130189705.3464993462474227000.1587174339014.6013; Hm_lvt_c979821d0eeb958aa7201d31a6991f34=1587174340; clickbids=40160%2C40289%2C40363%2C8704; monitor_count=15; Hm_lvt_6dfe3c8f195b43b8e667a2a2e5936122=1587174588; Hm_lpvt_6dfe3c8f195b43b8e667a2a2e5936122=1587174588; Hm_lpvt_c979821d0eeb958aa7201d31a6991f34=1587174588',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
'Referer': 'http://www.biquge.info/'
}
async def get_text(i):
    """Fetch one chapter page of the novel and print its cleaned body text.

    i -- chapter path fragment (e.g. '123456.html'), appended to the book's
    base URL.
    """
    url = 'http://www.biquge.info/8_8704/{}'.format(i)
    # NOTE(review): a fresh ClientSession per chapter is expensive; reusing
    # one shared session (or a TCPConnector with a connection limit) would
    # also help with the "too many file descriptors" failure noted below.
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            print(resp.status)
            text = await resp.text()
    soup = etree.HTML(text)
    txt = ''.join(soup.xpath('//*[@id="content"]/text()'))
    title = soup.xpath('//*[@id="wrapper"]/div[4]/div/div[2]/h1/text()')
    # Bug fix: the original used raw strings (r'\xa0', r'\r', r'\n'), which
    # match a literal backslash followed by letters -- NOT the actual
    # non-breaking-space / CR / LF characters present in the decoded HTML,
    # so the cleanup silently did nothing. Replace the real characters.
    for junk in ('\xa0', '\r', '\n', '[', ']'):
        txt = txt.replace(junk, '')
    # Optional: persist the chapter to disk instead of printing it.
    '''with open('./前任无双/'+title[0]+'.txt','w',encoding='utf-8') as f:
    f.write(txt)'''
    print(txt)
async def get_html(url):
    """Download the book's index page and queue one get_text() coroutine per
    chapter link, appending them to the module-level ``links`` list."""
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            print(resp.status)
            page = await resp.text()
            tree = etree.HTML(page)
            hrefs = tree.xpath('//*[@id="list"]/dl/dd/a/@href')
            links.extend(get_text(href) for href in hrefs)
if __name__ == '__main__':
    links = []
    # First pass: scrape the index page; get_html() fills ``links`` with one
    # get_text() coroutine per chapter.
    asyncio.run(get_html('http://www.biquge.info/8_8704/'))
    # Bug fix: asyncio.wait() rejects bare coroutines on Python 3.11+, and
    # launching every chapter at once is what triggers the
    # "too many file descriptors in select()" error on Windows' selector
    # event loop. Running the downloads in bounded batches fixes both.
    BATCH = 50
    for start in range(0, len(links), BATCH):
        asyncio.run(_gather_batch(links[start:start + BATCH]))


async def _gather_batch(coros):
    """Await one bounded batch of chapter downloads concurrently."""
    await asyncio.gather(*coros)
- 会报too many file descriptors in select()错误,由于协程任务太多。暂未解决,哪位大佬解决了给个链接,谢谢