# Async crawler practice project. Suggestions for improvement are very welcome - thank you! 🙏
'''
使用异步抓取umei.cc上的美女写真图片
'''
import asyncio
import aiohttp
from lxml import etree
import time
# Number of the last listing page to crawl; main() fetches pages 1..END_PAG.
END_PAG = 20
# Listing-page URL template; the `{}` placeholder is filled with the page number.
URL = "https://www.umei.cc/meinvtupian/meinvxiezhen/{}.htm"
async def main():
    """Crawl listing pages 1..END_PAG concurrently and report failed pages.

    One task running ``get_html`` is started per page. All tasks are awaited
    together; a page whose task raised is reported on stdout instead of
    being dropped silently, and it does not stop the remaining pages.
    """
    tasks = [
        asyncio.create_task(get_html(URL.format(page), page))
        for page in range(1, END_PAG + 1)
    ]
    # gather() accepts an empty task list (asyncio.wait() raises ValueError
    # for one) and, with return_exceptions=True, hands back each task's
    # exception as its result so it is never left unretrieved.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    # Results come back in task order, i.e. page 1 first.
    for page, result in enumerate(results, start=1):
        if isinstance(result, BaseException):
            print(f'第【{page}】页抓取失败:{result!r}')
async def get_html(url, page):
    """Fetch one listing page, parse it and download the images it lists.

    Progress messages, including the elapsed time for the page, are printed
    to stdout.

    Args:
        url: Address of the listing page to fetch.
        page: Page number; used only in the progress messages.

    Raises:
        aiohttp.ClientResponseError: If the listing page answers with an
            HTTP error status (4xx/5xx).
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    print(f'开始抓取第【{page}】页图片!')
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as rsp:
            # Fail loudly on an error status instead of parsing an error page.
            rsp.raise_for_status()
            text = await rsp.text()
        # The page response is released before the image downloads start;
        # the session stays open because downloads_img() reuses it.
        html = etree.HTML(text)
        await downloads_img(html, session)
    elapsed = time.perf_counter() - started
    print(f'第【{page}】页已抓取完毕!耗时间【{elapsed}】')
async def downloads_img(html, session):
    """Download every image referenced in a parsed listing page.

    Image URLs are taken from the ``src`` attribute of ``<img>`` elements
    inside ``<li>`` items of the ``div.TypeList`` container. Each image is
    saved under ``./meitu/`` using the last path segment of its URL as the
    file name. The images of one page are fetched one after another.

    Args:
        html: Parsed document exposing ``xpath()`` (the tree built by
            ``etree.HTML`` in ``get_html``).
        session: Open HTTP client session used for the image requests.
    """
    import os  # local import: the file's import block does not provide it

    save_dir = './meitu/'
    # open() does not create missing directories; without this the very
    # first write fails with FileNotFoundError on a fresh checkout.
    os.makedirs(save_dir, exist_ok=True)

    img_urls = html.xpath('//div[@class="TypeList"]//li//img/@src')
    for img_url in img_urls:
        # rpartition() never raises, unlike rsplit('/', 1)[1] on a src that
        # contains no '/'.
        name = img_url.rpartition('/')[2]
        if not name:
            # A src ending in '/' yields no usable file name.
            continue
        async with session.get(img_url) as rsp:
            if rsp.status >= 400:
                # Do not store an HTTP error body as if it were an image.
                print(f'图片下载失败【{img_url}】,状态码【{rsp.status}】')
                continue
            # Read the whole body before touching the disk so a failed
            # transfer does not leave an empty or truncated file behind.
            data = await rsp.content.read()
        with open(os.path.join(save_dir, name), 'wb') as f:
            f.write(data)
# Script entry point: start the crawl only when this file is run directly.
if __name__ == "__main__":
    asyncio.run(main())