因为不熟悉的缘故，总觉得写得不好，有一种异步代码与同步代码掺杂在一起的感觉，希望能够得到指点。
import os
import re
import aiofiles
import aiohttp
import asyncio
from lxml import etree
# 发请求获取html文本
async def fetch(session, url):
    """Request *url* with a GET and return the response body as text.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response. Presumably an ``aiohttp.ClientSession``,
            given the file's imports -- confirm against the caller.
        url: Address to request.

    Returns:
        Whatever ``await response.text()`` produces for the response.
    """
    # The response is only valid inside the ``async with`` block, so the body
    # is read before the context manager exits.
    async with session.get(url) as response:
        return await response.text()
# 解析html获取每组的列表页链接
async def parser(html):
tree = etree.HTML(html)
pic_href_list = tree.xpath('//*[@class="listbox"]/a/@href')
pic_title_list = tree.xpath('//*[@class="listbox"]/a/@title')
for href, title in zip(pic_href_list, pic_title_list):
path_id = re.findall('\d