利用asyncio aiohttp异步抓取网站上的图片
图片url: https://tu.fengniao.com/5968/
可知获取图片信息的具体url为
https://tu.fengniao.com/ajax/ajaxTuPicList.php?page=1&tagsId=5968&action=getPicLists
tagsId=5968 为图片的分类,另外
tagsId=13 # 类别为美女图片
tagsId=15 # 类别为儿童图片
tagsId=6391 #类别为私房照
page=1 #为页码,你可以多获取不同页面的图片信息
点击Preview,可以返回内容为JSON,其中src为具体图片的链接地址,可以下载图片
直接上干货
import asyncio
import json
import os

import aiohttp
async def down_save_pic(pic_id, pic_url):
    """Download one picture and save it as ./landscape/<pic_id>.jpg.

    Args:
        pic_id: identifier used as the output file name (from the JSON 'id').
        pic_url: direct URL of the image (from the JSON 'src' field).
    """
    # The original crashed with FileNotFoundError on a fresh checkout because
    # ./landscape/ did not exist; create it idempotently before writing.
    os.makedirs('./landscape', exist_ok=True)
    async with aiohttp.ClientSession() as session:
        async with session.get(pic_url) as response:
            # response.read() is the documented shortcut for content.read().
            data = await response.read()
    # Write after the session is closed so the file handle is held as briefly
    # as possible and a download error never leaves a truncated file open.
    with open(f'./landscape/{pic_id}.jpg', 'wb') as fp:
        fp.write(data)
    print(f'{pic_id}.jpg finished, url is {pic_url}')
async def get_one_page(url):
    """Fetch one listing page (JSON) and download every photo it references.

    The JSON payload is expected to contain photos under
    ['photos']['photo'], each entry carrying 'id' and 'src' keys
    (observed from the site's ajaxTuPicList.php response).

    Fix: the original awaited each download one by one inside the loop,
    serialising the downloads; they now run concurrently per page.
    """
    print(url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            web_text = await response.text()
    web_json = json.loads(web_text)
    photos = web_json['photos']['photo']
    # Schedule all downloads for this page at once instead of one at a time.
    await asyncio.gather(
        *(down_save_pic(photo['id'], photo['src']) for photo in photos)
    )
async def get_all_jobs():
    """Crawl the first 14 listing pages of one tag concurrently.

    Tag ids observed on the site: 5968 landscape, 13 beautiful girls,
    15 kids, 6391 private photos — switch tags_id to change category.
    """
    tags_id = 5968  # landscape photos
    # tags_id = 6391  # private photos
    # tags_id = 15    # kids
    # tags_id = 13    # beautiful girls
    base_url = (
        'https://tu.fengniao.com/ajax/ajaxTuPicList.php'
        f'?tagsId={tags_id}&action=getPicLists&page='
    )
    # BUG FIX: the original did `url_a + page_no` with an int page_no,
    # which raises TypeError (cannot concatenate str and int).
    urls = [f'{base_url}{page_no}' for page_no in range(1, 15)]
    tasks = [asyncio.create_task(get_one_page(url)) for url in urls]
    await asyncio.wait(tasks)
if __name__ == '__main__':
    # Entry point: asyncio.run creates and closes an event loop (Python 3.7+).
    asyncio.run(get_all_jobs())
这段代码可以直接运行,使用的python版本为3.7