Fetching HD wallpapers from Netbian (彼岸图网) asynchronously with asyncio and aiohttp
This grabs the HD images from the listing pages, not the 4K originals.
Approach:
1. First collect, from each listing page, the detail-page URL behind every thumbnail
2. Then extract the HD image URL and the image title from that detail page (a standalone sketch of both extractions follows)
3. Download and save
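To make steps 1 and 2 concrete, here is a minimal standalone sketch of the two regex extractions, run against hand-written HTML fragments that imitate the site's markup (the fragments themselves are illustrative; the full script below fetches the real pages asynchronously):

import re

# illustrative fragments imitating the listing-page and detail-page markup
listing_html = '<a href="/tupian/12345.html" target="_blank"><img src="/uploads/thumb/s.jpg"><b>Sample</b></a>'
detail_html = '<img src="/uploads/allimg/thumb/sample.jpg" data-pic="/uploads/allimg/sample.jpg" alt="Sample Wallpaper">'

# step 1: detail-page path behind each thumbnail
print(re.findall('<a href="(.*?)" target="_blank">.*?</b></a>', listing_html))
# ['/tupian/12345.html']

# step 2: image path and title from the detail page
print(re.findall('<img src="(.*?)" data-pic=".*?" alt="(.*?)"', detail_html))
# [('/uploads/allimg/thumb/sample.jpg', 'Sample Wallpaper')]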
Notes:
A random 1-3 second delay is built in via asyncio.sleep(random.randint(1, 3))
If you have a proxy you can drop the delay and route the requests through the proxy instead (see the sketch just below)
To grab a category's wallpapers, change the URLs in main()
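For the proxy route, aiohttp accepts a per-request proxy keyword; a minimal sketch, assuming the headers dict defined in the full script below (the proxy address is a placeholder, not a working endpoint):

PROXY = 'http://127.0.0.1:7890'  # placeholder address; substitute your own proxy

async def fetch_via_proxy(session, url):
    # aiohttp routes a single request through a proxy via the `proxy` keyword argument
    async with session.get(url, headers=headers, proxy=PROXY) as resp:
        return await resp.text(encoding='gbk')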
# -*- coding: utf-8 -*-
import asyncio
import aiohttp
import aiofiles
import random
import re
import os
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'referer': 'https://pic.netbian.com/index_2.html',
    'cookie': '__yjs_duid=1_a1d59e22709861dce3d23f54cadd3a6d1648820743957; zkhanecookieclassrecord=%2C66%2C60%2C59%2C58%2C53%2C55%2C; yjs_js_security_passport=eb7ad7fe69a1cfbecb4dc9824221b4b829fc64b3_1649212502_js'
}
# The request must carry the cookie, otherwise the page source is not returned
# Step 1: collect the detail-page URL for every thumbnail on one listing page
async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            # the site serves GBK-encoded HTML, so decode explicitly
            html = await resp.text(encoding='gbk')
            # print(html)  # uncomment to inspect the raw page
        page_parse = re.findall('<a href="(.*?)" target="_blank">.*?</b></a>', html)
        for page in page_parse:
            await get_img(page, session)
# Step 2: extract the HD image URL and the title from a detail page
async def get_img(url, session):
    main_url = 'https://pic.netbian.com'  # prefix for building the full detail-page URL
    async with session.get(main_url + url, headers=headers) as resp:
        html = await resp.text(encoding='gbk')  # read the body once instead of once per findall
    # one pattern with two groups captures the image path and the alt-text title together
    for image_url, image_title in re.findall('<img src="(.*?)" data-pic=".*?" alt="(.*?)"', html):
        image_url = image_url.replace('thumb', 'pic')  # swap the thumbnail path for the full-size one
        await download_img(image_url, image_title, session)
# Step 3: download and save one image
async def download_img(url, title, session):
    main_url = 'https://pic.netbian.com'  # prefix for building the full image URL
    async with session.get(main_url + url, headers=headers) as resp:
        async with aiofiles.open('壁纸_aiohttp/' + title + '.jpg', 'wb') as f:
            await f.write(await resp.read())
            print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>', title, 'Download Success!')
    await asyncio.sleep(random.randint(1, 3))  # random 1-3 s pause between downloads to go easy on the site
# Create the folder the images are saved into
def mkdir():
    if not os.path.exists('壁纸_aiohttp'):
        os.mkdir('壁纸_aiohttp')
# Entry point
async def main():
    mkdir()
    urls = [  # listing-page URLs; numbering starts at 2 because page 1 is the bare index.html
        f'https://pic.netbian.com/index_{page}.html'
        for page in range(2, 12)
    ]
    # asyncio.run()/gather replaces the deprecated get_event_loop()/asyncio.wait(coroutines) pattern
    await asyncio.gather(*(get_page(url) for url in urls))

if __name__ == '__main__':
    asyncio.run(main())
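The random sleep in download_img paces each page's chain of downloads, but all ten listing pages still start at once. To put a hard cap on overall concurrency instead, a minimal sketch with asyncio.Semaphore (the limit of 3 and the wrapper name get_page_limited are illustrative choices, not part of the original script):

sem = asyncio.Semaphore(3)  # illustrative cap: at most 3 listing pages in flight

async def get_page_limited(url):
    async with sem:  # waits here until one of the 3 slots frees up
        await get_page(url)

# then in main():
#     await asyncio.gather(*(get_page_limited(url) for url in urls))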
Results
I only fetched the first ten pages here.