Python 异步协程实战:爬取 wallhaven 壁纸网站

开发环境:Python3.9、idea

相较于线程池版本,速度大约提升了50%;在校园网100M宽带下,爬取一页(24张)图片大约需要60s。

代码中默认是下载第一页的图片,想要多爬点的话加个循环就好了

url = "https://wallhaven.cc/toplist?page=1"

整套代码如下,仅供参考

import time
import aiohttp
import asyncio
import requests
import aiofiles
from lxml import etree


async def aiodownload(li):
    """Download the full-size image linked from one thumbnail ``<li>`` element.

    The function reads the detail-page link from ``li``, fetches that page,
    extracts the image URL from it, and writes the image bytes to
    ``img/<file name>``.  Failures are reported with a printed message
    instead of raising, so one bad entry does not abort the others.

    Args:
        li: An lxml element for one entry of the listing page; its
            ``./figure/a/@href`` attribute is used as the detail-page URL.

    Relies on the module-level ``headers`` dict for the request headers.
    """
    # Local import: the file's import block does not bring ``os`` into scope.
    import os

    max_tries = 3        # attempts for the detail page when the server answers 429
    retry_delay = 3      # seconds to wait between attempts

    sub_url = "".join(li.xpath('./figure/a/@href'))
    if not sub_url:
        # No detail link in this entry, so there is nothing to download.
        return

    os.makedirs("img", exist_ok=True)

    async with aiohttp.ClientSession() as session:
        # Fetch the detail page through aiohttp so the event loop is not
        # blocked (a synchronous requests.get here would serialise all tasks).
        sub_text = ""
        for attempt in range(max_tries):
            async with session.get(sub_url, headers=headers) as sub_resp:
                if sub_resp.status == 200:
                    sub_text = await sub_resp.text()
                    break
                if sub_resp.status != 429:
                    # Retrying only makes sense for rate limiting.
                    break
            if attempt < max_tries - 1:
                await asyncio.sleep(retry_delay)

        img_url = ""
        if sub_text:
            sub_html = etree.HTML(sub_text)
            if sub_html is not None:
                # Download link of the full-size image.
                img_url = "".join(
                    sub_html.xpath('/html/body/main/section/div[1]/img/@src')
                )

        if not img_url:
            print("下载失败", sub_url)
        else:
            async with session.get(img_url, headers=headers) as img_resp:
                if img_resp.status == 200:
                    img_name = img_url.split('/')[-1]
                    async with aiofiles.open("img/" + img_name, mode="wb") as f:
                        await f.write(await img_resp.content.read())
                    print("下载完成", img_name)
                else:
                    # Do not save an error body under an image file name.
                    print("下载失败", img_url)

    # aiohttp's documentation recommends a short pause after closing a session
    # that used SSL, so the underlying connections can finish closing before
    # the event loop is shut down.
    await asyncio.sleep(0.25)


async def main():
    """Fetch the first toplist page and download every wallpaper on it.

    The listing page is fetched once with ``requests`` (blocking, but it runs
    before any download task has been started), then one ``aiodownload``
    coroutine per ``<li>`` entry is run concurrently.

    Relies on the module-level ``headers`` dict for the request headers.
    """
    url = "https://wallhaven.cc/toplist?page=1"
    resp = requests.get(url, headers=headers)
    html = etree.HTML(resp.text)
    # Collect the thumbnail entries of the listing.
    lis = html.xpath('/html/body/main/div[1]/section[1]/ul/li')
    if not lis:
        # asyncio.wait() raises ValueError on an empty collection, so bail out
        # explicitly when the page yielded no entries.
        print("没有找到可下载的图片")
        return

    # gather(return_exceptions=True) lets every download finish and hands the
    # exceptions back, instead of leaving them unretrieved inside the tasks.
    results = await asyncio.gather(
        *(aiodownload(li) for li in lis), return_exceptions=True
    )
    for outcome in results:
        if isinstance(outcome, BaseException):
            print("下载出错:", repr(outcome))


if __name__ == '__main__':
    # Request headers; defined at module level because main() and
    # aiodownload() read ``headers`` as a global.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44"
    }
    t1 = time.time()
    print("开始下载...")
    # asyncio.run() creates, runs and closes the event loop; calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    asyncio.run(main())
    print("下载完成...")
    t2 = time.time()
    print('运行时间:', t2 - t1)
# requests.get() is synchronous --> replace it with an asynchronous request (aiohttp)
# Hands-on example: the wallhaven wallpaper site
import time
import aiohttp
import asyncio
import requests
import aiofiles
from lxml import etree


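# Optimized coroutine version: skips fetching the detail page and builds the image
# download URL by string concatenation. Run time drops from about 100 s
# (single-threaded) to about 20 s with coroutines, i.e. roughly 5x faster.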
async def aiodownload(li, ft):
    """Download one wallpaper, building its URL from the detail-page link.

    The last path segment of the entry's ``./figure/a/@href`` link is used as
    the wallpaper id, and the image URL is assembled as
    ``https://w.wallhaven.cc/full/<first two id chars>/wallhaven-<id><ext>``.
    The image is written to ``img/<file name>``.

    Args:
        li: An lxml element for one entry of the listing page.
        ft: File extension guessed by the caller (``'.jpg'`` or ``'.png'``).
            It is tried first; if the server does not answer 200, the other
            extension is tried before giving up.

    Relies on the module-level ``headers`` dict for the request headers.
    """
    # Local import: the file's import block does not bring ``os`` into scope.
    import os

    sub_url = "".join(li.xpath('./figure/a/@href'))
    wallpaper_id = sub_url.rstrip('/').split('/')[-1]
    if not wallpaper_id:
        # No detail link in this entry, so no URL can be built.
        return

    base_url = (
        'https://w.wallhaven.cc/full/' + wallpaper_id[:2]
        + '/wallhaven-' + wallpaper_id
    )
    # The caller's guess first, then whichever known extension is left.
    extensions = [ft] + [ext for ext in ('.jpg', '.png') if ext != ft]

    os.makedirs("img", exist_ok=True)

    async with aiohttp.ClientSession() as session:
        for ext in extensions:
            img_url = base_url + ext  # download link of the image
            async with session.get(img_url, headers=headers) as img_resp:
                if img_resp.status != 200:
                    # Wrong guess or server error: never save the error body
                    # under an image file name.
                    continue
                img_name = img_url.split('/')[-1]
                async with aiofiles.open("img/" + img_name, mode="wb") as f:
                    await f.write(await img_resp.content.read())
                print("下载完成", img_name)
                break
        else:
            print("下载失败", base_url)

    # aiohttp's documentation recommends a short pause after closing a session
    # that used SSL, so the underlying connections can finish closing before
    # the event loop is shut down.  (The original slept 3 s inside the session,
    # which only delayed task completion.)
    await asyncio.sleep(0.25)


async def main():
    """Ask for a page count and download every wallpaper on those pages.

    Pages ``1..page`` of the toplist are fetched one after another with
    ``requests``; for every ``<li>`` entry a download task is created, and all
    tasks are awaited together at the end.

    Raises:
        ValueError: If the text typed at the prompt is not an integer.

    Relies on the module-level ``headers`` dict for the request headers.
    """
    tasks = []
    page = int(input("请输入要下载的页数,从第一页开始:"))
    for i in range(1, page + 1):
        print(f"正在下载第{i}页")
        url = f"https://wallhaven.cc/toplist?page={i}"
        resp = requests.get(url, headers=headers)
        html = etree.HTML(resp.text)
        # Collect the thumbnail entries of this listing page.
        lis = html.xpath('/html/body/main/div[1]/section[1]/ul/li')
        for li in lis:
            # The presence of this nested span is treated as "the image is a
            # PNG"; entries without it are assumed to be JPEG.
            png_marker = li.xpath('./figure/div/span[2]/span')
            ft = '.png' if png_marker else '.jpg'
            # Wrap the coroutine in a task and queue it.
            tasks.append(asyncio.create_task(aiodownload(li, ft)))

    if not tasks:
        # asyncio.wait() raises ValueError on an empty collection (zero pages
        # requested, or no entries matched), so stop here instead.
        print("没有找到可下载的图片")
        return

    # gather(return_exceptions=True) lets every download finish and hands the
    # exceptions back, instead of leaving them unretrieved inside the tasks.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for outcome in results:
        if isinstance(outcome, BaseException):
            print("下载出错:", repr(outcome))


if __name__ == '__main__':
    # Request headers; defined at module level because main() and
    # aiodownload() read ``headers`` as a global.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44"
    }
    t1 = time.time()

    # asyncio.run() creates, runs and closes the event loop; calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    asyncio.run(main())

    print("下载完成...")
    t2 = time.time()
    print('运行时间:', t2 - t1)

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值