aiohttp异步爬虫

aiohttp 模块官方文档:Welcome to AIOHTTP — aiohttp 3.9.3 documentation(https://docs.aiohttp.org/)

一、aiohttp 代替 requests 来执行异步的网络请求操作


import os
import time
import aiohttp
import asyncio


async def func(client, i):
    """Issue one GET request to Baidu and print the HTTP status.

    Args:
        client: Session-like object whose ``get(url)`` can be awaited and
            yields a response exposing a ``status`` attribute.
        i: Zero-based index of the request; it is printed as ``i + 1``.

    Returns:
        The response object obtained from ``client.get``.
    """
    resp = await client.get('https://www.baidu.com')
    attempt = i + 1
    print(f'第{attempt}次请求,status_code={resp.status}')
    return resp


async def main():
    """Send three requests concurrently over one shared session and print each result."""
    # The async context manager acquires the session and releases it
    # automatically when the block exits.
    async with aiohttp.ClientSession() as client:
        task_list = [asyncio.create_task(func(client, idx)) for idx in range(3)]
        finished, _unfinished = await asyncio.wait(task_list)
        for task in finished:
            print(f'{task.result()}')


if __name__ == '__main__':
    # Script entry point: asyncio.run drives the main() coroutine to completion.
    asyncio.run(main())

二、下面的代码虽然用到了异步相关的函数,但每个请求都是 await 完成之后才发起下一个请求,任务之间没有并发,所以逻辑上仍然相当于单线程顺序执行的效果


import os
import time
import aiohttp
import asyncio


class WangZhe:
    """Download Honor of Kings hero skins, one request at a time.

    Every request is awaited before the next one is issued, so although the
    code is written with ``async``/``await`` it runs strictly sequentially.
    """

    def __init__(self):
        # JSON hero list; each entry is read for its 'ename' and 'cname' keys.
        self.herolist_url = 'https://pvp.qq.com/web201605/js/herolist.json'
        # Skin image URL template, filled with (ename, ename, skin index).
        self.skin_url = 'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg'
        self.headers = {
            'user-agent': ''
        }

    async def skin_download(self, session):
        """Save up to three skins for each of the first four heroes.

        For every hero, skins 1..3 are requested in order; the first
        non-200 status stops that hero and moves on to the next one.

        Args:
            session: aiohttp-style client session whose ``get()`` returns an
                async context manager yielding the response.
        """
        heroes = await self.herolist(session)
        for hero in heroes[:4]:
            ename, cname = hero['ename'], hero['cname']
            for i in range(1, 4):
                url = self.skin_url.format(ename, ename, i)
                # ``async with`` releases the connection even when the status
                # is not 200 and the body is never read.
                async with session.get(url, headers=self.headers) as skin_response:
                    if skin_response.status != 200:
                        break
                    content = await skin_response.read()
                with open(f'../sublimetext_result/王者荣耀英雄皮肤/{cname}_{i}.jpg', 'wb') as f:
                    f.write(content)
                print(f"{cname}第{i}张皮肤下载成功")

    async def herolist(self, session):
        """Fetch the hero list and keep only the fields needed for downloads.

        Args:
            session: aiohttp-style client session.

        Returns:
            A list of ``{'ename': ..., 'cname': ...}`` dicts, one per hero,
            in the order returned by the server.
        """
        async with session.get(self.herolist_url, headers=self.headers) as response:
            # json() is a coroutine and must be awaited; content_type=None
            # disables the response content-type check.
            result = await response.json(content_type=None)
        return [{'ename': j['ename'], 'cname': j['cname']} for j in result]

    # ``async with`` is only valid inside an ``async def``.
    async def main(self):
        """Open one client session and run the sequential download with it."""
        async with aiohttp.ClientSession() as session:
            await self.skin_download(session)  # the session must be passed along


if __name__ == '__main__':
    # makedirs(exist_ok=True) also creates a missing parent directory and
    # does not fail when the target directory already exists.
    os.makedirs('../sublimetext_result/王者荣耀英雄皮肤', exist_ok=True)
    w = WangZhe()
    # perf_counter is a monotonic clock intended for measuring intervals.
    start = time.perf_counter()
    asyncio.run(w.main())
    end = time.perf_counter()
    print(f'总耗时:{end - start}')  # the author measured about 14 s
            

三、对上方问题的改进:为每个英雄创建一个任务,使各英雄的下载并发执行。另外注意,对于返回 coroutine 的操作,调用时前面必须加 await;可以查阅官方文档确认对应操作的返回值类型,再决定是否需要加 await


'''
王者荣耀皮肤
'''
import os
import time
import aiohttp
import asyncio


class WangZhe:
    """Download Honor of Kings hero skins with one concurrent task per hero."""

    def __init__(self):
        # Skin image URL template, filled with (ename, ename, skin index).
        self.skin_url = 'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg'
        # JSON hero list; each entry is read for its 'ename' and 'cname' keys.
        self.herolist_url = 'https://pvp.qq.com/web201605/js/herolist.json'
        self.headers = {
            'user-agent': '',
        }

    async def skin_download(self, session, ename, cname):
        """Save up to three skins of a single hero.

        Skins 1..3 are requested in order; the first non-200 status stops
        the loop.

        Args:
            session: aiohttp-style client session whose ``get()`` returns an
                async context manager yielding the response.
            ename: Hero identifier inserted into the skin URL.
            cname: Hero display name used in the output file name.
        """
        for i in range(1, 4):
            url = self.skin_url.format(ename, ename, i)
            # ``async with`` releases the connection even when the status is
            # not 200 and the body is never read.
            async with session.get(url, headers=self.headers) as response:
                if response.status != 200:  # aiohttp exposes the code as .status
                    break
                content = await response.read()  # raw bytes come from read()
            with open(f'../sublimetext_result/王者荣耀英雄皮肤2/{cname}_{i}.jpg', 'wb') as f:
                f.write(content)
            print(f'{cname}第{i}张皮肤下载成功')

    async def main(self):
        """Fetch the hero list and download the first four heroes concurrently.

        A failure in one hero's task is reported and does not stop the
        others.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(self.herolist_url, headers=self.headers) as response:
                result = await response.json(content_type=None)
            heroes = result[:4]
            tasks = []
            for hero in heroes:
                ename = hero['ename']
                cname = hero['cname']
                tasks.append(asyncio.create_task(self.skin_download(session, ename, cname)))
            # Unlike asyncio.wait, gather accepts an empty task list;
            # return_exceptions=True keeps the run best-effort while still
            # retrieving every task's exception so it can be reported.
            outcomes = await asyncio.gather(*tasks, return_exceptions=True)
            for hero, outcome in zip(heroes, outcomes):
                if isinstance(outcome, BaseException):
                    print(f"{hero['cname']}下载失败:{outcome!r}")


if __name__ == '__main__':
    # makedirs(exist_ok=True) also creates a missing parent directory and
    # does not fail when the target directory already exists.
    os.makedirs('../sublimetext_result/王者荣耀英雄皮肤2', exist_ok=True)
    w = WangZhe()
    # perf_counter is a monotonic clock intended for measuring intervals.
    start = time.perf_counter()
    # asyncio.run creates and closes its own event loop; calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    asyncio.run(w.main())
    end = time.perf_counter()
    print('总耗时:{}'.format(end - start))

'''
英雄联盟皮肤
'''
import aiohttp
import asyncio
import os
import random


class YingXiongLianMeng:
    """Download League of Legends hero skins with one concurrent task per hero."""

    def __init__(self):
        # Hero index; main() reads its 'hero' list for 'heroId' and 'name'.
        self.herolist_url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2803015'
        # Per-hero detail URL template, filled with the heroId.
        self.skin_url = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js?ts=2803019'
        self.headers = {
            'user-agent': ''
        }

    async def skin_down(self, session, heroId, name):
        """Download every skin of one hero that has a non-empty 'mainImg' URL.

        Files are numbered 1, 2, ... in the order the downloadable skins
        appear; entries with an empty 'mainImg' are skipped and do not
        consume a number.

        Args:
            session: aiohttp-style client session whose ``get()`` returns an
                async context manager yielding the response.
            heroId: Identifier inserted into the hero detail URL.
            name: Hero name used for the output directory and log message.
        """
        # ``async with`` releases each connection as soon as its body is read.
        async with session.get(self.skin_url.format(heroId)) as response:
            dic = await response.json(content_type=None)
        num = 0
        for skin in dic['skins']:
            img_url = skin['mainImg']
            if not img_url:
                continue
            num += 1
            # Short random pause (0.3 s or 0.4 s) before each image request.
            await asyncio.sleep(random.randint(3, 4) / 10)
            async with session.get(img_url) as res:
                content = await res.content.read()
            with open(f'../图灵教育/测试结果/英雄联盟_{name}/{num}.jpg', 'wb') as f:
                f.write(content)
            print(f'英雄联盟_{name}/{num}.jpg……下载成功')

    async def main(self):
        """Fetch the hero index, create one folder per hero, and download all heroes concurrently.

        A failure in one hero's task is reported and does not stop the
        others.
        """
        async with aiohttp.ClientSession(headers=self.headers) as session:
            async with session.get(self.herolist_url) as response:
                dic = await response.json(content_type=None)
            heroes = dic['hero']
            tasks = []
            for hero in heroes:
                heroId = hero['heroId']
                name = hero['name']
                # makedirs(exist_ok=True) also creates missing parent
                # directories and tolerates an existing target.
                os.makedirs(f'../图灵教育/测试结果/英雄联盟_{name}', exist_ok=True)
                tasks.append(asyncio.create_task(self.skin_down(session, heroId, name)))
            # Unlike asyncio.wait, gather accepts an empty task list;
            # return_exceptions=True keeps the run best-effort while still
            # retrieving every task's exception so it can be reported.
            outcomes = await asyncio.gather(*tasks, return_exceptions=True)
            for hero, outcome in zip(heroes, outcomes):
                if isinstance(outcome, BaseException):
                    print(f"英雄联盟_{hero['name']}下载失败:{outcome!r}")


if __name__ == '__main__':
    # Script entry point: build the downloader and drive its main() coroutine to completion.
    yxlm = YingXiongLianMeng()
    asyncio.run(yxlm.main())
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值