asyncio 爬虫实例:并发抓取多个网页,每个页面只返回源码的最后 20 个字节

# coding=utf-8
import asyncio
import time
from aiohttp import ClientSession
from contextlib import closing


async def fetch(url):
    """Download ``url`` and return the tail of its response body.

    A fresh ``ClientSession`` is opened for this single GET request and is
    closed again before the function returns.

    Args:
        url: Address of the page to request.

    Returns:
        The last 20 bytes of the body read from the response (the whole
        body when it is shorter than that).

    Cancellation is not intercepted here: ``asyncio.CancelledError``
    propagates to whoever awaits this coroutine.
    """
    async with ClientSession() as session, session.get(url) as reply:
        body = await reply.read()
    # logging.info(body[-20:])
    return body[-20:]


async def main(event_loop_, timeout=None):
    """Fetch a fixed list of URLs concurrently and print each task's outcome.

    One task per URL is scheduled with ``asyncio.ensure_future`` and the
    whole batch is awaited with ``asyncio.wait``. Finished tasks are
    printed; tasks still pending when the wait returns are printed,
    cancelled, and then awaited so that the cancellation completes.
    Finally the elapsed wall-clock time is printed.

    Args:
        event_loop_: Kept so existing callers keep working; it is not used
            inside this coroutine.
        timeout: Maximum number of seconds to wait for the downloads, passed
            straight to ``asyncio.wait``. ``None`` (the default) waits until
            every task has finished, so no task is ever reported as pending.

    Returns:
        None. All results are written to standard output.
    """
    started = time.time()
    # -------------------------------------------------------------------------
    urls = [
        "https://docs.python.org/3/library/asyncio-task.html",
        "https://www.cnblogs.com/yzh2857/p/10376598.html",
        "https://www.cnblogs.com/yzh2857/p/10390808.html",
        "https://matplotlib.org/api/pyplot_api.html",
        "https://www.programcreek.com/python/index/module/list",
        "https://selenium-python.readthedocs.io/api.html",
        "https://www.w3resource.com/python/python-tutorial.php",
        "https://docs.scipy.org/doc/numpy/reference/generated/numpy.ones.html",
        "https://www.w3resource.com/python/module/calendar/",
    ]
    tasks = [asyncio.ensure_future(fetch(url)) for url in urls]
    # Approach 1: asyncio.wait -------------------------------------------------
    # Without a timeout the "pending" set is always empty; the timeout is what
    # makes the pending branch below reachable.
    dones, pendings = await asyncio.wait(tasks, timeout=timeout)
    # Tasks that finished (successfully or with an exception).
    for done in dones:
        print("已完成的协程:", done)
    # Tasks that were still running when the timeout expired. asyncio.wait
    # does not cancel them itself, so do it here.
    for pending in pendings:
        print("超时未完成的协程  :", pending)
        pending.cancel()
    if pendings:
        # cancel() only requests cancellation; await the tasks so they unwind
        # before the caller closes the event loop. return_exceptions=True keeps
        # the resulting CancelledError from propagating out of main().
        await asyncio.gather(*pendings, return_exceptions=True)
    # Approach 2: asyncio.gather -----------------------------------------------
    # result = await asyncio.gather(*tasks)
    # print("结果:", result)
    # Approach 3: asyncio.as_completed -----------------------------------------
    # for task in asyncio.as_completed(tasks):
    #     result = await task
    #     print("结果:", result)
    # ---------------------------------------------------------------------------
    print("总用时", time.time() - started)


if __name__ == "__main__":
    # Create the loop explicitly: calling asyncio.get_event_loop() when no loop
    # is running or set is deprecated and fails on recent Python versions.
    # closing() guarantees loop.close() runs even if main() raises.
    with closing(asyncio.new_event_loop()) as loop:
        loop.run_until_complete(main(loop))


输出:

已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'pt>\n</body>\n</html>\n'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'iv>\n</body>\n</html>\n'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'>\n\n  </body>\n</html>'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'ript></body></html>\n'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'  \n  </body>\n</html>'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'iv>\n</body>\n</html>\n'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'>\n\n</footer>\n</html>'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'v>\n  </body>\n</html>'>
已完成的协程: <Task finished coro=<fetch() done, defined at C:\Users\Administrator\Desktop\tmp.py:8> result=b'pt>\n</body>\n</html>\n'>
总用时 3.107325315475464
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

迷心兔

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值