requests_html使用asyncio

import asyncio
import functools
from concurrent.futures.thread import ThreadPoolExecutor
from requests_html import HTMLSession
import sys
# Single module-level session; get_response() issues every request through it.
# NOTE(review): get_response() calls session.get from executor worker threads,
# so this one object is shared across threads -- confirm that is safe.
session = HTMLSession()


async def get_response(executor, *, url, loop: asyncio.AbstractEventLoop = None, ):
    """Schedule a blocking ``session.get(url)`` call on *executor*.

    When *loop* is falsy, the currently running event loop is used instead.

    The future produced by ``run_in_executor`` is returned as-is (it is not
    awaited here), so awaiting this coroutine yields that future rather than
    the HTTP response. Callers await or gather the future themselves.
    """
    event_loop = loop or asyncio.get_running_loop()
    fetch = functools.partial(session.get, url)
    return event_loop.run_in_executor(executor, fetch)


async def bulk_requests(executor, *,
                        urls,
                        loop: asyncio.AbstractEventLoop = None, ):
    """Asynchronously yield one pending request future per entry in *urls*.

    Each URL is handed to ``get_response`` in order; the value yielded is
    whatever awaiting that coroutine produces (the executor future), so the
    requests themselves are only submitted here, not waited for.
    """
    for target in urls:
        pending = await get_response(executor, url=target, loop=loop)
        yield pending


def filter_unsuccesful_requests(responses_and_exceptions):
    """Return a lazy iterator over the entries whose value is not an Exception.

    *responses_and_exceptions* is a mapping; each ``(key, value)`` pair whose
    value is an ``Exception`` instance is dropped, every other pair is kept in
    the mapping's iteration order.
    """

    def _succeeded(pair):
        # pair is a (key, value) tuple taken from .items()
        _, outcome = pair
        return not isinstance(outcome, Exception)

    return filter(_succeeded, responses_and_exceptions.items())


async def main():
    """Fetch a fixed list of pages concurrently and print each page's title.

    Requests are submitted to a 10-worker thread pool and gathered with
    ``return_exceptions=True`` so one failing request does not abort the
    others. Titles of successful pages go to stdout; pages without a title
    and URLs whose request raised are reported on stderr.
    """
    urls = [
        "https://baidu.com",
        "https://cnblogs.com",
        "https://163.com",
    ]
    # The context manager shuts the pool down once every request has settled,
    # so worker threads are not leaked when main() returns or raises.
    with ThreadPoolExecutor(10) as executor:
        requests = [request async for request in bulk_requests(executor, urls=urls, )]
        outcomes = await asyncio.gather(*requests, return_exceptions=True)

    responses_and_exceptions = dict(zip(urls, outcomes))
    responses = {url: resp.html for (url, resp) in filter_unsuccesful_requests(responses_and_exceptions)}

    for url, html in responses.items():
        titles = html.xpath("//head//title//text()")
        if titles:
            print(titles[0])
        else:
            # A page without a <title> must not crash the run with IndexError.
            print(f"No title found for {url}", file=sys.stderr)

    for url in urls:
        if url not in responses:
            print(f"No successful request could be made to {url}. Reason: {responses_and_exceptions[url]}",
                  file=sys.stderr)


# Script entry point: run the main() coroutine to completion.
# NOTE: there is no __main__ guard, so this also executes when the module is imported.
asyncio.run(main())

转载于:https://www.cnblogs.com/c-x-a/p/11028356.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值