Python thread pool / AIO (asynchronous non-blocking) HTTP request examples

Tests multi-threading, thread pool, AIO, and other modes side by side.

Threads are limited by CPU scheduling; with a large number of requests, AIO is the most efficient. See the code for details.

The comments explain each approach in detail; the `__main__` block is the program entry point.


"""
python 各种方式发起 http 请求对比
参考:
https://plainenglish.io/blog/send-http-requests-as-fast-as-possible-in-python-304134d46604
"""
import time

import requests
from requests.sessions import Session
from threading import Thread, local
from queue import Queue
from concurrent.futures import ThreadPoolExecutor


def sync_get(url_list: list):
    """
    Plain synchronous requests, one URL at a time.
    :param url_list:
    :return:
    """

    def download_link(url: str) -> None:
        result = requests.get(url).content
        print(f'Read {len(result)} from {url}')

    def download_all(urls: list) -> None:
        for url in urls:
            download_link(url)

    start = time.time()
    download_all(url_list)
    end = time.time()
    print(f'sync download {len(url_list)} links in {end - start} seconds')


def sync_get_share_session(url_list: list):
    """
    Still synchronous, but all requests share one Session.
    A shared Session keeps cookies (so login state persists)
    and reuses TCP connections, cutting TLS handshake time.
    For details, search for "python requests Session performance".
    A small cookie demo (session_cookie_demo) follows this function.
    TODO: expand on sessions and cookies as needed; they come up often in later security topics.
    :param url_list:
    :return:
    """
    def download_link(url: str, session: Session):
        with session.get(url) as response:
            result = response.content
            print(f'Read {len(result)} from {url}')

    def download_all(urls: list):
        with requests.Session() as session:
            for url in urls:
                download_link(url, session=session)

    start = time.time()
    download_all(url_list)
    end = time.time()
    print(f'download {len(url_list)} links in {end - start} seconds')
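
# Why the shared Session matters — a minimal sketch (httpbin.org is an
# illustrative endpoint, not part of the benchmark above): cookies set by one
# response are sent automatically on later requests from the same Session.
# Call it manually to try it out.
def session_cookie_demo():
    with requests.Session() as s:
        s.get('https://httpbin.org/cookies/set/token/abc123')
        resp = s.get('https://httpbin.org/cookies')
        print(resp.json())  # expected: {'cookies': {'token': 'abc123'}}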


def multi_thread_get(url_list):
    """
    Multi-threaded requests with 10 worker threads.
    Key points:
    1. Mutating a shared list across threads is unsafe; a thread-safe Queue holds the URLs instead (search "python queue thread safety").
    2. Sharing one Session across threads is also unsafe; thread-local storage gives each thread its own Session.
    An alternative worker that exits on its own is sketched below in download_link_polling.
    :param url_list:
    :return:
    """
    # Push all URLs into a thread-safe queue
    q = Queue(maxsize=0)
    for url in url_list:
        q.put(url)
    thread_local = local()  # The thread_local will hold a Session object

    def get_session() -> Session:
        if not hasattr(thread_local, 'session'):
            thread_local.session = requests.Session()  # Create a new Session if not exists
        return thread_local.session

    def download_link() -> None:
        '''Worker: pull URLs from the queue until none are left.'''
        session = get_session()
        while True:
            # With timeout=None this blocks forever once the queue is empty, so
            # the workers are started as daemon threads below; a finite timeout
            # would instead raise queue.Empty after that many seconds.
            url = q.get(block=True, timeout=None)
            with session.get(url) as response:
                print(f'Read {len(response.content)} from {url}')
            q.task_done()  # tell the queue, this url downloading work is done
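
    # Alternative worker (sketch): poll the queue with a finite timeout and
    # return once it stays empty, so threads exit cleanly without daemon=True.
    # The 2-second idle timeout is an assumed value, not tuned.
    def download_link_polling() -> None:
        from queue import Empty
        session = get_session()
        while True:
            try:
                url = q.get(block=True, timeout=2)  # raises queue.Empty after 2s idle
            except Empty:
                return  # queue drained; worker exits on its own
            with session.get(url) as response:
                print(f'Read {len(response.content)} from {url}')
            q.task_done()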

    def download_all() -> None:
        '''Start 10 threads, each thread as a wrapper of downloader'''
        thread_num = 10
        for i in range(thread_num):
            t_worker = Thread(target=download_link, daemon=True)  # daemon: don't keep the process alive after q.join()
            t_worker.start()
        q.join()  # main thread wait until all url finished downloading

    print("start work")
    start = time.time()
    download_all()
    end = time.time()
    print(f'download {len(url_list)} links in {end - start} seconds')


def thread_pool_get(url_list):
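    """
    Thread-pool variant: ThreadPoolExecutor manages the 10 workers, so no
    manual Queue or thread shutdown is needed. The thread-local Session
    trick from multi_thread_get still gives each worker its own Session.
    """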
    thread_local = local()

    def get_session() -> Session:
        if not hasattr(thread_local, 'session'):
            thread_local.session = requests.Session()
        return thread_local.session

    def download_link(url: str):
        session = get_session()
        with session.get(url) as response:
            print(f'Read {len(response.content)} from {url}')

    def download_all() -> None:
        with ThreadPoolExecutor(max_workers=10) as executor:
            executor.map(download_link, url_list)

    start = time.time()
    download_all()
    end = time.time()
    print(f'download {len(url_list)} links in {end - start} seconds')


def aio_get(url_list):
    """
    Send requests asynchronously (non-blocking I/O).
    Concurrency can go very high here, with throughput bounded by bandwidth,
    the NIC, and so on; too much concurrency may get rate-limited by the
    server, though (a semaphore-based cap is sketched below).
    :param url_list:
    :return:
    """
    import asyncio
    import aiohttp
    from aiohttp.client import ClientSession

    async def download_link(url: str, session: ClientSession):
        async with session.get(url) as response:
            result = await response.text()
            print(f'Read {len(result)} from {url}')
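
    # Sketch: besides the TCPConnector limit, an asyncio.Semaphore can cap the
    # number of in-flight requests when a server rate-limits aggressive
    # clients. The semaphore size is up to the caller; 20 would be a
    # reasonable starting point (an assumption, not a tuned value).
    async def download_link_limited(url: str, session: ClientSession,
                                    sem: asyncio.Semaphore):
        async with sem:
            await download_link(url, session)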

    async def download_all(urls: list):
        my_conn = aiohttp.TCPConnector(limit=40)
        async with aiohttp.ClientSession(connector=my_conn) as session:
            tasks = []
            for url in urls:
                task = asyncio.ensure_future(download_link(url=url, session=session))
                tasks.append(task)

            await asyncio.gather(*tasks, return_exceptions=True)  # the gather must be awaited inside the session context

    start = time.time()
    asyncio.run(download_all(url_list))
    end = time.time()

    print(f'download {len(url_list)} links in {end - start} seconds')


if __name__ == '__main__':
    _url_list = ["https://cn.bing.com/", "https://www.baidu.com/", "https://www.so.com/"] * 50
    # Method 1: plain synchronous requests
    # sync_get(_url_list)  # 58.78762245178223
    # Method 2: synchronous, sharing one Session
    # sync_get_share_session(_url_list)  # 32.01118564605713
    # Method 3: multi-threaded
    # multi_thread_get(_url_list)  # 3.5877575874328613
    # Method 4: thread pool
    thread_pool_get(_url_list)  # 3.5470234870910645
    # Method 5: asynchronous non-blocking requests
    aio_get(_url_list)  # 1.354555606842041
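
For the 150 URLs above (3 sites × 50), the timings recorded in the comments come out to roughly: sync 58.8 s, shared-Session sync 32.0 s, multi-threaded 3.59 s, thread pool 3.55 s, and AIO 1.35 s.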

[Figure: multi-threading diagram]

[Figure: thread pool diagram]

[Figure: AIO asynchronous non-blocking I/O diagram]
