Python 中使用 threading 和 concurrent.futures 实现多线程

30 篇文章 0 订阅
18 篇文章 4 订阅

一、threading

import requests
from lxml import etree
import threading


THREAD_NUM = 10  # batch size: start_thread slices the URL list into groups of this many and starts one thread per URL


def request(url, timeout=10):
    """
    Fetch one result page and print the title and image URL of every row.

    :param url: URL of the page to request
    :param timeout: seconds to wait for the server before giving up
    :return: None; each parsed row is printed as a dict
    """
    try:
        # Without a timeout a stalled connection would block this thread
        # (and the caller's join()) indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print('请求失败: {} ({})'.format(url, exc))
        return

    if response.status_code != 200:
        print('错误响应码为:' + str(response.status_code))
        return

    html_xpath = etree.HTML(response.text)
    rows = html_xpath.xpath('//div[@class="row results-row"]/div')
    for row in rows:
        titles = row.xpath('.//h4/a/text()')
        images = row.xpath('.//img/@src')
        if not titles or not images:
            # Skip rows lacking a title or an image instead of raising
            # IndexError on the empty xpath result.
            continue
        # Separate local names so the ``url`` parameter is not overwritten.
        temp_dict = {
            'title': titles[0],
            'url': images[0],
        }
        print(temp_dict)


def start_thread(works, worker=None, thread_num=None):
    """
    Process the jobs in batches, running one thread per job within a batch.

    Each batch is started and then joined before the next batch begins, so at
    most ``thread_num`` threads are alive at any time.

    :param works: list of URLs (jobs) to process
    :param worker: callable invoked as ``worker(job)`` in its own thread;
        defaults to the module-level ``request``
    :param thread_num: maximum number of threads per batch; defaults to the
        module-level ``THREAD_NUM``
    :return: None
    :raises ValueError: if ``thread_num`` is smaller than 1
    """
    if worker is None:
        worker = request
    if thread_num is None:
        thread_num = THREAD_NUM
    if thread_num < 1:
        raise ValueError('thread_num must be a positive integer')

    works = list(works)
    # Ceiling division: a partial final batch still needs its own round.
    x = -(-len(works) // thread_num)
    for i in range(x):
        print('循环第  {}   次, 共有   {}   次'.format(i, x))
        # Slicing clamps at the end of the list, so the last (possibly
        # shorter) batch needs no special case.
        work = works[i * thread_num:(i + 1) * thread_num]
        # Pass the callable itself as ``target``; calling it here would run
        # the job synchronously in the main thread and give Thread a None target.
        threads = [threading.Thread(target=worker, args=(job,)) for job in work]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()


def main():
    """
    Entry point of the threading demo.

    Builds the page URLs for ``start`` values 1 through 99 and hands the
    whole list to ``start_thread``.

    :return: None
    """
    template = 'https://digital.ucd.ie/index.php?q=&start={}&rows=10'
    works = []
    for start in range(1, 100):
        works.append(template.format(start))
    start_thread(works)


# Run the threading demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

二、concurrent.futures

from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from lxml import etree


def request(url, timeout=10):
    """
    Fetch one result page and collect the title and image URL of every row.

    :param url: URL of the page to request
    :param timeout: seconds to wait for the server before giving up
    :return: list of ``{'title': ..., 'url': ...}`` dicts, one per parsed
        row; an empty list when the request fails or the status is not 200
    """
    try:
        # Without a timeout a stalled connection would occupy a pool worker
        # indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print('请求失败: {} ({})'.format(url, exc))
        return []

    if response.status_code != 200:
        print('错误响应码为:' + str(response.status_code))
        return []

    html_xpath = etree.HTML(response.text)
    rows = html_xpath.xpath('//div[@class="row results-row"]/div')
    results = []
    for row in rows:
        titles = row.xpath('.//h4/a/text()')
        images = row.xpath('.//img/@src')
        if not titles or not images:
            # Skip rows lacking a title or an image instead of raising
            # IndexError on the empty xpath result.
            continue
        # Accumulate every row; returning from inside the loop would drop
        # everything after the first row.
        results.append({
            'title': titles[0],
            'url': images[0],
        })
    return results


def main():
    """
    Entry point of the concurrent.futures demo.

    Builds the page URLs for ``start`` values 1 through 99, submits one
    ``request`` task per URL to a pool of at most 10 threads, and prints each
    task's result as soon as that task finishes.

    :return: None
    """
    url = 'https://digital.ucd.ie/index.php?q=&start={}&rows=10'
    works = [url.format(start) for start in range(1, 100)]

    # Using the executor as a context manager shuts the pool down and releases
    # its worker threads on exit, even if a task raises.
    with ThreadPoolExecutor(max_workers=10) as pool:  # at most 10 threads
        # Approach 1: keep the futures in a list and iterate with as_completed.
        jobs = [pool.submit(request, work) for work in works]  # submit asynchronously
        # as_completed yields each future once it is done, blocking until the
        # next one finishes.
        for future in as_completed(jobs):
            print(future.result())

        # Approach 2: attach a callback to each future.
        # for work in works:
        #     p = pool.submit(request, work)  # submit asynchronously
        #     p.add_done_callback(lambda x: print(x.result()))

        # Approach 3: pool.map replaces the explicit submit loop.
        # data = pool.map(request, works)
        # for item in data:
        #     print(item)
    

# Run the concurrent.futures demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

 

 

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值