import concurrent
import functools
import threading
import time
from concurrent import futures
from multiprocessing import Pool

import pandas as pd
import requests

# import current  # BUG: no module named `current` exists — typo for `concurrent` (imported above)
# Decorator: print the wrapped function's execution time.
def gettime(func):
    """Wrap *func* so each call prints a banner with the elapsed wall time.

    Fixes vs. original: forwards **kwargs (they were silently dropped),
    returns the wrapped function's result (it was discarded), and uses
    functools.wraps so the wrapped function keeps its name/docstring.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print("=" * 50)
        print(func.__name__, 'Start...')
        starttime = time.time()
        result = func(*args, **kwargs)  # original called func(*args) only
        endtime = time.time()
        spendtime = endtime - starttime
        print(func.__name__, "End...")
        print("Spend", spendtime, "s totally")
        print("=" * 50)
        return result  # original returned None regardless of func's result
    return wrapper
# Read the first n test-site URLs from a CSV file.
def get_urls_from_files(n, path='TestUrls.csv'):
    """Return the first *n* entries of the 'url' column of *path*.

    The CSV path is now a parameter; the default preserves the original
    hard-coded 'TestUrls.csv' behavior.
    """
    df = pd.read_csv(path)
    return list(df['url'])[:n]
# Request a page and return its body text (None on failure).
def getdata(url, retries=3):
    """GET *url* and return the response body as text.

    Returns None when the connection fails (after exhausting retries on
    5xx responses). Fixes vs. original: the recursive retry's result is
    now returned (it was computed and thrown away, after which `.text`
    was read from the failed 5xx response), and a None response can no
    longer reach `.text`.
    """
    headers = {}
    try:
        html = requests.get(url, headers=headers)
    except requests.exceptions.ConnectionError as e:
        print('下载出错,错误原因:', e)
        html = None
    if html is None:
        return None
    # 5xx is a server-side error — safe to retry a limited number of times.
    if 500 <= html.status_code < 600 and retries:
        print("服务器出错正在重试...")
        return getdata(url, retries - 1)
    return html.text
# Serial version: fetch the URLs one after another.
@gettime
def Mynormal():
    """Download every entry of the module-level `urls` list sequentially."""
    for target in urls:
        getdata(target)
# Process-pool version: fan the downloads out across worker processes.
@gettime
def MyprocessPool(num=10):
    """Map getdata over the module-level `urls` with *num* worker processes."""
    worker_pool = Pool(num)
    fetched = worker_pool.map(getdata, urls)
    worker_pool.close()
    worker_pool.join()
    return fetched
# Thread-pool version: overlap the I/O waits with a thread pool.
@gettime
def Myfutures(num_of_max_works=10):
    """Map getdata over the module-level `urls` with a thread pool.

    Bug fix: the executor class is `ThreadPoolExecutor` (capital T);
    `concurrent.futures.threadPoolExecutor` raises AttributeError.
    Also materializes and returns the results, matching MyprocessPool.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_of_max_works) as executor:
        return list(executor.map(getdata, urls))
if __name__ == '__main__':
    # Bug fix: the helper is named get_urls_from_files (plural);
    # get_urls_from_file raised NameError.
    urls = get_urls_from_files(100)
    # Serial
    Mynormal()
    # Process pool
    MyprocessPool(10)
    # Thread pool
    Myfutures(100)
# python笔记--多进程与多线程 (blog footer: "Python notes — multiprocessing & multithreading")
# 最新推荐文章于 2024-07-20 17:12:48 发布 (CSDN article metadata, not code — commented out so the file parses)