concurrent.futures 中的chunksize

import time
from multiprocessing.pool import Pool
from concurrent.futures import as_completed, ProcessPoolExecutor

# Benchmark input: the integers 1 through 99999, mapped over by every pool below.
NUMBERS = range(1, 100000)
# Loop-bound parameter read by f(): f iterates k over 1 .. K + 1.
K = 50


def f(x):
    """Return the sum of ``x ** (1 / k ** 1.5)`` for k = 1, 2, ..., K + 1.

    ``K`` is the module-level constant. The terms are accumulated one at
    a time, in increasing order of ``k``, starting from the integer 0.
    The function is defined at module level so worker processes can
    import it by name.
    """
    total = 0
    k = 1
    last_k = K + 1
    while k <= last_k:
        exponent = 1 / k ** 1.5
        total = total + x ** exponent
        k += 1
    return total


def default_chunksize(n_items, n_workers, factor=4):
    """Return a chunk size that splits ``n_items`` into about ``n_workers * factor`` chunks.

    The quotient is rounded up when the division leaves a remainder, and
    the result is never smaller than 1, because ``Executor.map`` rejects
    ``chunksize < 1`` with ``ValueError``.

    Args:
        n_items: Number of inputs that will be mapped.
        n_workers: Number of worker processes.
        factor: Target number of chunks per worker.

    Returns:
        An integer chunk size >= 1.
    """
    chunksize, extra = divmod(n_items, n_workers * factor)
    if extra:
        chunksize += 1
    return max(1, chunksize)


if __name__ == '__main__':
    # Single source of truth for the worker count, so the chunk size
    # below need not read the executor's private ``_max_workers``.
    workers = 3

    print('multiprocessing.pool\n')
    # perf_counter() is monotonic, which makes it the right clock for intervals.
    start = time.perf_counter()
    # The context manager tears the pool down; the original never closed it.
    with Pool(workers) as pool:
        results = pool.map(f, NUMBERS)
    print(len(results))
    print('cost:{}'.format(time.perf_counter() - start))


    print('ProcessPoolExecutor without chunksize:\n')
    start = time.perf_counter()
    with ProcessPoolExecutor(max_workers=workers) as executor:
        # Default chunksize=1: every input is sent to a worker on its own.
        results = list(executor.map(f, NUMBERS))
    print(len(results))
    print('cost:{}'.format(time.perf_counter() - start))


    print('ProcessPoolExecutor with chunksize:\n')
    start = time.perf_counter()
    with ProcessPoolExecutor(max_workers=workers) as executor:
        chunksize = default_chunksize(len(NUMBERS), workers)
        results = list(executor.map(f, NUMBERS, chunksize=chunksize))
    print(len(results))

    print('COST: {}'.format(time.perf_counter() - start))

运行结果:

multiprocessing.pool

99999
cost:0.23070788383483887


ProcessPoolExecutor without chunksize:

99999
cost:7.456393241882324


ProcessPoolExecutor with chunksize:

99999
COST: 0.2285900115966797

Process finished with exit code 0

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
根据错误:AttributeError: module 'networkx' has no attribute 'from_numpy_matrix',修改下述代码:import os import jieba.analyse from textrank4zh import TextRank4Keyword import concurrent.futures # 定义分块读取函数 def read_in_chunks(file_path, chunk_size=1024*1024): with open(file_path, 'r', encoding='utf-8') as f: while True: data = f.read(chunk_size) if not data: break yield data # 定义处理函数 def process_chunk(chunk): # 使用jieba分词提取关键词 jieba_keywords = jieba.analyse.extract_tags(chunk, topK=10, withWeight=True) # 使用textrank4zh提取关键词 tr4w = TextRank4Keyword() tr4w.analyze(chunk, lower=True, window=2) textrank_keywords = tr4w.get_keywords(10, word_min_len=2) # 合并两种方法提取的关键词 keywords = jieba_keywords + textrank_keywords return keywords # 读取文本文件,并按块处理 chunks = [] for chunk in read_in_chunks('input.txt'): chunks.append(chunk) # 多线程并行处理 results = [] with concurrent.futures.ThreadPoolExecutor() as executor: futures = [executor.submit(process_chunk, chunk) for chunk in chunks] for future in concurrent.futures.as_completed(futures): results.extend(future.result()) # 合并结果,并按权重降序排序 keywords = {} for keyword, weight in results: if keyword in keywords: keywords[keyword] += weight else: keywords[keyword] = weight keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True) keywords = [(keyword, weight) for keyword, weight in keywords if len(keyword) > 1][:10] # 输出到txt文件 with open('output.txt', 'w', encoding='utf-8') as f: for keyword, weight in keywords: f.write(keyword + '\t' + str(weight) + '\n')
06-03
根据错误信息 `AttributeError: module 'networkx' has no attribute 'from_numpy_matrix'`,问题并不是缺少依赖库,`import scipy` 也无法解决:`from_numpy_matrix` 已在 networkx 3.0 中被移除(由 `from_numpy_array` 取代),而 `textrank4zh` 内部仍在调用这个旧函数。解决办法有两种:一是将 networkx 降级到 3.0 之前的版本(`pip install "networkx<3.0"`);二是在调用 `textrank4zh` 之前为旧函数名补上别名。采用第二种办法修改后的代码如下:

```python
import os
import jieba.analyse
import networkx as nx
import concurrent.futures

# networkx 3.0 移除了 from_numpy_matrix,这里用 from_numpy_array 补上别名
if not hasattr(nx, 'from_numpy_matrix'):
    nx.from_numpy_matrix = nx.from_numpy_array

from textrank4zh import TextRank4Keyword

# 定义分块读取函数
def read_in_chunks(file_path, chunk_size=1024*1024):
    with open(file_path, 'r', encoding='utf-8') as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                break
            yield data

# 定义处理函数
def process_chunk(chunk):
    # 使用jieba分词提取关键词
    jieba_keywords = jieba.analyse.extract_tags(chunk, topK=10, withWeight=True)
    # 使用textrank4zh提取关键词
    tr4w = TextRank4Keyword()
    tr4w.analyze(chunk, lower=True, window=2)
    textrank_keywords = tr4w.get_keywords(10, word_min_len=2)
    # 合并两种方法提取的关键词
    keywords = jieba_keywords + textrank_keywords
    return keywords

# 读取文本文件,并按块处理
chunks = []
for chunk in read_in_chunks('input.txt'):
    chunks.append(chunk)

# 多线程并行处理
results = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(process_chunk, chunk) for chunk in chunks]
    for future in concurrent.futures.as_completed(futures):
        results.extend(future.result())

# 合并结果,并按权重降序排序
keywords = {}
for keyword, weight in results:
    if keyword in keywords:
        keywords[keyword] += weight
    else:
        keywords[keyword] = weight
keywords = sorted(keywords.items(), key=lambda x: x[1], reverse=True)
keywords = [(keyword, weight) for keyword, weight in keywords if len(keyword) > 1][:10]

# 输出到txt文件
with open('output.txt', 'w', encoding='utf-8') as f:
    for keyword, weight in keywords:
        f.write(keyword + '\t' + str(weight) + '\n')
```

请注意,如果还存在其他错误,需要根据具体的错误信息进行相应的修改。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值