concurrent.futures.ProcessPoolExecutor 是一个高级接口，用于轻松地并行执行任务；
而 multiprocessing.Process 提供了更底层的进程控制能力，允许更细粒度的定制和管理。
#以下代码包括excel合并的过程
import pandas as pd
import os
from concurrent.futures import ProcessPoolExecutor
import glob
def write_part_to_excel(data, filename):
    """Write one chunk of rows to an Excel workbook.

    Args:
        data: Row data accepted by ``pd.DataFrame`` (e.g. a list of lists).
        filename: Path of the ``.xlsx`` file to create.
    """
    # Build the frame and serialise it in one step; the index column is
    # omitted so the sheet contains only the data columns.
    pd.DataFrame(data).to_excel(
        filename,
        index=False,
        engine='openpyxl',
    )
def split_and_write(data, num_parts, base_filename, output_dir=None):
    """Split ``data`` into consecutive slices and write each to Excel in parallel.

    Every slice holds ``len(data) // num_parts`` rows; the final slice also
    takes whatever remainder is left, so reading the part files back in
    order reproduces ``data``.

    Args:
        data: Sliceable sequence of rows (must support ``len`` and slicing).
        num_parts: Number of part files to produce; must be at least 1.
        base_filename: Stem of each part file; part ``i`` is named
            ``{base_filename}_{i}.xlsx``.
        output_dir: Directory that receives the part files. When ``None``
            the original fixed location ``D:\\desktop\\new`` is used.

    Returns:
        List of part file paths, in slice order.

    Raises:
        ValueError: If ``num_parts`` is less than 1.
    """
    # Reject bad counts up front: 0 would otherwise surface as an opaque
    # ZeroDivisionError and a negative count would silently write nothing.
    if num_parts < 1:
        raise ValueError(f'num_parts must be at least 1, got {num_parts!r}')

    # Number of rows in every part except possibly the last one.
    part_size = len(data) // num_parts

    filenames = []
    for i in range(num_parts):
        part_name = f'{base_filename}_{i}.xlsx'
        if output_dir is None:
            # Legacy default location, kept byte-for-byte for existing callers.
            filenames.append(f'D:\\desktop\\new\\{part_name}')
        else:
            filenames.append(os.path.join(output_dir, part_name))

    # Make sure the target directories exist before any worker tries to
    # write. An empty dirname means "current directory" and needs no work.
    for directory in {os.path.dirname(name) for name in filenames}:
        if directory:
            os.makedirs(directory, exist_ok=True)

    with ProcessPoolExecutor() as executor:
        futures = []
        for i, filename in enumerate(filenames):
            start = i * part_size
            # The last part runs to the end so the remainder is not dropped.
            end = start + part_size if i < num_parts - 1 else None
            futures.append(
                executor.submit(write_part_to_excel, data[start:end], filename)
            )
        # result() re-raises any exception that occurred in a worker, so a
        # failed write is reported instead of being silently ignored.
        for future in futures:
            future.result()
    return filenames
def merge_excel_files(filenames, output_filename):
    """Concatenate several Excel files row-wise into one workbook.

    The files are read in the order given and stacked with a fresh
    0..n-1 index; the result is written without an index column.

    Args:
        filenames: Iterable of paths to the ``.xlsx`` files to merge.
        output_filename: Path of the combined ``.xlsx`` file to create.
    """
    # Read everything first and concatenate once: calling pd.concat inside
    # the loop re-copies the accumulated rows on every iteration.
    frames = [pd.read_excel(name, engine='openpyxl') for name in filenames]

    if frames:
        combined_data = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list; with no inputs, still produce an
        # (empty) output workbook.
        combined_data = pd.DataFrame()

    combined_data.to_excel(output_filename, index=False, engine='openpyxl')
def main():
    """Demo: split sample rows into four Excel part files, then merge them."""
    # Sample data: a two-dimensional list of 120,000 rows, each holding the
    # same integer three times. It is built here rather than at module top
    # level so that worker processes which re-import this module do not
    # rebuild it.
    data = [[i, i, i] for i in range(1, 120001)]

    # Split the rows and write the part files in parallel.
    filenames = split_and_write(data, 4, 'part')

    # Merge all part files back into a single workbook.
    merge_excel_files(filenames, 'D:\\desktop\\combined_output.xlsx')

    # NOTE: the part files are left on disk; cleanup is not performed here.
    # Remove each path in `filenames` with os.remove() if they are not needed.


# Entry-point guard: required for process pools so that importing this
# module in a worker does not re-run the demo.
if __name__ == '__main__':
    main()