转载请注明作者和出处: http://blog.csdn.net/john_bh/
python 使用 concurrent.futures 并行处理数据,速度提高 2~6 倍。对比代码如下:
import glob
import os
import time
import cv2
from concurrent import futures
from multiprocessing import cpu_count
def loadData(dataDir):
    """Serially load every PNG under *dataDir* and resize it to 600x600.

    Single-process baseline used to compare against the parallel
    ``loadData2``. Prints the elapsed wall-clock time.
    """
    start_time = time.time()
    for image_filename in glob.glob(os.path.join(dataDir, "*.png")):
        img = cv2.imread(image_filename)
        # cv2.imread returns None (it does not raise) for missing or
        # undecodable files; skip them instead of crashing in resize.
        if img is None:
            continue
        cv2.resize(img, (600, 600))
    print("loadData Time taken: {}".format(time.time() - start_time))
def load_and_resize(image_filename):
    """Worker: load one image file and resize it to 600x600.

    Returns the resized image array, or None if the file could not be
    decoded (cv2.imread signals failure by returning None, not raising).
    Returning the result lets callers of ``executor.map`` actually use
    the processed images; original callers that ignored the (None)
    return value are unaffected.
    """
    img = cv2.imread(image_filename)
    if img is None:
        return None
    return cv2.resize(img, (600, 600))
def loadData2(dataDir):
    """Load and resize every PNG under *dataDir* in parallel, and time it.

    Uses a ProcessPoolExecutor, which by default starts one worker
    *process* per CPU core (note: processes, not threads — the original
    comment said "multithreading", but this sidesteps the GIL entirely).
    """
    start_time = time.time()
    image_files = glob.glob(os.path.join(dataDir, "*.png"))
    with futures.ProcessPoolExecutor() as executor:
        # map() fans the files out to the worker pool. The returned
        # iterator must be consumed: otherwise any exception raised in
        # a worker is silently dropped instead of being re-raised here.
        for _ in executor.map(load_and_resize, image_files):
            pass
    print("ProcessPoolExecutor loadData Time taken: {}".format(time.time() - start_time))
if __name__ == "__main__":
    # Fixed: the original path r"D\imgs" was missing the drive colon,
    # so glob would find nothing and both runs would measure an empty loop.
    dataDir = r"D:\imgs"
    print("cpu cores: {}".format(cpu_count()))
    loadData(dataDir)
    loadData2(dataDir)
cpu cores: 8
loadData Time taken: 26.0
ProcessPoolExecutor loadData Time taken: 3.796875