# Asynchronous multi-process execution approach:
import numpy as np
import scipy.io as sio
import cv2
import os
import lmdb
import math
import pickle
from tqdm import tqdm
from multiprocessing import Process, Manager, Lock, Pool
# Load the SynthText ground-truth annotations (MATLAB .mat file).
print('start loading gt file')
gts = sio.loadmat('SynthText/gt.mat')
# 'imnames': per-image relative paths; 'txt': per-image text annotations
# (presumably one entry per image, aligned by index — verify against gt.mat).
image_paths = gts['imnames'][0]
txt_anns = gts['txt'][0]
dataset_length = len(image_paths)
print('loading done!')
# Open (create if absent) the output LMDB; map_size = 2**40 bytes (1 TiB cap).
# NOTE(review): this environment is opened before the Pool forks — lmdb docs
# advise against reusing an Environment across fork; confirm this is safe here.
env = lmdb.open('SynthText.lmdb', map_size=1099511627776)
# Every image is resized to this fixed resolution before JPEG-encoding.
TARGET_HEIGHT = 416
TARGET_WIDTH = 608
def write_cache(env, cache):
    """Flush every key/value pair in *cache* to *env* in one write transaction."""
    with env.begin(write=True) as txn:
        for key, value in cache.items():
            txn.put(key, value)
def worker(index, num_workers):
    """Process the *index*-th shard of the dataset and write it into LMDB.

    The dataset is split into ``num_workers`` contiguous shards; this worker
    handles images ``[start, end)``. For each image it stores two entries in
    the LMDB environment:
      - ``image-<path>``: the JPEG-encoded, resized image bytes
      - ``label-<path>``: a pickled list of the words annotated on the image

    The cache is flushed to LMDB every 100 processed images and once more at
    the end for the remainder.
    """
    shard_size = int(math.ceil(dataset_length / num_workers))
    start = index * shard_size
    end = min((index + 1) * shard_size, dataset_length)
    step = 0
    cache = {}
    for i in tqdm(range(start, end)):
        image_path = str(image_paths[i][0])
        image = cv2.imread('SynthText/' + image_path, 1)
        if image is None:
            # Missing/corrupt file: cv2.imread returns None and resize would
            # crash the whole worker — skip this sample instead.
            continue
        image = cv2.resize(image, (TARGET_WIDTH, TARGET_HEIGHT),
                           interpolation=cv2.INTER_LINEAR)
        # Flatten the per-image annotation into a list of words: each entry
        # may contain several lines, each line several space-separated words.
        word_ann = []
        for text_block in txt_anns[i]:
            for line in text_block.split('\n'):
                word_ann.extend(line.strip().split(' '))
        ok, encoded = cv2.imencode('.jpg', image)
        if not ok:
            # Encoding failed for this image — skip rather than store garbage.
            continue
        step += 1
        cache[('image-' + image_path).encode()] = encoded.tobytes()
        cache[('label-' + image_path).encode()] = pickle.dumps(word_ann)
        if step % 100 == 0:
            write_cache(env, cache)
            cache = {}
    # Flush whatever is left in the cache (the last partial batch).
    write_cache(env, cache)
# Fan the dataset shards out over a process pool, one shard per worker.
num_workers = 80
pool = Pool(num_workers)
# Keep the AsyncResult handles: apply_async alone silently swallows any
# exception raised inside a worker; calling .get() re-raises it here.
results = [pool.apply_async(worker, args=(i, num_workers))
           for i in range(num_workers)]
pool.close()
pool.join()
for result in results:
    result.get()
# To mutate the same global variable from multiple processes, use Manager().