python多进程
处理百万量级数据时，单线程太慢了；笔者第一次使用多进程（multiprocessing），记录一下模板
# Multiprocessing task template.
import multiprocessing as mp

# 'spawn' starts each worker in a fresh interpreter. The note in the original
# says this is needed for GPU work (a forked child would inherit an already
# initialized CUDA context) — presumably; confirm against the actual workload.
# set_start_method() raises RuntimeError if the start method has already been
# fixed (e.g. when this module is imported after someone else configured it),
# so guard it instead of crashing at import time.
try:
    mp.set_start_method('spawn')
except RuntimeError:
    pass  # start method already set by an earlier caller; keep it
def xxx_task_xxx_fn(task_list, samples, xxx):
    """Worker body: process every task in *task_list*.

    Placeholder — fill in the per-task work. Results should be written into
    *samples*, the Manager-backed mapping shared with the parent process
    (plain local mutations would be lost when the worker exits).

    Args:
        task_list: the slice of tasks assigned to this worker.
        samples: shared ``mp.Manager().dict()`` (or list) collecting results.
        xxx: extra task-specific argument(s) — shape depends on the task.
    """
    pass
def xxx_task_xxx(task_list, worker_num, xxx):
    """Fan *task_list* out over *worker_num* worker processes.

    Splits the list into contiguous chunks, runs ``xxx_task_xxx_fn`` on each
    chunk in its own process, waits for all workers, and returns the shared
    results mapping.

    Args:
        task_list: full list of tasks to process.
        worker_num: number of worker processes to spawn (must be >= 1).
        xxx: extra argument forwarded verbatim to each worker.

    Returns:
        The ``mp.Manager().dict()`` the workers wrote into. (The original
        version never returned it, so results were unreachable — fixed.)
    """
    # Ceiling division so the chunks cover every task.
    workload = (len(task_list) + worker_num - 1) // worker_num
    # Manager-backed dict: shared across processes; swap for a Manager list
    # etc. depending on the task's result format.
    samples = mp.Manager().dict()
    plist = []
    for k in range(worker_num):
        # Bug fix: the original clamped with len(filenames), an undefined
        # name — the bound must come from task_list.
        start = k * workload
        end = min(start + workload, len(task_list))
        if start >= end:
            continue  # more workers than chunks; nothing left to assign
        p = mp.Process(target=xxx_task_xxx_fn,
                       args=(task_list[start:end], samples, xxx))
        p.start()
        plist.append(p)
    for p in plist:
        p.join()
    return samples