有时候我们需要对图片进行加噪、缩放等变换。当数据集比较大时，单线程遍历整个数据集耗时太长，为此可以写一点多线程处理的代码。下面以 JPEG 压缩为例：
import os
import cv2
import numpy as np
from threading import Thread
# Folder containing the source images to process.
input_folder = '/data0/wjh/coco/val2017'
# Destination folder for the copies written at JPEG quality 50.
output_jpg50_folder = '/data0/wjh/coco/val2017-jpg50'
# Destination folder for the copies written at JPEG quality 70.
output_jpg70_folder = '/data0/wjh/coco/val2017-jpg70'
# Number of worker threads; a value of 1 runs everything in the calling thread.
num_threads = 5
def jpg_compress(filelist):
    """Re-encode every listed image as JPEG at quality 50 and quality 70.

    Each name in ``filelist`` is read from ``input_folder`` and written twice:
    once into ``output_jpg50_folder`` and once into ``output_jpg70_folder``.
    The output file keeps the input's base name with its final extension
    replaced by ``.jpg``.

    Args:
        filelist: Iterable of file names relative to ``input_folder``.
    """
    targets = (
        (output_jpg50_folder, 50),
        (output_jpg70_folder, 70),
    )

    # Make sure the destinations exist before any write is attempted;
    # exist_ok keeps this safe when several threads get here at once.
    for folder, _ in targets:
        os.makedirs(folder, exist_ok=True)

    for file in filelist:
        img = cv2.imread(os.path.join(input_folder, file))
        if img is None:
            # imread yields None for entries it cannot decode (non-image
            # files, sub-directories, corrupt data); skip them instead of
            # letting imwrite fail and abort the rest of this share.
            print('skipping unreadable file: ' + file)
            continue

        # splitext drops only the last extension, so names that contain
        # extra dots ("a.b.png") keep their full stem and do not collide.
        stem = os.path.splitext(file)[0]
        for folder, quality in targets:
            cv2.imwrite(
                os.path.join(folder, stem + '.jpg'),
                img,
                [int(cv2.IMWRITE_JPEG_QUALITY), quality],
            )
def multi_thread_process(worker=None, folder=None, threads=None):
    """Run a per-file-list worker over a folder's entries on several threads.

    The directory listing is divided into ``threads`` shares whose sizes
    differ by at most one. One thread is started per non-empty share, and the
    call returns only after every started thread has finished.

    Args:
        worker: Callable that accepts one list of file names. Defaults to the
            module-level ``jpg_compress``.
        folder: Directory whose entries are listed with ``os.listdir``.
            Defaults to the module-level ``input_folder``.
        threads: Maximum number of worker threads. Defaults to the
            module-level ``num_threads``. With a value of 1 the worker is
            called directly in the current thread.

    Raises:
        ValueError: If the thread count is smaller than 1.
    """
    if worker is None:
        worker = jpg_compress
    if folder is None:
        folder = input_folder
    if threads is None:
        threads = num_threads
    if threads < 1:
        raise ValueError('thread count must be at least 1, got %r' % (threads,))

    filelist_total = os.listdir(folder)

    if threads == 1:
        worker(filelist_total)
        return

    # Strided slices spread the remainder across the shares instead of
    # piling it onto the last one, and need no division at all.
    shares = [filelist_total[i::threads] for i in range(threads)]

    # When there are fewer files than threads some shares are empty;
    # starting a thread for them would be pointless.
    thread_list = [
        Thread(target=worker, args=(share,)) for share in shares if share
    ]
    for th in thread_list:
        th.start()
    for th in thread_list:
        th.join()
# Script entry point: the whole job runs as soon as this file is executed.
multi_thread_process( )
当图像处理的需求发生变化时，只需要修改 jpg_compress 函数即可。