小工具:从总量为 NUM_PIC 的数据集中随机抽取 NUM_NEED 张图片,连同对应的标签文件一起复制到 dst_path 路径中。
import glob
import os
import random
import shutil
# Number of images to draw at random from the source directory.
NUM_NEED = 1000
# Source directory containing the *.jpg images and, where present, their
# same-named .txt label files.  It is joined to file names by plain string
# concatenation, so it must end with a path separator.
# NOTE: this is a raw string, so every "\\" is two literal backslashes.
src_path = r"E:\python\detect\image\\valid\images\\"
# Destination directory that receives the sampled images and labels.
dst_path = r"E:\python\detect\image\\valid\\new\\images\\"
# Number of *.jpg files found in src_path at the moment this line runs.
NUM_PIC = len(glob.glob(src_path + "*.jpg"))
def copy_random_sample(src_dir, dst_dir, num_need):
    """Copy a random subset of ``*.jpg`` images, with labels, to *dst_dir*.

    For every selected image the ``.txt`` file with the same stem in
    *src_dir* is copied as well.  When an image has no label file, an empty
    one is created in *dst_dir* so the image can be used as a negative
    sample.

    Args:
        src_dir: Directory that is searched (non-recursively) for ``*.jpg``.
        dst_dir: Directory that receives the copies; created if missing.
        num_need: Number of images to select.

    Returns:
        The file names of the copied images, in the order they were copied.

    Raises:
        ValueError: If *num_need* is negative or larger than the number of
            images found in *src_dir*.
    """
    # List the directory once so the count and the loop see the same files.
    image_paths = glob.glob(os.path.join(src_dir, "*.jpg"))
    total = len(image_paths)
    if not 0 <= num_need <= total:
        raise ValueError(
            "cannot sample %d images from %d available" % (num_need, total)
        )

    os.makedirs(dst_dir, exist_ok=True)

    # A set keeps the per-image membership test O(1) instead of O(num_need).
    chosen = set(random.sample(range(total), num_need))

    copied = []
    for position, image_path in enumerate(image_paths):
        if position in chosen:
            image_name = os.path.basename(image_path)
            stem = os.path.splitext(image_name)[0]
            label_path = os.path.splitext(image_path)[0] + ".txt"

            shutil.copy2(image_path, dst_dir)  # copy the image
            try:
                shutil.copy2(label_path, dst_dir)  # copy the label
            except FileNotFoundError:
                # No label for this image: write an empty one so it serves
                # as a negative sample.  Other errors are left to propagate.
                with open(os.path.join(dst_dir, stem + ".txt"), "w"):
                    pass
                print("not find", label_path, "add one")
            copied.append(image_name)

        # Progress is reported per scanned image, not per copied image.
        scanned = position + 1
        if scanned % 10 == 0:
            print("now finish", scanned, "/", total)
    return copied


if __name__ == '__main__':
    copy_random_sample(src_path, dst_path, NUM_NEED)
将上述随机抽取好的图片划分为 train/valid/test 三个子集,并自动建立相应的文件夹。
import glob
import random
import shutil
import os
# Number of images to place in the validation subset.
VALID_NEED = 120
# Number of images to place in the test subset.
TEST_NEED = 100
# Directory containing the images to split; the "new_devide" output tree is
# created inside it.  It is joined to names by plain string concatenation,
# so it must end with a path separator.
src_path = r"E:\python\image\\"
def build_new_filepath(src_path):
    """Create a fresh ``new_devide`` output tree under *src_path*.

    Any existing ``new_devide`` directory is deleted first, together with
    everything in it.  The tree then gets ``train``, ``valid`` and ``test``
    folders, each holding an ``images`` and a ``labels`` sub-folder.

    *src_path* is used as a plain string prefix, so it must already end with
    a path separator.
    """
    output_root = src_path + "new_devide\\"
    if os.path.exists(output_root):
        # Start from scratch so results of an earlier run cannot leak in.
        shutil.rmtree(output_root)

    sub_dirs = (
        "new_devide\\train\\images",
        "new_devide\\train\\labels",
        "new_devide\\valid\\images",
        "new_devide\\valid\\labels",
        "new_devide\\test\\images",
        "new_devide\\test\\labels",
    )
    for sub_dir in sub_dirs:
        os.makedirs(src_path + sub_dir)
def assign_splits(num_pic, valid_need, test_need):
    """Randomly assign each of *num_pic* images to train, valid or test.

    Args:
        num_pic: Total number of images.
        valid_need: Number of images for the validation subset.
        test_need: Number of images for the test subset.

    Returns:
        A list of length *num_pic* whose i-th entry is ``"train"``,
        ``"valid"`` or ``"test"``.

    Raises:
        ValueError: If a requested count is negative, or if valid and test
            together would leave no image for training.
    """
    if valid_need < 0 or test_need < 0:
        raise ValueError("valid and test counts must not be negative")
    if valid_need + test_need >= num_pic:
        raise ValueError("too many valid and test")

    # random.sample returns the indices in selection order, so slicing the
    # result yields two independent random subsets.  Handing out valid/test
    # in directory-listing order would give valid the earliest files and
    # test the latest ones.
    picked = random.sample(range(num_pic), valid_need + test_need)
    splits = ["train"] * num_pic
    for position in picked[:valid_need]:
        splits[position] = "valid"
    for position in picked[valid_need:]:
        splits[position] = "test"
    return splits


def copy_image_with_label(image_path, image_dir, label_dir):
    """Copy one image to *image_dir* and its label to *label_dir*.

    The label is the ``.txt`` file next to the image with the same stem.
    When it does not exist, an empty label file is created in *label_dir*
    so the image can be used as a negative sample.  Both target directories
    must already exist.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    label_path = os.path.splitext(image_path)[0] + ".txt"

    shutil.copy2(image_path, image_dir)  # copy the image
    try:
        shutil.copy2(label_path, label_dir)  # copy the label
    except FileNotFoundError:
        # Only a missing label is expected here; other errors propagate.
        with open(os.path.join(label_dir, stem + ".txt"), "w"):
            pass
        print("not find", label_path, "add one")


if __name__ == '__main__':
    # List the directory once so the count and the loop see the same files.
    image_paths = glob.glob(src_path + "*.jpg")
    NUM_PIC = len(image_paths)

    # Validate the settings BEFORE build_new_filepath deletes the previous
    # output tree, so a bad configuration cannot destroy an earlier split.
    splits = assign_splits(NUM_PIC, VALID_NEED, TEST_NEED)

    # Create the output folders.
    build_new_filepath(src_path)
    print("total number of picture", NUM_PIC)

    for temp_num, (image_path, split) in enumerate(zip(image_paths, splits), 1):
        split_root = src_path + "new_devide\\" + split + "\\"
        copy_image_with_label(
            image_path, split_root + "images\\", split_root + "labels\\"
        )
        if temp_num % 10 == 0:
            print("now finish", temp_num, "/", NUM_PIC)