准备工作
然后执行下面这段代码。需要修改的地方(共 3 处)已在注释中详细标注:按需调整数据集的划分比例,并把其中的路径替换为自己的路径即可。
import random
import os
import shutil
# Three settings below must be edited before running (marked EDIT 1-3).

# EDIT 1: train / validation / test ratios.
# The ratios must sum to 1. Test and validation sizes are rounded down and the
# training set receives every remaining file, so the dataset size does not
# have to be evenly divisible by the ratios.
train_percent = 0.8
test_percent = 0.1
valid_percent = 0.1

# EDIT 2: directories holding the original (unsplit) images and label files.
image_path = '.../.../images'
label_path = '.../.../labels'

# EDIT 3: directory that receives the split. The sub-directories
# {test,valid,train}/{images,labels} are created beneath it when missing.
output_path = '.....'


def split_dataset(image_dir, label_dir, output_dir,
                  train_ratio, test_ratio, valid_ratio):
    """Copy an image/label dataset into disjoint test, valid and train subsets.

    Every regular file in ``image_dir`` is assigned to exactly one subset and
    copied to ``output_dir/<subset>/images``. Its label file is copied to
    ``output_dir/<subset>/labels``. The source directories are left untouched.

    Labels are matched to images by file name without extension (assumes a
    label shares its image's base name, e.g. ``a.jpg`` <-> ``a.txt``). An
    image without a matching label is still copied and a warning is printed.

    Args:
        image_dir: Directory containing the source images.
        label_dir: Directory containing the source label files.
        output_dir: Root directory that receives the three subsets.
        train_ratio: Fraction of files for training; only used to validate
            that the ratios sum to 1, since training gets the remainder.
        test_ratio: Fraction of files for testing (count is rounded down).
        valid_ratio: Fraction of files for validation (count is rounded down).

    Returns:
        A dict mapping ``'test'``, ``'valid'`` and ``'train'`` to the number
        of images copied into that subset.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
    """
    ratios = (train_ratio, test_ratio, valid_ratio)
    if min(ratios) < 0 or abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(
            'train/test/valid ratios must be non-negative and sum to 1, '
            'got {}'.format(ratios))

    # os.listdir returns entries in arbitrary order; sort for a stable listing
    # and skip sub-directories, which shutil.copyfile cannot copy.
    images_files_list = sorted(
        name for name in os.listdir(image_dir)
        if os.path.isfile(os.path.join(image_dir, name)))
    labels_files_list = sorted(os.listdir(label_dir))
    print('images files: {}'.format(images_files_list))
    print('labels files: {}'.format(labels_files_list))
    total_num = len(images_files_list)
    print('total_num: {}'.format(total_num))

    # Pair labels with images by base name rather than by list position, so
    # extra or missing files in either directory cannot shift the pairing.
    labels_by_stem = {
        os.path.splitext(name)[0]: name for name in labels_files_list
    }

    test_num = int(total_num * test_ratio)
    valid_num = int(total_num * valid_ratio)

    # Shuffle once and cut the permutation into three slices. This guarantees
    # the subsets are disjoint, unlike sampling each subset independently.
    shuffled = list(range(total_num))
    random.shuffle(shuffled)
    subset_of = {}
    for position, index in enumerate(shuffled):
        if position < test_num:
            subset_of[index] = 'test'
        elif position < test_num + valid_num:
            subset_of[index] = 'valid'
        else:
            subset_of[index] = 'train'

    # shutil.copyfile does not create missing parent directories.
    for subset in ('test', 'valid', 'train'):
        for kind in ('images', 'labels'):
            os.makedirs(os.path.join(output_dir, subset, kind), exist_ok=True)

    counts = {'test': 0, 'valid': 0, 'train': 0}
    for i, image_name in enumerate(images_files_list):
        print('src image: {}, i={}'.format(image_name, i))
        subset = subset_of[i]
        shutil.copyfile(
            os.path.join(image_dir, image_name),
            os.path.join(output_dir, subset, 'images', image_name))

        label_name = labels_by_stem.get(os.path.splitext(image_name)[0])
        if label_name is None:
            print('warning: no label file found for image {}'.format(
                image_name))
        else:
            shutil.copyfile(
                os.path.join(label_dir, label_name),
                os.path.join(output_dir, subset, 'labels', label_name))
        counts[subset] += 1
    return counts


if __name__ == '__main__':
    split_dataset(image_path, label_path, output_path,
                  train_percent, test_percent, valid_percent)
执行完成后,数据集已分配完毕