# Every new dataset project runs into the same train/val/test splitting chore,
# so this script implements a simple file-based dataset split (and its undo).
import os
import random
import shutil
def splitDataset(together_path, split_paths, use_seed=False,
                 train_percent=0.8, val_percent=0.95):
    """Split the files in *together_path* into train/val/test directories.

    First moves ``train_percent`` of the files into ``split_paths[0]`` (train)
    and the rest into ``split_paths[1]`` (val); then keeps ``val_percent`` of
    that remainder in the val directory and moves the others into
    ``split_paths[2]`` (test). Files are moved, not copied.

    Args:
        together_path: directory currently holding all files.
        split_paths: three destination directories ``[train, val, test]``.
        use_seed: if True, seed the RNG (529) for a reproducible split.
        train_percent: fraction of all files sent to the train directory.
            (Was a module global in the original; now a parameter with the
            same default, so existing callers are unaffected.)
        val_percent: fraction of the non-train files kept for validation.
    """
    if use_seed:
        random.seed(529)
    # Bug fix: the original tested os.path.isdir(name) against the current
    # working directory, so subdirectories of together_path were NOT skipped.
    all_files = [n for n in os.listdir(together_path)
                 if not os.path.isdir(os.path.join(together_path, n))]
    # Set membership makes the per-file lookup O(1) instead of O(n).
    train_sample = set(random.sample(all_files, int(len(all_files) * train_percent)))
    for name in all_files:
        dest = split_paths[0] if name in train_sample else split_paths[1]
        shutil.move(os.path.join(together_path, name), os.path.join(dest, name))
    # Of what landed in val, keep val_percent there and move the rest to test.
    val_test = os.listdir(split_paths[1])
    val_sample = set(random.sample(val_test, int(len(val_test) * val_percent)))
    for left_name in val_test:
        if left_name not in val_sample:
            shutil.move(os.path.join(split_paths[1], left_name),
                        os.path.join(split_paths[2], left_name))
def mergeDataset(together_path, split_paths):
    """Undo a split: move every file from each split directory back.

    Args:
        together_path: directory that should receive all files again.
        split_paths: directories whose contents are moved into together_path.
    """
    for folder in split_paths:
        for entry in os.listdir(folder):
            source = os.path.join(folder, entry)
            shutil.move(source, os.path.join(together_path, entry))
if __name__ == '__main__':
    # Placeholder paths -- replace with real locations before running.
    img_path = r'your\image\path'
    # NOTE(review): ann_path is never used below -- presumably annotations
    # should be split the same way as the images; confirm intent.
    ann_path = r'your\annotation\path'
    # Destination directories, in order: [train, val, test].
    save_path = [r'train\dir\to\save',
                 r'val\dir\to\save',
                 r'test\dir\to\save']
    # Fraction of the remaining (non-train) images kept for validation.
    val_percent = 0.95
    # Fraction of all images used for training.
    train_percent = 0.8
    # Create the destination directories if they do not exist yet.
    for path in save_path:
        if not os.path.exists(path):
            os.makedirs(path)
    # Split the dataset (seeded, so the split is reproducible).
    splitDataset(img_path, save_path, use_seed=True)
    # Undo the split (moves everything back into img_path).
    # mergeDataset(img_path, save_path)