在制作数据集时,需要将一个文件夹中的部分文件批量移动到另一个文件夹中,因此编写了下面的程序来自动完成该任务。
导入相应的库
import os
import numpy as np
import shutil
设置数据集路径,并读取相应的文件名。该数据集的组织形式与 YOLOv5 数据集相同,分为 images 和 labels 两个大文件夹,两个文件夹中的文件名一一对应。
# Root directories of the dataset: one for images, one for labels
images_path = './datasets/sim2real/images'
labels_path = './datasets/sim2real/labels'

# Names of the split sub-directories found under each root
train_name, val_name = 'train', 'val'

# Image directories of the train and val splits
train_images_path, val_images_path = (
    os.path.join(images_path, split) for split in (train_name, val_name)
)
# Label directories of the train and val splits
train_labels_path, val_labels_path = (
    os.path.join(labels_path, split) for split in (train_name, val_name)
)
# Collect the stems (file names without extension) of the training images
# into files_name.
# Only the top level of train_images_path is scanned, because the files are
# later addressed as os.path.join(train_images_path, stem + '.png'); a stem
# found in a sub-directory could not be located that way.
# If the directory cannot be walked, os.walk yields nothing and the fallback
# tuple produces an empty result.
_, _, top_level_files = next(
    os.walk(train_images_path), (train_images_path, [], [])
)
files_name = [
    os.path.splitext(file_name)[0]
    for file_name in top_level_files
    if os.path.splitext(file_name)[1] == '.png'
]
# Convert the list to an array so it can be indexed with an index array later;
# dtype=str keeps the array a string array even when no file was found.
files_name = np.array(files_name, dtype=str)
随机生成下标
# Total number of candidate files
file_count = len(files_name)
# Random ordering of the indices 0 .. file_count - 1
shuffled_index = np.random.permutation(file_count)
# Number of files to move: one quarter of all files, truncated to an integer
split_index = int(file_count * 0.25)
# The first split_index shuffled indices select the files to move
val_index = shuffled_index[:split_index]
# Look up the names of the selected files
val_files = files_name[val_index]
将val_files移动到指定的位置
# Move every selected file (image and matching label) from the train split
# to the val split.
# Create the destination directories first: shutil.move does not create
# missing parent directories and would fail if the val folders do not exist.
os.makedirs(val_images_path, exist_ok=True)
os.makedirs(val_labels_path, exist_ok=True)
for file_name in val_files:
    # move images
    image_name = os.path.join(val_images_path, file_name + '.png')
    shutil.move(os.path.join(train_images_path, file_name + '.png'), image_name)
    # move labels
    label_name = os.path.join(val_labels_path, file_name + '.txt')
    shutil.move(os.path.join(train_labels_path, file_name + '.txt'), label_name)