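"""Split an image/label dataset into training and validation sets.

Before running this script, make sure the working directory (normally the
folder that contains this script) holds an ``images_source`` folder and a
``labels_source`` folder. They must contain the original image data and
label data that have not yet been divided into training and validation sets.
"""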
import os
import shutil
import random
import math
# Default locations, all resolved relative to the current working directory.
source_images_path = r"images_source"
source_labels_path = r"labels_source"
target_file_path = 'datasets'
# Fraction of the images assigned to the training set; the rest is validation.
train_prop = 0.8


def _label_name(image_name):
    """Return the label file name ("<stem>.txt") that matches *image_name*.

    Only the final extension is stripped, so "a.b.jpg" maps to "a.b.txt"
    rather than being truncated at the first dot.
    """
    stem, _ = os.path.splitext(image_name)
    return stem + '.txt'


def _choose_split(file_names, train_count, seed=None):
    """Randomly partition *file_names* into ``(train, val)`` lists.

    Exactly *train_count* names go to the training list. Both lists keep the
    relative order of *file_names*. Passing *seed* makes the choice
    reproducible; ``None`` seeds from the operating system.
    """
    rng = random.Random(seed)
    # A set gives O(1) membership tests while partitioning below.
    train_indices = set(rng.sample(range(len(file_names)), train_count))
    train = [name for idx, name in enumerate(file_names) if idx in train_indices]
    val = [name for idx, name in enumerate(file_names) if idx not in train_indices]
    return train, val


def _copy_all(names, src_dir, dst_dir):
    """Copy every file in *names* from *src_dir* into *dst_dir*."""
    for name in names:
        shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def split_dataset(images_dir, labels_dir, output_dir, train_fraction=0.8, seed=None):
    """Split a folder of images and their label files into train/val sets.

    The result is written to ``<output_dir>/images/{train,val}`` and
    ``<output_dir>/labels/{train,val}``. Source files are copied, never moved.

    Args:
        images_dir: Folder holding the un-split image files.
        labels_dir: Folder holding one "<image stem>.txt" label per image.
        output_dir: Root folder that receives the split dataset.
        train_fraction: Share of images (0..1) placed in the training set;
            the count is rounded up with ``math.ceil``.
        seed: Optional seed for a reproducible split.

    Returns:
        A ``(train_files, val_files)`` tuple of image file names.

    Raises:
        ValueError: If *train_fraction* is outside ``[0, 1]``.
        FileNotFoundError: If any image has no matching label file.
        FileExistsError: If a target folder already contains files; mixing a
            new random split into an old one would leak images between the
            training and validation sets.
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError(f"train_fraction must be within [0, 1], got {train_fraction!r}")

    # Keep regular files only (a sub-directory cannot be copied as an image)
    # and sort them so that a fixed seed always yields the same split.
    all_files = sorted(
        name for name in os.listdir(images_dir)
        if os.path.isfile(os.path.join(images_dir, name))
    )
    num_data = len(all_files)
    train_num = math.ceil(train_fraction * num_data)  # round up on purpose
    print(f"一共有{num_data}张图片")
    val_num = num_data - train_num
    print(f"将会划分出训练集数据总共{train_num}张,验证集数据总共{val_num}张")

    train_files, val_files = _choose_split(all_files, train_num, seed)
    print(f"划分出训练集数据{len(train_files)}个")
    print(f"划分出验证集数据{len(val_files)}个")
    # Sanity check: the two subsets must not share any file.
    overlap = set(train_files) & set(val_files)
    print(f"验证集和训练集中的数据重合个数为{len(overlap)}个")

    # Check every label before copying anything, so a missing label cannot
    # leave a half-written dataset behind.
    missing = [
        label for label in map(_label_name, all_files)
        if not os.path.isfile(os.path.join(labels_dir, label))
    ]
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} label file(s) missing from {labels_dir!r}, "
            f"e.g. {missing[0]!r}"
        )

    train_images_path = os.path.join(output_dir, 'images', 'train')
    val_images_path = os.path.join(output_dir, 'images', 'val')
    train_labels_path = os.path.join(output_dir, 'labels', 'train')
    val_labels_path = os.path.join(output_dir, 'labels', 'val')
    targets = (train_images_path, val_images_path, train_labels_path, val_labels_path)

    for path in targets:
        if os.path.isdir(path) and os.listdir(path):
            raise FileExistsError(
                f"target folder {path!r} is not empty; remove the previous "
                f"split before creating a new one"
            )
    for path in targets:
        os.makedirs(path, exist_ok=True)

    # Write straight into the final location: no staging folders in the
    # working directory are created, moved or deleted.
    _copy_all(train_files, images_dir, train_images_path)
    _copy_all(val_files, images_dir, val_images_path)
    _copy_all(map(_label_name, train_files), labels_dir, train_labels_path)
    _copy_all(map(_label_name, val_files), labels_dir, val_labels_path)
    return train_files, val_files


def main():
    """Split the default source folders into the default datasets folder."""
    split_dataset(source_images_path, source_labels_path, target_file_path, train_prop)


if __name__ == "__main__":
    main()