将数据集划分为训练集和验证集

在运行本脚本之前,请确保当前工作目录(通常就是脚本所在的文件夹,脚本中的路径都是相对于当前工作目录解析的)下包含images_source文件夹和labels_source文件夹,里面分别是原始的、还没有划分成训练集和验证集的图片数据和标签数据;每张图片对应的标签文件需要与图片同名,扩展名为.txt。

import os
import shutil
import random
import math

# Collect the candidate images. Only regular files are kept, so a stray
# sub-directory inside the source folder cannot break the copy step later on.
source_images_path = r"images_source"
all_files = [
    name
    for name in os.listdir(source_images_path)
    if os.path.isfile(os.path.join(source_images_path, name))
]
num_data = len(all_files)
train_prop = 0.8
# ceil rounds the training share up, so any non-empty dataset gets at least
# one training image.
train_num = math.ceil(train_prop * num_data)

print(f"一共有{num_data}张图片")
val_num = num_data - train_num
print(f"将会划分出训练集数据总共{train_num}张,验证集数据总共{val_num}张")

# Draw train_num distinct indices at random; every remaining index is validation.
min_range = 0
max_range = num_data - 1

all_list = list(range(num_data))
train_samples = sorted(random.sample(range(min_range, max_range + 1), train_num))
# Set membership keeps building the complement linear instead of quadratic.
train_index_set = set(train_samples)
val_samples = [idx for idx in all_list if idx not in train_index_set]

# Map the chosen indices back to file names (both lists stay in index order).
train_files = [all_files[idx] for idx in train_samples]
val_files = [all_files[idx] for idx in val_samples]

print(f"划分出训练集数据{len(train_files)}个")
print(f"划分出验证集数据{len(val_files)}个")
# Sanity check: the two splits are disjoint by construction, so this should
# always report zero overlapping files.
val_name_set = set(val_files)
chonghe = [name for name in train_files if name in val_name_set]
print(f"验证集和训练集中的数据重合个数为{len(chonghe)}个")

# Working layout for the split: images/{train,val} and labels/{train,val}.
images_path = r"images"
train_images_path = r"images/train"
val_images_path = r"images/val"

labels_path = r"labels"
train_labels_path = r"labels/train"
val_labels_path = r"labels/val"

# exist_ok=True makes creation idempotent without a separate existence check
# (no check-then-create race), and os.makedirs also creates the parent
# directory (images/ or labels/) when it is missing.
for split_dir in (
    train_images_path,
    val_images_path,
    train_labels_path,
    val_labels_path,
):
    os.makedirs(split_dir, exist_ok=True)

# Copy every image of each split from the source folder into that split's
# directory: training images first, then validation images.
for split_files, split_dir in (
    (train_files, train_images_path),
    (val_files, val_images_path),
):
    for image_name in split_files:
        src_image = os.path.join(source_images_path, image_name)
        dst_image = os.path.join(split_dir, image_name)
        shutil.copy(src_image, dst_image)

source_labels_path = r"labels_source"
# Build each split's label list from the images actually present in the
# split directories.
train_files = os.listdir(train_images_path)
val_files = os.listdir(val_images_path)
# os.path.splitext strips only the final extension, so an image named
# "a.b.jpg" maps to "a.b.txt" rather than being truncated to "a.txt".
train_labels = [os.path.splitext(image_name)[0] + '.txt' for image_name in train_files]
val_labels = [os.path.splitext(image_name)[0] + '.txt' for image_name in val_files]

# Copy the label files next to their split: training labels first, then
# validation labels. A missing label file raises FileNotFoundError.
for label_names, labels_dir in (
    (train_labels, train_labels_path),
    (val_labels, val_labels_path),
):
    for label_name in label_names:
        shutil.copy(
            os.path.join(source_labels_path, label_name),
            os.path.join(labels_dir, label_name),
        )

# Gather the finished train/val images and labels under a single "datasets"
# folder, then remove the temporary images/ and labels/ working folders.
target_file_path = 'datasets'
split_roots = (images_path, labels_path)

# Create datasets/<root name> for each working folder before moving anything.
relocation_plan = []
for split_root in split_roots:
    dest_root = os.path.join(target_file_path, os.path.basename(split_root))
    os.makedirs(dest_root, exist_ok=True)
    relocation_plan.append((split_root, dest_root))

# Move each entry of a working folder (its train/ and val/ sub-folders) into
# the matching folder under datasets/.
# NOTE(review): shutil.move is documented to require that the destination
# path inside an existing directory does not already exist, so a second run
# over a populated datasets/ folder presumably fails here - confirm.
for split_root, dest_root in relocation_plan:
    for entry in os.listdir(split_root):
        shutil.move(os.path.join(split_root, entry), dest_root)

# The working folders have been emptied by the moves above; delete them.
for split_root in split_roots:
    shutil.rmtree(split_root)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值