将数据集划分为训练集、测试集和验证集

import os
import shutil
from sklearn.model_selection import train_test_split

# Split an image/label dataset into train (80%), test (10%) and val (10%)
# subsets and copy the files into a new "images/<subset>" and
# "labels/<subset>" directory layout.

# Locations of the original (unsplit) dataset.
original_images_path = "D:\\PCB_defect\\datasets\\Joint_PCB_New\\images"
original_labels_path = "D:\\PCB_defect\\datasets\\Joint_PCB_New\\labels"

# Root of the new dataset and its per-subset image folders.
new_dataset_path = "D:\\PCB_defect\\datasets\\Joint_PCB_A"
train_path = os.path.join(new_dataset_path, "images", "train")
test_path = os.path.join(new_dataset_path, "images", "test")
val_path = os.path.join(new_dataset_path, "images", "val")

# Per-subset label folders, mirroring the image folders.
train_labels_path = os.path.join(new_dataset_path, "labels", "train")
test_labels_path = os.path.join(new_dataset_path, "labels", "test")
val_labels_path = os.path.join(new_dataset_path, "labels", "val")


def _label_name(image_name):
    """Return the label file name for *image_name* (same stem, ``.txt``)."""
    # splitext swaps only the final extension; str.replace would also
    # rewrite any ".bmp" that happens to appear inside the stem.
    return os.path.splitext(image_name)[0] + ".txt"


def _copy_subset(files, images_src, labels_src, images_dst, labels_dst):
    """Copy every image in *files* and its label file into the destinations.

    Args:
        files: Image file names (not full paths) belonging to one subset.
        images_src: Directory containing the source images.
        labels_src: Directory containing the source label files.
        images_dst: Directory the images are copied into (created if absent).
        labels_dst: Directory the labels are copied into (created if absent).

    Raises:
        FileNotFoundError: If an image or its label file does not exist.
    """
    os.makedirs(images_dst, exist_ok=True)
    os.makedirs(labels_dst, exist_ok=True)
    for image_name in files:
        label_name = _label_name(image_name)
        shutil.copy(
            os.path.join(images_src, image_name),
            os.path.join(images_dst, image_name),
        )
        shutil.copy(
            os.path.join(labels_src, label_name),
            os.path.join(labels_dst, label_name),
        )


# Collect all .bmp image names. os.listdir returns entries in arbitrary
# order, so sort them to make the seeded split below reproducible.
image_files = sorted(
    f for f in os.listdir(original_images_path) if f.endswith(".bmp")
)

# Fail before copying anything if a label is missing, instead of stopping
# halfway and leaving a partially populated output dataset.
missing_labels = [
    _label_name(f)
    for f in image_files
    if not os.path.isfile(os.path.join(original_labels_path, _label_name(f)))
]
if missing_labels:
    raise FileNotFoundError(
        f"{len(missing_labels)} label file(s) missing in "
        f"{original_labels_path}: {missing_labels[:10]}"
    )

# Hold out 20% of the images, then halve the hold-out into test and val.
train_files, holdout_files = train_test_split(
    image_files, test_size=0.2, random_state=42
)
test_files, val_files = train_test_split(
    holdout_files, test_size=0.5, random_state=42
)

# Copy images and labels into the new folder structure.
for subset_files, images_dst, labels_dst in (
    (train_files, train_path, train_labels_path),
    (test_files, test_path, test_labels_path),
    (val_files, val_path, val_labels_path),
):
    _copy_subset(
        subset_files,
        original_images_path,
        original_labels_path,
        images_dst,
        labels_dst,
    )

# The original line was a bare string expression (a no-op); print it so the
# completion message is actually shown.
print("数据集已成功划分为训练集、测试集和验证集。")

  • 3
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值