YOLO数据集划分(test+val+train)

YOLO数据集划分(test+val+train)

import os
import shutil
import random

random.seed(0)


def _split_index(rate, total):
    """Return the slice boundary for ``rate * total``, truncated toward zero.

    The product is rounded to six decimals before truncation so that binary
    floating-point error (for example ``(0.7 + 0.1) * 10`` evaluating to
    ``7.999999999999999``) does not silently move an item out of a split.
    """
    return int(round(rate * total, 6))


def split_data(file_path, label_path, new_file_path, train_rate, val_rate, test_rate):
    """Copy an image/label dataset into train, val and test sub-folders.

    Images are taken from ``file_path`` (files ending in .png, .jpg or .jpeg,
    case-insensitive), shuffled with the module-level ``random`` generator and
    copied to ``new_file_path/images/<split>``. For every image, a label file
    with the same base name and a ``.txt`` extension is looked up in
    ``label_path`` and, if present, copied to ``new_file_path/labels/<split>``.
    An image without a label file is still copied; only a message is printed.

    Args:
        file_path: Directory containing the source images.
        label_path: Directory containing the ``.txt`` label files.
        new_file_path: Root directory of the divided dataset; created on demand.
        train_rate: Fraction of images assigned to the train split.
        val_rate: Fraction of images assigned to the val split.
        test_rate: Accepted for interface compatibility; the test split always
            receives whatever remains after the train and val splits.

    Raises:
        ValueError: If ``train_rate`` or ``val_rate`` is negative, or if their
            sum exceeds 1.
    """
    if train_rate < 0 or val_rate < 0 or train_rate + val_rate > 1 + 1e-9:
        raise ValueError(
            "train_rate and val_rate must be non-negative and sum to at most 1, "
            f"got train_rate={train_rate}, val_rate={val_rate}"
        )

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # seeded shuffle produce the same split on every machine.
    image_names = sorted(
        name for name in os.listdir(file_path)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    total = len(image_names)
    random.shuffle(image_names)

    train_end = _split_index(train_rate, total)
    val_end = _split_index(train_rate + val_rate, total)
    splits = (
        ('train', image_names[:train_end]),
        ('val', image_names[train_end:val_end]),
        ('test', image_names[val_end:]),
    )

    for dataset_type, images in splits:
        images_path = os.path.join(new_file_path, 'images', dataset_type)
        labels_path = os.path.join(new_file_path, 'labels', dataset_type)
        # Created even for an empty split so the output layout is always complete.
        os.makedirs(images_path, exist_ok=True)
        os.makedirs(labels_path, exist_ok=True)

        for image in images:
            shutil.copy(os.path.join(file_path, image), os.path.join(images_path, image))

            label_name = os.path.splitext(image)[0] + '.txt'
            old_label_path = os.path.join(label_path, label_name)
            if os.path.exists(old_label_path):
                shutil.copy(old_label_path, os.path.join(labels_path, label_name))
            else:
                print(f"Label file for {image} does not exist, skipping.")


if __name__ == '__main__':
    # Replace the three paths below with the locations on your own machine.
    source_images = "D:/Academic/Deep Learning/yolov10-main/data/NEU/NEU-DET (1)/IMAGES"
    source_labels = 'D:/Academic/Deep Learning/yolov10-main/data/NEU/NEU-DET (1)/labels'
    output_root = "D:/Academic/Deep Learning/yolov10-main/data/NEU/divided"

    # 70% train / 10% val / 20% test.
    split_data(
        file_path=source_images,
        label_path=source_labels,
        new_file_path=output_root,
        train_rate=0.7,
        val_rate=0.1,
        test_rate=0.2,
    )

  • 4
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
假设你的数据集文件夹为data,其中包含了许多图片和对应的标签文件,下面是划分数据集的Python代码:

```python
import os
import random
import shutil

# 设置随机数种子,确保每次运行结果一致
random.seed(42)

# 划分比例
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

# 数据集路径
data_path = "data"

# 存放划分后的数据集路径
train_path = "train"
val_path = "val"
test_path = "test"

# 获取所有图片的文件名(去掉扩展名)
image_names = [os.path.splitext(filename)[0] for filename in os.listdir(data_path) if filename.endswith(".jpg")]

# 随机打乱图片文件名
random.shuffle(image_names)

# 计算划分后的数据集大小
num_images = len(image_names)
num_train = int(num_images * train_ratio)
num_val = int(num_images * val_ratio)
num_test = num_images - num_train - num_val

# 创建存放划分数据集的文件夹
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)
os.makedirs(test_path, exist_ok=True)

# 将图片和标签文件复制到对应的文件夹中
for i, image_name in enumerate(image_names):
    if i < num_train:
        shutil.copy(os.path.join(data_path, image_name + ".jpg"), os.path.join(train_path, image_name + ".jpg"))
        shutil.copy(os.path.join(data_path, image_name + ".txt"), os.path.join(train_path, image_name + ".txt"))
    elif i < num_train + num_val:
        shutil.copy(os.path.join(data_path, image_name + ".jpg"), os.path.join(val_path, image_name + ".jpg"))
        shutil.copy(os.path.join(data_path, image_name + ".txt"), os.path.join(val_path, image_name + ".txt"))
    else:
        shutil.copy(os.path.join(data_path, image_name + ".jpg"), os.path.join(test_path, image_name + ".jpg"))
        shutil.copy(os.path.join(data_path, image_name + ".txt"), os.path.join(test_path, image_name + ".txt"))
```

此代码将会把原始数据集按照8:1:1的比例划分为训练集、验证集和测试集,每个数据集里面包含了对应的图片文件和标签文件。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值