将数据集划分为训练集和验证集（示例比例为 训练集 : 验证集 = 9 : 1）
Python代码
import os
import random
import shutil
def split_dataset_yolo(image_folder, label_folder, train_ratio):
    """Randomly split a YOLO-style dataset into ``train`` and ``val`` subsets.

    Image files found directly inside ``image_folder`` are shuffled; the
    first ``int(n * train_ratio)`` of them are copied to
    ``<image_folder>/train`` and the remainder to ``<image_folder>/val``.
    For every image, the label file with the same stem and a ``.txt``
    extension is copied from ``label_folder`` into the matching
    ``<label_folder>/train`` or ``<label_folder>/val`` directory.  Files are
    copied, not moved, so the originals stay where they are.

    Args:
        image_folder: Directory holding the images. Files ending in
            ``.jpg``, ``.jpeg`` or ``.png`` (any letter case) are used.
        label_folder: Directory holding the ``.txt`` label files.
        train_ratio: Fraction of the images assigned to the training
            subset, e.g. ``0.9`` for a 9:1 split.

    Raises:
        OSError: If ``image_folder`` cannot be listed (typically
            ``FileNotFoundError`` when the path does not exist).
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # List the images before creating anything so that a wrong path fails
    # without leaving empty train/val folders behind.  Sorting makes the
    # shuffle reproducible when the caller seeds ``random``.
    image_files = sorted(
        name for name in os.listdir(image_folder)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(image_folder, name))
    )

    train_image_folder = os.path.join(image_folder, 'train')
    train_label_folder = os.path.join(label_folder, 'train')
    valid_image_folder = os.path.join(image_folder, 'val')
    valid_label_folder = os.path.join(label_folder, 'val')
    for folder in (train_image_folder, train_label_folder,
                   valid_image_folder, valid_label_folder):
        os.makedirs(folder, exist_ok=True)

    num_images = len(image_files)
    # Clamp so that ratios outside [0, 1] degrade to "all val" / "all train"
    # instead of producing a surprising negative slice.
    num_train = max(0, min(num_images, int(num_images * train_ratio)))

    random.shuffle(image_files)
    subsets = (
        (image_files[:num_train], train_image_folder, train_label_folder),
        (image_files[num_train:], valid_image_folder, valid_label_folder),
    )

    for files, image_dest, label_dest in subsets:
        for image_file in files:
            # splitext only strips the final extension, so names such as
            # "a.jpg.jpg" map to "a.jpg.txt" rather than "a.txt.txt".
            label_file = os.path.splitext(image_file)[0] + '.txt'
            shutil.copy2(os.path.join(image_folder, image_file), image_dest)

            label_path = os.path.join(label_folder, label_file)
            if os.path.isfile(label_path):
                shutil.copy2(label_path, label_dest)
            else:
                # An image without a label file is a valid background
                # sample in YOLO datasets; keep the image and report it.
                print(f"Warning: no label file for {image_file}; "
                      "copied image only.")

    print("Dataset split completed.")
# Fill in the two folder paths before running this script.
image_folder_path = ''   # directory that contains the image files
label_folder_path = ''   # directory that contains the YOLO .txt label files
train_ratio = 0.9        # 9:1 train/val split

# Only perform the split when executed as a script, not when imported.
if __name__ == "__main__":
    # An empty path can never work (os.listdir('') fails), so stop early
    # with a clear message instead of a confusing FileNotFoundError.
    if not image_folder_path or not label_folder_path:
        raise SystemExit(
            "Set image_folder_path and label_folder_path before running."
        )
    split_dataset_yolo(image_folder_path, label_folder_path, train_ratio)
效果图
图片
标签