YOLO数据集划分代码
- 只需修改代码开头的目录路径(image_dir、label_dir 以及各划分的目标目录),即可对自己的数据集进行划分。
- 目前数据集按 7:2:1(训练集:验证集:测试集)划分;如果想改成 6:2:2,可以在计算各子集大小的代码处(train_size、val_size 两行)修改相应的比例。
import os
import random
import shutil
# Source directories holding the unsplit images and their YOLO label files.
image_dir = r'D:\gz_data\1\images'
label_dir = r'D:\gz_data\1\labels'

# Per-split destination directories, derived from the two source directories
# so that the paths cannot drift apart (the val image path previously carried
# a stray doubled backslash).
train_image_dir = os.path.join(image_dir, 'train')
val_image_dir = os.path.join(image_dir, 'val')
test_image_dir = os.path.join(image_dir, 'test')
train_label_dir = os.path.join(label_dir, 'train')
val_label_dir = os.path.join(label_dir, 'val')
test_label_dir = os.path.join(label_dir, 'test')

# Create every destination directory up front; already-existing ones are fine.
for _split_dir in (
    train_image_dir, val_image_dir, test_image_dir,
    train_label_dir, val_label_dir, test_label_dir,
):
    os.makedirs(_split_dir, exist_ok=True)

# Collect the regular files directly inside image_dir (sub-directories, such
# as the split folders created above, are ignored).  Only images that have a
# matching '<stem>.txt' label are kept, so the ratios below are computed over
# the files that will actually be moved.
image_files = [
    f for f in os.listdir(image_dir)
    if os.path.isfile(os.path.join(image_dir, f))
    and os.path.exists(os.path.join(label_dir, os.path.splitext(f)[0] + '.txt'))
]
random.shuffle(image_files)

# Compute the size of each split (train:val:test = 7:2:1).  Change the two
# ratios here for a different split; the test split takes whatever remains,
# so truncation by int() never drops a file.
total_files = len(image_files)
train_size = int(total_files * 0.7)
val_size = int(total_files * 0.2)
test_size = total_files - train_size - val_size

# Slice the shuffled list into three consecutive, non-overlapping parts.
train_files = image_files[:train_size]
val_files = image_files[train_size:train_size + val_size]
test_files = image_files[train_size + val_size:]
def move_files(files, image_dest_dir, label_dest_dir,
               image_src_dir=None, label_src_dir=None):
    """Move image/label pairs into their destination directories.

    For every name in ``files`` the image is looked up in the image source
    directory and its label (same stem, ``.txt`` extension) in the label
    source directory.  A pair is moved only when both files exist; otherwise
    the entry is skipped and reported once the loop has finished.

    Args:
        files: Iterable of image file names (not full paths).
        image_dest_dir: Directory that receives the images.
        label_dest_dir: Directory that receives the label files.
        image_src_dir: Directory containing the images.  Defaults to the
            module-level ``image_dir``.
        label_src_dir: Directory containing the labels.  Defaults to the
            module-level ``label_dir``.

    Returns:
        The number of image/label pairs that were moved.

    Raises:
        shutil.Error: Propagated from ``shutil.move``, e.g. when a file with
            the same name already exists in a destination directory.
    """
    # Resolve the defaults at call time so the module-level paths are only
    # needed when the caller does not supply explicit source directories.
    if image_src_dir is None:
        image_src_dir = image_dir
    if label_src_dir is None:
        label_src_dir = label_dir

    # shutil.move() treats a non-existent destination as a new file name, so
    # make sure both destinations really are directories before moving.
    os.makedirs(image_dest_dir, exist_ok=True)
    os.makedirs(label_dest_dir, exist_ok=True)

    moved = 0
    skipped = []
    for file in files:
        image_path = os.path.join(image_src_dir, file)
        label_path = os.path.join(label_src_dir, os.path.splitext(file)[0] + '.txt')
        if not (os.path.exists(image_path) and os.path.exists(label_path)):
            skipped.append(file)
            continue
        shutil.move(image_path, image_dest_dir)
        shutil.move(label_path, label_dest_dir)
        moved += 1

    # Surface skipped entries instead of dropping them silently.
    if skipped:
        print("警告: 跳过 {} 个缺少图片或标签的文件: {}".format(len(skipped), skipped))
    return moved
# Move each split in turn (train, then val, then test) to its destination
# image and label directories, then report completion.
for _split_files, _split_image_dir, _split_label_dir in (
    (train_files, train_image_dir, train_label_dir),
    (val_files, val_image_dir, val_label_dir),
    (test_files, test_image_dir, test_label_dir),
):
    move_files(_split_files, _split_image_dir, _split_label_dir)

print("数据集划分完成")