import os
import shutil
import random
# Seed the random number generator so that results are repeatable between runs.
random.seed(0)
def _copy_files(names, src_dir, dst_dir, verbose=False):
    """Copy every file listed in *names* from *src_dir* into *dst_dir*.

    The destination directory is created on demand. When *names* is empty
    nothing is created, so an unused split leaves no empty folder behind.
    When *verbose* is true each file name is printed before it is copied.
    """
    if not names:
        return
    os.makedirs(dst_dir, exist_ok=True)
    for name in names:
        if verbose:
            print(name)
        shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def split_data(file_path, xml_path, new_file_path, train_rate, val_rate, test_rate):
    """Split an image/label dataset into train, valid and test folders.

    Images in *file_path* are paired with label files in *xml_path* that share
    the same file name stem (name without extension). The pairs are shuffled
    with the module-level ``random`` generator and copied to::

        new_file_path/{train,valid,test}/{images,labels}

    Args:
        file_path: Directory containing the source images.
        xml_path: Directory containing the source label files.
        new_file_path: Root directory that receives the split dataset.
        train_rate: Fraction of the pairs assigned to the training split.
        val_rate: Fraction of the pairs assigned to the validation split.
        test_rate: Accepted for interface compatibility but not read; the test
            split always receives whatever remains after train and valid.

    Raises:
        OSError: If a source directory cannot be listed or a file cannot be
            copied.
    """
    # Index label files by stem. Sorting makes the result independent of the
    # arbitrary order in which os.listdir returns entries.
    labels_by_stem = {
        os.path.splitext(name)[0]: name
        for name in sorted(os.listdir(xml_path))
        if os.path.isfile(os.path.join(xml_path, name))
    }

    # Pair each image with the label of the same stem instead of relying on
    # the two directory listings happening to line up position by position.
    pairs = []
    skipped = 0
    for image in sorted(os.listdir(file_path)):
        if not os.path.isfile(os.path.join(file_path, image)):
            continue
        label = labels_by_stem.get(os.path.splitext(image)[0])
        if label is None:
            skipped += 1
            continue
        pairs.append((image, label))
    if skipped:
        print("Skipped {} image(s) without a matching label file".format(skipped))

    # Shuffle the pairs together so an image and its label stay in one split.
    total = len(pairs)
    random.shuffle(pairs)

    train_end = int(train_rate * total)
    val_end = int((train_rate + val_rate) * total)
    # Only the training split echoes the copied file names.
    splits = (
        ('train', pairs[:train_end], True),
        ('valid', pairs[train_end:val_end], False),
        ('test', pairs[val_end:], False),
    )

    for split_name, split_pairs, verbose in splits:
        images = [image for image, _ in split_pairs]
        labels = [label for _, label in split_pairs]
        _copy_files(images, file_path,
                    os.path.join(new_file_path, split_name, 'images'), verbose)
        _copy_files(labels, xml_path,
                    os.path.join(new_file_path, split_name, 'labels'), verbose)
# Script entry point
if __name__ == '__main__':
    # Folder holding the original images
    image_dir = r"E:\ZhuoMian\天池\train\JPEGImages"
    # Folder holding the original label files
    label_dir = r"E:\ZhuoMian\天池\train\Annotations_txt"
    # Folder that receives the split dataset
    output_dir = r"E:\ZhuoMian\天池\train\bisai"
    # 90% training, 0% validation (no validation set), 10% test
    split_data(
        file_path=image_dir,
        xml_path=label_dir,
        new_file_path=output_dir,
        train_rate=0.9,
        val_rate=0,
        test_rate=0.1,
    )
# YOLO object detection: splitting a dataset
# (Scraped page footer: latest recommended article published 2024-07-12 15:12:00)