Python划分训练集和验证集

本文详细介绍了如何使用Python在机器学习项目中,按照4:1的比例划分训练集和验证集,包括图片文件的复制、目录结构的创建和清理多余文件的操作。
摘要由CSDN通过智能技术生成

在机器学习大规模训练时,通常需要划分训练集与验证集。下面以训练集:验证集=4:1为例,讲述如何使用Python划分训练集与验证集。

1. 代码如下:

# 划分数据集

import os
import shutil

image_path = 'D:/0818/p-05/G00767147_1_1_20230818_102644'  # 图片文件
txt_path = 'D:/0818/p-05'  # 标签文件
new_file_path = 'D:/0818/D'  # 划分数据后的文件
train_rate = 0.8  # 训练集比例
val_rate = 0.2  # 验证集比例

# 将有对应标签的图片找出来,放到新文件夹下
labels = []
for label in os.listdir(txt_path):
    labels.append(os.path.splitext(label)[0])
for image_name in os.listdir(image_path):
    image_name = os.path.splitext(image_name)[0]

    if image_name in labels:
        image_name = image_name + ".jpg"
        shutil.copy(image_path + '/' + image_name, new_file_path)

# 计算训练集与验证集数量
images = []
for image in os.listdir(new_file_path):
    images.append(image)
    total = len(images)
    train_images = images[0:int(train_rate * total)]
    val_images = images[int(train_rate * total):int((train_rate + val_rate) * total)]

# 图片-train
for image in train_images:
    print(image)
    old_path = new_file_path + '/' + image
    new_path1 = new_file_path + '/' + 'images' + '/' + 'train'
    # new_path1 = new_file_path + '/' + 'train' + '/' + 'images'
    if not os.path.exists(new_path1):
        os.makedirs(new_path1)
    # new_path = new_path1 + '/' + image
    shutil.copy(old_path, new_path1)

# 图片-val
for image in val_images:
    old_path = new_file_path + '/' + image
    new_path1 = new_file_path + '/' + 'images' + '/' + 'val'
    # new_path1 = new_file_path + '/' + 'val' + '/' + 'images'
    if not os.path.exists(new_path1):
        os.makedirs(new_path1)
    # new_path = new_path1 + '/' + image
    shutil.copy(old_path, new_path1)

#  标签-train
images1 = []
for image in os.listdir(new_file_path + '/' + 'images' + '/' + 'train'):
    images1.append(os.path.splitext(image)[0])
for label_name in os.listdir(txt_path):
    label_name = os.path.splitext(label_name)[0]
    if label_name in images1:
        label_name = label_name + ".txt"
        label_train_path = new_file_path + '/' + 'labels' + '/' + 'train'
        if not os.path.exists(label_train_path):
            os.makedirs(label_train_path)
        shutil.copy(txt_path + '/' + label_name, label_train_path)
        shutil.copy(txt_path + '/' + 'classes.txt', label_train_path)

# 标签-val
images2 = []
for image in os.listdir(new_file_path + '/' + 'images' + '/' + 'val'):
    images2.append(os.path.splitext(image)[0])
for label_name in os.listdir(txt_path):
    label_name = os.path.splitext(label_name)[0]
    if label_name in images2:
        label_name = label_name + ".txt"
        label_val_path = new_file_path + '/' + 'labels' + '/' + 'val'
        if not os.path.exists(label_val_path):
            os.makedirs(label_val_path)
        shutil.copy(txt_path + '/' + label_name, label_val_path)
        shutil.copy(txt_path + '/' + 'classes.txt', label_val_path)

# 删除新文件夹下对应标签的图片
for name in os.listdir(new_file_path):
    if name.endswith('.jpg'):
        os.remove(os.path.join(new_file_path, name))


if image_name in labels:
image_name = image_name + ".jpg"
shutil.copy(image_path + '/' + image_name, new_file_path)

# 计算训练集与验证集数量
images = []
for image in os.listdir(new_file_path):
images.append(image)
total = len(images)
train_images = images[0:int(train_rate * total)]
val_images = images[int(train_rate * total):int((train_rate + val_rate) * total)]

# 图片-train
for image in train_images:
print(image)
old_path = new_file_path + '/' + image
new_path1 = new_file_path + '/' + 'images' + '/' + 'train'
# new_path1 = new_file_path + '/' + 'train' + '/' + 'images'
if not os.path.exists(new_path1):
os.makedirs(new_path1)
# new_path = new_path1 + '/' + image
shutil.copy(old_path, new_path1)

# 图片-val
for image in val_images:
old_path = new_file_path + '/' + image
new_path1 = new_file_path + '/' + 'images' + '/' + 'val'
# new_path1 = new_file_path + '/' + 'val' + '/' + 'images'
if not os.path.exists(new_path1):
os.makedirs(new_path1)
# new_path = new_path1 + '/' + image
shutil.copy(old_path, new_path1)

# 标签-train
images1 = []
for image in os.listdir(new_file_path + '/' + 'images' + '/' + 'train'):
images1.append(os.path.splitext(image)[0])
for label_name in os.listdir(txt_path):
label_name = os.path.splitext(label_name)[0]
if label_name in images1:
label_name = label_name + ".txt"
label_train_path = new_file_path + '/' + 'labels' + '/' + 'train'
if not os.path.exists(label_train_path):
os.makedirs(label_train_path)
shutil.copy(txt_path + '/' + label_name, label_train_path)
shutil.copy(txt_path + '/' + 'classes.txt', label_train_path)

# 标签-val
images2 = []
for image in os.listdir(new_file_path + '/' + 'images' + '/' + 'val'):
images2.append(os.path.splitext(image)[0])
for label_name in os.listdir(txt_path):
label_name = os.path.splitext(label_name)[0]
if label_name in images2:
label_name = label_name + ".txt"
label_val_path = new_file_path + '/' + 'labels' + '/' + 'val'
if not os.path.exists(label_val_path):
os.makedirs(label_val_path)
shutil.copy(txt_path + '/' + label_name, label_val_path)
shutil.copy(txt_path + '/' + 'classes.txt', label_val_path)

# 删除新文件夹下对应标签的图片
for name in os.listdir(new_file_path):
if name.endswith('.jpg'):
os.remove(os.path.join(new_file_path, name))

2. 运行结果截图

 

 

 

  • 3
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值