【python】深度学习训练模型划分训练集,测试集,验证集

划分训练集、测试集、验证集的主要流程是:先读取所有数据(images 或 labels)的文件名,存入一个列表;然后随机打乱该列表,按照给定比例对列表切片;最后把各段分别作为训练集、测试集和验证集输出。

核心部分:

import os
import numpy as np
# Core of the split: gather file names, shuffle them, cut the list 6:2:2.
root = r"D:\dataset\belt\JPEGImages"

# Collect the name (extension included) of every file found under `root`;
# os.walk also descends into sub-directories. The loop targets are
# deliberately not called `root` / `dir`, so the configured path is not
# overwritten and the builtin `dir` is not shadowed.
filename = []
for _dirpath, _dirnames, files in os.walk(root):
    filename.extend(files)

# Shuffle the list in place so the slices below form a random split.
np.random.shuffle(filename)

# Split 6:2:2. The cut points are truncated with int(), so any remainder
# ends up in the last subset.
train_end = int(len(filename) * 0.6)
trainval_end = int(len(filename) * 0.8)
train = filename[:train_end]
trainval = filename[train_end:trainval_end]
val = filename[trainval_end:]

不过,不同数据集(以及不同训练框架)对划分结果的使用方式并不一致,因此划分之后的后处理也各不相同。

大致总结了一些:

需要输出train.txt,trainval.txt,val.txt

import os
import numpy as np
# Write the 6:2:2 split as three list files: train.txt, trainval.txt, val.txt.
rootpath = r"D:\dataset\belt\JPEGImages"
# Directory that receives the three list files. The script used `output`
# without ever defining it (NameError); adjust this path as needed.
output = r"D:\dataset\belt\ImageSets\Main"
os.makedirs(output, exist_ok=True)

# Collect every file name under rootpath (sub-directories included) with its
# extension removed. os.path.splitext handles extensions of any length,
# unlike slicing off a fixed four characters.
filename = []
for _dirpath, _dirnames, files in os.walk(rootpath):
    for file in files:
        print(file)
        filename.append(os.path.splitext(file)[0])

# Shuffle the list in place so the slices below form a random split.
np.random.shuffle(filename)

# Split 6:2:2. The cut points are truncated with int(), so any remainder
# ends up in the last subset.
train_end = int(len(filename) * 0.6)
trainval_end = int(len(filename) * 0.8)
train = filename[:train_end]
trainval = filename[train_end:trainval_end]
val = filename[trainval_end:]

# One name per line in each list file.
subsets = {'train.txt': train, 'trainval.txt': trainval, 'val.txt': val}
for list_name, names in subsets.items():
    with open(os.path.join(output, list_name), 'w') as f:
        f.writelines(name + '\n' for name in names)

print('成功!')

然而,有些数据集测试并不是读取txt信息,而是将文件划分到train,val,test等不同的目录中。

这种情况下,按照划分好的训练集、验证集、测试集,直接把原图和对应的标签文件复制(或者移动)到各自的目标目录即可。
代码:

import os
import numpy as np
import shutil
# Copy images and their annotation masks into training / validation / test
# folders under output_path, using a random 6:2:2 split.
imgpath = r"D:\dataset\belt\JPEGImages"
annotationpath = r"D:\dataset\belt\SegmentationClass"
output_path = r'D:\workspace\BiSeNet-master\datasets\ade20k'

# Collect the names of the files directly inside imgpath. Only the first
# os.walk() entry (the top directory) is used: the copy step below joins each
# name with imgpath, so names found in sub-directories could not be resolved.
filename = []
for _dirpath, _dirnames, files in os.walk(imgpath):
    filename.extend(files)
    break

# Shuffle the list in place so the slices below form a random split.
np.random.shuffle(filename)

# Split 6:2:2. The cut points are truncated with int(), so any remainder
# ends up in the last subset.
train_end = int(len(filename) * 0.6)
val_end = int(len(filename) * 0.8)
train = filename[:train_end]
val = filename[train_end:val_end]
test = filename[val_end:]

outimages = os.path.join(output_path, 'images')
outannotations = os.path.join(output_path, 'annotations')

for subset_dir, names in (('training', train), ('validation', val), ('test', test)):
    image_dir = os.path.join(outimages, subset_dir)
    annotation_dir = os.path.join(outannotations, subset_dir)
    # makedirs creates missing parents (output_path itself may not exist yet)
    # and is a no-op when the directory is already there.
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(annotation_dir, exist_ok=True)

    for name in names:
        shutil.copyfile(os.path.join(imgpath, name), os.path.join(image_dir, name))
        # The annotation shares the image's base name but is always a .png;
        # splitext copes with image extensions of any length (.jpg, .jpeg, ...).
        annotations_name = os.path.splitext(name)[0] + '.png'
        shutil.copyfile(os.path.join(annotationpath, annotations_name),
                        os.path.join(annotation_dir, annotations_name))

print('成功!')

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值