划分训练集、测试集、验证集的主要流程是:先读取所有数据(images 或 labels)的文件名,存入列表;然后打乱列表,并按比例对列表切片;最后分段输出训练集、测试集和验证集。
核心部分:
import os
import numpy as np
# Directory that holds every image of the dataset.
root = r"D:\dataset\belt\JPEGImages"


def split_names(names, first_cut=0.6, second_cut=0.8):
    """Cut *names* into three consecutive slices.

    Args:
        names: Sequence to split; it is sliced, never modified.
        first_cut: Fraction of the sequence at which the first slice ends.
        second_cut: Fraction of the sequence at which the second slice ends.

    Returns:
        A ``(head, middle, tail)`` tuple.  With the defaults this is a
        6:2:2 split; both cut indices are truncated with ``int``, so any
        remainder goes to the last slice.
    """
    total = len(names)
    low = int(total * first_cut)
    high = int(total * second_cut)
    return names[:low], names[low:high], names[high:]


# Collect the name (extension included) of every file found under `root`.
# The walk uses throw-away loop names so that `root` keeps pointing at the
# dataset directory and the builtin `dir` is not shadowed.
filename = []
for _dirpath, _dirnames, files in os.walk(root):
    filename.extend(files)

# Shuffle the list in place so the split below is random.
np.random.shuffle(filename)

# Split with the default 6:2:2 ratio.
# NOTE(review): `trainval` is simply the middle 20% slice here, not the union
# of `train` and `val` -- confirm that is what the consumer expects.
train, trainval, val = split_names(filename)
不过,不同数据集的需求不同,划分之后的后处理方式也不一样。
下面大致总结几种情况:
情况一:需要输出 train.txt、trainval.txt、val.txt 三个列表文件
import os
import numpy as np
# Directory that holds every image of the dataset.
rootpath = r"D:\dataset\belt\JPEGImages"
# Directory that receives the three list files.  The original snippet used
# `output` without ever defining it (NameError); it now defaults to the parent
# of `rootpath` -- adjust it to wherever the lists are expected.
output = os.path.dirname(rootpath) or os.curdir


def collect_stems(directory):
    """Return the extension-less name of every file found under *directory*.

    Sub-directories are walked as well, and each file name is echoed to
    stdout while it is collected.
    """
    stems = []
    for _dirpath, _dirnames, files in os.walk(directory):
        for file in files:
            print(file)
            # splitext copes with extensions of any length (".jpeg", ".tiff"),
            # unlike slicing off a fixed four characters.
            stems.append(os.path.splitext(file)[0])
    return stems


def split_names(names, first_cut=0.6, second_cut=0.8):
    """Cut *names* into three consecutive slices.

    Args:
        names: Sequence to split; it is sliced, never modified.
        first_cut: Fraction of the sequence at which the first slice ends.
        second_cut: Fraction of the sequence at which the second slice ends.

    Returns:
        A ``(head, middle, tail)`` tuple.  With the defaults this is a
        6:2:2 split; both cut indices are truncated with ``int``, so any
        remainder goes to the last slice.
    """
    total = len(names)
    low = int(total * first_cut)
    high = int(total * second_cut)
    return names[:low], names[low:high], names[high:]


def write_name_list(path, names):
    """Write *names* to the text file *path*, one name per line."""
    with open(path, 'w') as handle:
        handle.writelines(name + '\n' for name in names)


# Build the list of all file names (without extension).
filename = collect_stems(rootpath)

# Shuffle the list in place so the split below is random.
np.random.shuffle(filename)

# Split with the default 6:2:2 ratio.
# NOTE(review): `trainval` is simply the middle 20% slice here, not the union
# of `train` and `val` -- confirm that is what the consumer expects.
train, trainval, val = split_names(filename)

# Write train.txt, trainval.txt and val.txt; create the target directory
# first so that opening the files cannot fail on a missing folder.
os.makedirs(output, exist_ok=True)
write_name_list(os.path.join(output, 'train.txt'), train)
write_name_list(os.path.join(output, 'trainval.txt'), trainval)
write_name_list(os.path.join(output, 'val.txt'), val)
print('成功!')
然而,有些数据集在训练和测试时并不读取 txt 列表,而是要求把文件划分到 train、val、test 等不同的目录中。
情况二:按照划分好的训练集、验证集、测试集,直接把原图和标签文件复制(或者移动)到指定目录。
代码:
import os
import numpy as np
import shutil
# Source directories: images and their annotation files.
imgpath = r"D:\dataset\belt\JPEGImages"
annotationpath = r"D:\dataset\belt\SegmentationClass"
# Root of the directory tree that receives the split dataset.
output_path = r'D:\workspace\BiSeNet-master\datasets\ade20k'


def list_top_level_files(directory):
    """Return the names of the files located directly inside *directory*.

    Only the first level is listed: the copy step joins each name onto the
    source directory, so names collected from sub-directories could never be
    found again.  An empty list is returned when the walk yields nothing.
    """
    _dirpath, _dirnames, files = next(os.walk(directory), (directory, [], []))
    return list(files)


def split_names(names, first_cut=0.6, second_cut=0.8):
    """Cut *names* into three consecutive slices.

    Args:
        names: Sequence to split; it is sliced, never modified.
        first_cut: Fraction of the sequence at which the first slice ends.
        second_cut: Fraction of the sequence at which the second slice ends.

    Returns:
        A ``(head, middle, tail)`` tuple.  With the defaults this is a
        6:2:2 split; both cut indices are truncated with ``int``, so any
        remainder goes to the last slice.
    """
    total = len(names)
    low = int(total * first_cut)
    high = int(total * second_cut)
    return names[:low], names[low:high], names[high:]


def copy_split(names, image_src, annotation_src, image_dst, annotation_dst,
               verbose=False):
    """Copy each image in *names* together with its ``.png`` annotation.

    Args:
        names: Image file names (with extension) relative to *image_src*.
        image_src: Directory the images are read from.
        annotation_src: Directory the annotations are read from.
        image_dst: Existing directory that receives the images.
        annotation_dst: Existing directory that receives the annotations.
        verbose: When true, print source and destination of every image.

    Raises:
        FileNotFoundError: If an image or its annotation does not exist.
    """
    for name in names:
        source = os.path.join(image_src, name)
        target = os.path.join(image_dst, name)
        if verbose:
            print(source)
            print(target)
        shutil.copyfile(source, target)
        # The annotation shares the image's stem; splitext handles extensions
        # of any length instead of assuming exactly three characters.
        annotations_name = os.path.splitext(name)[0] + '.png'
        shutil.copyfile(os.path.join(annotation_src, annotations_name),
                        os.path.join(annotation_dst, annotations_name))


# Build the list of all image file names.
filename = list_top_level_files(imgpath)

# Shuffle the list in place so the split below is random.
np.random.shuffle(filename)

# Split into train / validation / test with the default 6:2:2 ratio.
train, val, test = split_names(filename)

# Destination folders for the images.
outimages = os.path.join(output_path, 'images')
outimages_train = os.path.join(outimages, 'training')
outimages_validation = os.path.join(outimages, 'validation')
outimages_test = os.path.join(outimages, 'test')

# Destination folders for the annotations.
outannotations = os.path.join(output_path, 'annotations')
outannotations_train = os.path.join(outannotations, 'training')
outannotations_validation = os.path.join(outannotations, 'validation')
outannotations_test = os.path.join(outannotations, 'test')

# makedirs also creates missing parents (including `output_path` itself) and
# is a no-op for folders that already exist.
for directory in (outimages_train, outimages_validation, outimages_test,
                  outannotations_train, outannotations_validation,
                  outannotations_test):
    os.makedirs(directory, exist_ok=True)

# Copy the files; as in the original script only the training split echoes
# the paths it copies.
copy_split(train, imgpath, annotationpath,
           outimages_train, outannotations_train, verbose=True)
copy_split(val, imgpath, annotationpath,
           outimages_validation, outannotations_validation)
copy_split(test, imgpath, annotationpath,
           outimages_test, outannotations_test)
print('成功!')