数据集处理03
跨域用来划分验证集,训练集,测试集;
如果需要划分出更多的文件列表,只需在 for 循环中添加相应的分支,即可生成自己想要读取的数据集结构。以下是示例代码(划分比例和文件命名都是随意设定的,有需要请自行修改)。
这套代码主要针对 VOC 格式数据集的划分,方便换用自己的数据集进行训练。
import os
import random
# Fraction of all images drawn into the "trainval" pool; every image outside
# the pool is listed in train_aug.txt.
trainval_percent = 0.1
# Fraction of the trainval pool that is listed in test.txt; the remainder of
# the pool is listed in val.txt.
train_percent = 1
# Folder holding the real images of the VOC-style dataset; the split is
# derived from the entries of this folder.
imgfilepath = 'G:/research/AdvSemiSeg-master/dataset/DUT-USEG/JPEGImages'
# Folder that receives the generated list files.
txtsavepath = 'G:/research/AdvSemiSeg-master/dataset/DUT-USEG'


def split_dataset(img_dir, save_dir, trainval_ratio, train_ratio, seed=None):
    """Randomly split the entries of *img_dir* into four name-list files.

    Each entry name is written without its extension, one per line, into
    files created inside *save_dir*:

    * ``trainval.txt``  - the randomly drawn pool
      (``int(total * trainval_ratio)`` entries);
    * ``test.txt``      - a random subsample of the pool
      (``int(pool_size * train_ratio)`` entries);
    * ``val.txt``       - the pool entries that are not in that subsample;
    * ``train_aug.txt`` - every entry outside the pool.

    The file names and their roles are kept exactly as the original script
    chose them; rename them here if your training code expects another layout.

    Args:
        img_dir: Directory whose entries are split.
        save_dir: Existing directory in which the four files are written.
        trainval_ratio: Share of all entries drawn into the pool, in [0, 1].
        train_ratio: Share of the pool drawn into ``test.txt``, in [0, 1].
        seed: Optional seed for a reproducible split. ``None`` (the default)
            gives a different split on every run.

    Returns:
        A dict mapping each list file name to the number of lines written.

    Raises:
        ValueError: If either ratio lies outside [0, 1].
        OSError: If *img_dir* cannot be listed or a list file cannot be
            written.
    """
    for label, ratio in (('trainval_ratio', trainval_ratio),
                         ('train_ratio', train_ratio)):
        if not 0 <= ratio <= 1:
            raise ValueError(
                '%s must be within [0, 1], got %r' % (label, ratio))

    # os.listdir returns entries in arbitrary order; sorting makes a seeded
    # run reproducible across machines and file systems.
    filenames = sorted(os.listdir(img_dir))
    total = len(filenames)

    pool_size = int(total * trainval_ratio)
    subsample_size = int(pool_size * train_ratio)

    rng = random.Random(seed)
    pool = rng.sample(range(total), pool_size)
    # Sets give O(1) membership tests inside the loop below.
    subsample = set(rng.sample(pool, subsample_size))
    pool = set(pool)

    buckets = {
        'trainval.txt': [],
        'test.txt': [],
        'train_aug.txt': [],
        'val.txt': [],
    }
    for index, filename in enumerate(filenames):
        # splitext handles extensions of any length (".jpg", ".jpeg", none),
        # unlike slicing off a fixed four characters.
        stem = os.path.splitext(filename)[0]
        if index in pool:
            buckets['trainval.txt'].append(stem)
            if index in subsample:
                buckets['test.txt'].append(stem)
            else:
                buckets['val.txt'].append(stem)
        else:
            buckets['train_aug.txt'].append(stem)

    # Every list file is always (re)created, even when it ends up empty.
    for list_name, stems in buckets.items():
        with open(os.path.join(save_dir, list_name), 'w') as handle:
            handle.writelines(stem + '\n' for stem in stems)

    return {list_name: len(stems) for list_name, stems in buckets.items()}


def main():
    """Run the split with the module-level configuration."""
    split_dataset(imgfilepath, txtsavepath, trainval_percent, train_percent)


if __name__ == '__main__':
    main()