训练集、验证集、测试集按比例精确划分
创建一个 py 文件,将下述代码放入该文件中,并确保 VOC2007 数据集目录与该 py 文件位于同一目录下
# 数据集划分
import os
import random
root_dir = './VOC2007/'
# Fraction of the WHOLE dataset assigned to train + val; the remainder is test.
trainval_percent = 0.8
# Fraction of the train + val subset (not of the whole dataset) assigned to
# train; the remainder of train + val is val.
train_percent = 0.7
# With the values above the effective split of the whole dataset is therefore
# 0.56 train / 0.24 val / 0.20 test (e.g. 100 files -> 56 / 24 / 20).
xmlfilepath = root_dir + 'Annotations'
txtsavepath = root_dir + 'ImageSets/Main'


def split_names(names, trainval_ratio, train_ratio):
    """Randomly partition *names* into trainval / train / val / test lists.

    Args:
        names: Sequence of sample names (file stems) to distribute.
        trainval_ratio: Fraction of all names that go to trainval; the
            remaining names go to test.
        train_ratio: Fraction of the trainval names that go to train; the
            remaining trainval names go to val.

    Returns:
        A dict with the keys 'trainval', 'train', 'val' and 'test'. Each
        value is a list of names in the same relative order as in *names*.
        train and val together make up trainval; trainval and test together
        make up *names*. Subset sizes are truncated with int(), so any
        rounding remainder ends up in val / test.
    """
    count = len(names)
    trainval_count = int(count * trainval_ratio)
    train_count = int(trainval_count * train_ratio)

    # Sample from sequences first (random.sample does not accept sets on
    # recent Python versions), then convert to sets so that the membership
    # tests in the loop below are O(1) instead of O(n).
    trainval_picked = random.sample(range(count), trainval_count)
    train_idx = set(random.sample(trainval_picked, train_count))
    trainval_idx = set(trainval_picked)

    splits = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for idx, name in enumerate(names):
        if idx not in trainval_idx:
            splits['test'].append(name)
            continue
        splits['trainval'].append(name)
        splits['train' if idx in train_idx else 'val'].append(name)
    return splits


def write_splits(xml_dir, out_dir, trainval_ratio, train_ratio):
    """Split the annotations in *xml_dir* and write the image-set lists.

    Writes trainval.txt, train.txt, val.txt and test.txt into *out_dir*
    (created if missing), one annotation file stem per line.

    Args:
        xml_dir: Directory containing the ``.xml`` annotation files.
        out_dir: Directory that receives the four ``.txt`` list files.
        trainval_ratio: Passed through to split_names().
        train_ratio: Passed through to split_names().

    Returns:
        The dict produced by split_names() for the annotation stems.

    Raises:
        OSError: If *xml_dir* cannot be listed or a list file cannot be
            written.
    """
    # Only real annotation files count: stray entries (hidden files, other
    # extensions) would otherwise end up as bogus sample names. splitext()
    # removes just the extension instead of blindly cutting 4 characters.
    # Sorting makes the line order independent of os.listdir()'s order.
    stems = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_dir)
        if entry.lower().endswith('.xml')
    )
    splits = split_names(stems, trainval_ratio, train_ratio)

    os.makedirs(out_dir, exist_ok=True)
    for split_name, members in splits.items():
        # `with` guarantees the file is closed even if a write fails.
        with open(os.path.join(out_dir, split_name + '.txt'), 'w') as handle:
            handle.writelines(stem + '\n' for stem in members)
    return splits


if __name__ == '__main__':
    write_splits(xmlfilepath, txtsavepath, trainval_percent, train_percent)