修改代码中Annotations,ImageSets和JPEGImages路径
修改划分比例rate1和rate2(默认rate1=0.8、rate2=0.875:先按8:2划分trainval与test,再按7:1划分train与val,即训练集:验证集:测试集=7:1:2)
运行代码,划分结果保存在ImageSets/Main中:
import os
import random
# File extensions (lower-case, with leading dot) that are treated as images.
image_ext = ['.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff']


def img_and_existxml(x, xml_list):
    """
    Tell whether ``x`` is an image file that has a matching annotation file.

    :param x: file name of the candidate image, e.g. ``'0001.jpg'``
    :param xml_list: container of annotation file names; anything supporting ``in``
        (a list or a set) works
    :return: True when the extension of ``x`` is listed in ``image_ext`` (compared
        case-insensitively) and ``<stem>.xml`` is contained in ``xml_list``, else False
    """
    xname, ext = os.path.splitext(x)
    # Compare in lower case so files such as 'IMG_0001.JPG' are not silently dropped.
    if ext.lower() not in image_ext:
        return False
    return xname + '.xml' in xml_list
def split_imagesets_main(images_dir, xml_dir, save_dir, rate1=0.8, rate2=0.875):
    """
    Split the annotated images into trainval / train / val / test name lists.

    Only entries of ``images_dir`` accepted by ``img_and_existxml`` (image file with a
    same-named ``.xml`` in ``xml_dir``) take part in the split. The lists are written to
    ``<save_dir>/Main`` as ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt``
    (one file stem per line), together with ``result.txt`` holding the four counts.
    The counts are also printed.

    :param images_dir: .../VOC2007/JPEGImages
    :param xml_dir: .../VOC2007/Annotations
    :param save_dir: .../VOC2007/ImageSets (created, including ``Main``, if missing)
    :param rate1: fraction of all samples put into trainval (default 0.8, trainval:test = 8:2)
    :param rate2: fraction of trainval put into train (default 0.875, train:val = 7:1)
    :return: None; results are written to test.txt, train.txt, trainval.txt, val.txt
    :raises AssertionError: if ``images_dir`` or ``xml_dir`` does not exist
    """
    assert os.path.exists(images_dir), f'\"{images_dir}\" not exists. split_imagesets_main over!'
    assert os.path.exists(xml_dir), f'\"{xml_dir}\" not exists. split_imagesets_main over!'

    # exist_ok=True: re-running the split must not crash because 'Main' is already there.
    save_main_dir = os.path.join(save_dir, 'Main')
    os.makedirs(save_main_dir, exist_ok=True)

    xml_names = set(os.listdir(xml_dir))
    # os.listdir() returns entries in arbitrary order; sort so that the fixed seed below
    # really yields the same split on every machine and every run.
    filelist = sorted(x for x in os.listdir(images_dir) if img_and_existxml(x, xml_names))

    random.seed(0)
    trainval_list = random.sample(filelist, int(rate1 * len(filelist)))
    train_list = random.sample(trainval_list, int(rate2 * len(trainval_list)))
    # Sets give O(1) membership tests in the loop below.
    trainval_set = set(trainval_list)
    train_set = set(train_list)

    # Insertion order here is also the order used in result.txt and on stdout.
    splits = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for file in filelist:
        filename, _ = os.path.splitext(file)
        if file in trainval_set:
            splits['trainval'].append(filename)
            splits['train' if file in train_set else 'val'].append(filename)
        else:
            splits['test'].append(filename)

    for name, stems in splits.items():
        with open(os.path.join(save_main_dir, name + '.txt'), 'w') as f:
            f.writelines(stem + '\n' for stem in stems)

    summary = ['{}: {}'.format(name, len(stems)) for name, stems in splits.items()]
    with open(os.path.join(save_main_dir, 'result.txt'), 'w') as f:
        f.write('\n'.join(summary))

    print('yes, saved in {}'.format(save_main_dir))
    for line in summary:
        print(line)
def split_abspath(images_dir, imagesets_main_dir, save_dir, ext='.jpg'):
    """
    Turn the ImageSets/Main name lists into lists of image paths.

    For each of ``train``, ``val``, ``test`` and ``trainval`` the file
    ``<imagesets_main_dir>/<set>.txt`` is read (one image stem per line) and
    ``<save_dir>/<set>.txt`` is written with one ``os.path.join(images_dir, stem + ext)``
    per line.

    :param images_dir: image directory; used verbatim as the path prefix, so the written
        paths are absolute only if this argument is absolute
    :param imagesets_main_dir: directory containing train.txt, val.txt, test.txt, trainval.txt
    :param save_dir: output directory (created if missing)
    :param ext: extension appended to every stem; defaults to ``'.jpg'``
    :return: None
    :raises AssertionError: if ``images_dir`` or ``imagesets_main_dir`` does not exist
    """
    assert os.path.exists(images_dir), f'\"{images_dir}\" not exists. split_abspath over!'
    assert os.path.exists(imagesets_main_dir), f'\"{imagesets_main_dir}\" not exists. split_abspath over!'
    sets = ['train', 'val', 'test', 'trainval']
    os.makedirs(save_dir, exist_ok=True)
    for s in sets:
        txt = os.path.join(imagesets_main_dir, s + '.txt')
        with open(txt, 'r') as f:
            # One stem per line: break on line ends only, so a name that contains
            # spaces is kept as a single entry; blank lines are skipped.
            stems = [line.strip() for line in f if line.strip()]
        save_txt = os.path.join(save_dir, s + '.txt')
        with open(save_txt, 'w') as f:
            f.writelines(os.path.join(images_dir, stem + ext) + '\n' for stem in stems)
if __name__ == '__main__':
    # Dataset locations, resolved relative to the current working directory.
    voc_paths = {
        'images_dir': 'JPEGImages',
        'xml_dir': 'Annotations',
        'save_dir': 'ImageSets',
    }
    # Write the split lists into ImageSets/Main using the default ratios.
    split_imagesets_main(**voc_paths)
参考 [1]