【目标检测】按比例划分VOC数据集:训练集,验证集,测试集

修改代码中Annotations,ImageSets和JPEGImages路径

修改划分比例rate1和rate2(rate1为trainval占全部样本的比例,rate2为train占trainval的比例;默认rate1=0.8、rate2=0.875,即训练集:验证集:测试集=7:1:2)

运行代码,划分结果保存在ImageSets/Main中:

import os
import random

# File extensions (lower-case, leading dot included) that count as images.
image_ext = ['.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff']


def img_and_existxml(x, xml_list):
    """Tell whether *x* is an image file name that has a matching annotation.

    :param x: file name (not a full path), e.g. ``'000001.jpg'``
    :param xml_list: container of annotation file names; anything that
        supports ``in`` works (list, set, ...)
    :return: True when the extension of *x* is listed in ``image_ext``
        (compared case-insensitively) and ``<stem>.xml`` is in *xml_list*,
        otherwise False
    """
    xname, ext = os.path.splitext(x)
    # Lower-case the extension so files such as 'IMG_1.JPG' are not skipped.
    if ext.lower() not in image_ext:
        return False
    return xname + '.xml' in xml_list

def split_imagesets_main(images_dir, xml_dir, save_dir, rate1=0.8, rate2=0.875):
    """Split the annotated images of a VOC dataset into train/val/test lists.

    Only images that pass ``img_and_existxml`` (image extension and a
    same-named ``.xml`` in *xml_dir*) take part in the split. The file stems
    are written, one per line, to ``trainval.txt``, ``train.txt``, ``val.txt``
    and ``test.txt`` inside ``<save_dir>/Main``; the subset sizes are written
    to ``result.txt`` in the same folder and printed.

    :param images_dir: .../VOC2007/JPEGImages
    :param xml_dir: .../VOC2007/Annotations
    :param save_dir: .../VOC2007/ImageSets (``Main`` is created inside it)
    :param rate1: fraction of all samples that goes to trainval
        (default 0.8, i.e. trainval:test = 8:2)
    :param rate2: fraction of trainval that goes to train
        (default 0.875, i.e. train:val = 7:1)
    :return: None
    """
    assert os.path.exists(images_dir), f'\"{images_dir}\" not exists. split_imagesets_main over!'
    assert os.path.exists(xml_dir), f'\"{xml_dir}\" not exists. split_imagesets_main over!'

    # exist_ok lets the script be re-run; missing parent dirs are created too.
    save_main_dir = os.path.join(save_dir, 'Main')
    os.makedirs(save_main_dir, exist_ok=True)

    xml_names = set(os.listdir(xml_dir))
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # sampling below select the same files on every run.
    filelist = sorted(x for x in os.listdir(images_dir) if img_and_existxml(x, xml_names))

    random.seed(0)
    trainval_list = random.sample(filelist, int(rate1 * len(filelist)))
    train_list = random.sample(trainval_list, int(rate2 * len(trainval_list)))

    # Sets give O(1) membership tests in the loop below.
    trainval_set = set(trainval_list)
    train_set = set(train_list)

    subsets = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for file in filelist:
        stem, _ = os.path.splitext(file)
        if file in trainval_set:
            subsets['trainval'].append(stem)
            subsets['train' if file in train_set else 'val'].append(stem)
        else:
            subsets['test'].append(stem)

    for subset_name, stems in subsets.items():
        with open(os.path.join(save_main_dir, subset_name + '.txt'), 'w') as f:
            f.writelines(stem + '\n' for stem in stems)

    summary = ['{}: {}'.format(subset_name, len(stems)) for subset_name, stems in subsets.items()]
    with open(os.path.join(save_main_dir, 'result.txt'), 'w') as f:
        f.write('\n'.join(summary))

    print('yes, saved in {}'.format(save_main_dir))
    for line in summary:
        print(line)


def split_abspath(images_dir, imagesets_main_dir, save_dir, ext='.jpg'):
    """Turn the stem lists in ImageSets/Main into lists of image paths.

    For each of ``train``, ``val``, ``test`` and ``trainval`` the file
    ``<imagesets_main_dir>/<set>.txt`` is read (one stem per line) and
    ``<save_dir>/<set>.txt`` is written with ``images_dir/<stem><ext>`` on
    each line. Paths are built with ``os.path.join`` only, so they are
    absolute only if *images_dir* is absolute.

    :param images_dir: folder holding the images, e.g. .../VOC2007/JPEGImages
    :param imagesets_main_dir: folder holding the four stem lists
    :param save_dir: output folder, created if missing
    :param ext: extension appended to every stem (default ``'.jpg'``)
    :return: None
    """
    assert os.path.exists(images_dir), f'\"{images_dir}\" not exists. split_abspath over!'
    assert os.path.exists(imagesets_main_dir), f'\"{imagesets_main_dir}\" not exists. split_abspath over!'

    sets = ['train', 'val', 'test', 'trainval']
    os.makedirs(save_dir, exist_ok=True)

    for s in sets:
        txt = os.path.join(imagesets_main_dir, s + '.txt')
        with open(txt, 'r') as f:
            # Read line by line so stems containing spaces stay intact;
            # blank lines are ignored.
            stems = [line.strip() for line in f if line.strip()]

        save_txt = os.path.join(save_dir, s + '.txt')
        with open(save_txt, 'w') as f:
            f.writelines(os.path.join(images_dir, stem + ext) + '\n' for stem in stems)

if __name__ == '__main__':
    # Dataset locations, resolved against the current working directory.
    # Edit these to point at your own VOC folders before running.
    voc_paths = {
        'images_dir': 'JPEGImages',
        'xml_dir': 'Annotations',
        'save_dir': 'ImageSets',
    }

    # Uses the default rates of split_imagesets_main.
    split_imagesets_main(**voc_paths)

参考 [1]

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值