划分数据集(分类、VOC)

1. 按比例划分分类所需数据集


import os
import random
import shutil
import time

def copyFile(fileDir, class_name, save_dirs=None, rate=None):
    """Randomly split the images of one class into train and test folders.

    The files found directly inside ``fileDir`` are split into a training
    sample of ``int(count * rate)`` files and a test sample holding the
    rest. Each sample is copied to ``<save root>/<class_name>/``; the
    class folder is created when it does not exist yet.

    Args:
        fileDir: Directory that holds the images of a single class.
        class_name: Name of the per-class sub-folder created under each
            save root.
        save_dirs: Two save roots, ``[train_root, test_root]``. When
            omitted, the module-level ``save_dir`` list is used.
        rate: Fraction of the files that goes to the training sample.
            When omitted, the module-level ``train_rate`` is used.
    """
    # Fall back to the script-level configuration so the existing
    # two-argument call keeps working.
    if save_dirs is None:
        save_dirs = save_dir
    if rate is None:
        rate = train_rate

    # Only regular files can be copied with shutil.copy; a nested
    # directory would make it raise, so directories are left out.
    image_list = [
        entry for entry in os.listdir(fileDir)
        if os.path.isfile(os.path.join(fileDir, entry))
    ]

    train_number = int(len(image_list) * rate)
    train_sample = random.sample(image_list, train_number)
    picked = set(train_sample)
    test_sample = [entry for entry in image_list if entry not in picked]

    # Copy each sample into its own target folder.
    for target_root, names in zip(save_dirs, (train_sample, test_sample)):
        # os.path.join works whether or not the root ends with a slash.
        target_dir = os.path.join(target_root, class_name)
        os.makedirs(target_dir, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(fileDir, name),
                        os.path.join(target_dir, name))

if __name__ == '__main__':
    time_start = time.time()

    # Root of the original dataset; each sub-directory is one class.
    origion_path = 'F:/1/'

    # Output roots for the training and test splits.
    save_train_dir = 'F:/1/train/'
    save_test_dir = 'F:/1/test/'
    save_dir = [save_train_dir, save_test_dir]

    # Fraction of every class that goes into the training split.
    train_rate = 0.8

    def _canonical(path):
        """Return a comparable form of *path* (absolute, normalized case)."""
        return os.path.normcase(os.path.abspath(path))

    # The output roots live inside the dataset root, so on a second run
    # they would otherwise be picked up as classes themselves.
    output_roots = {_canonical(path) for path in save_dir}

    for class_name in os.listdir(origion_path):
        image_Dir = os.path.join(origion_path, class_name)
        # Skip stray files and the split output folders.
        if not os.path.isdir(image_Dir):
            continue
        if _canonical(image_Dir) in output_roots:
            continue
        copyFile(image_Dir, class_name)
        print('%s划分完毕!' % class_name)

    time_end = time.time()
    print('---------------')
    print('训练集和测试集划分共耗时%s!' % (time_end - time_start))

2. 划分VOC数据集

import os
import random

# Fraction of all annotations that goes into trainval (the rest is test).
trainval_percent = 0.8
# Fraction of trainval that goes into train (the rest is val).
train_percent = 0.75
xmlfilepath = 'C:/Users/123/Desktop/VOC2007/Annotations'
txtsavepath = 'C:/Users/123/Desktop/VOC2007/ImageSets/Main'


def split_voc(xml_dir, txt_dir, trainval_ratio=0.8, train_ratio=0.75):
    """Write VOC-style trainval/train/val/test image-set lists.

    Every ``.xml`` file in ``xml_dir`` contributes its name without the
    extension. ``int(total * trainval_ratio)`` names are sampled into
    trainval, and ``int(trainval_count * train_ratio)`` of those into
    train; the remaining trainval names form val and everything outside
    trainval forms test. One name per line is written to
    ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` inside
    ``txt_dir``, which is created when missing.

    Args:
        xml_dir: Directory that holds the annotation files.
        txt_dir: Directory that receives the four list files.
        trainval_ratio: Share of all annotations assigned to trainval.
        train_ratio: Share of trainval assigned to train.

    Returns:
        A dict mapping ``'trainval'``, ``'train'``, ``'val'`` and
        ``'test'`` to the list of names written to the matching file.
    """
    # Sorted so the split does not depend on the directory listing order;
    # entries that are not annotations are ignored.
    stems = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_dir)
        if entry.lower().endswith('.xml')
    )

    total = len(stems)
    trainval_count = int(total * trainval_ratio)
    train_count = int(trainval_count * train_ratio)

    trainval_picked = random.sample(range(total), trainval_count)
    # Sets give constant-time membership tests in the loop below.
    train_ids = set(random.sample(trainval_picked, train_count))
    trainval_ids = set(trainval_picked)

    subsets = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for index, stem in enumerate(stems):
        if index not in trainval_ids:
            subsets['test'].append(stem)
            continue
        subsets['trainval'].append(stem)
        if index in train_ids:
            subsets['train'].append(stem)
        else:
            subsets['val'].append(stem)

    os.makedirs(txt_dir, exist_ok=True)
    for subset_name, names in subsets.items():
        list_path = os.path.join(txt_dir, subset_name + '.txt')
        # The context manager closes the file even if a write fails.
        with open(list_path, 'w') as handle:
            handle.writelines(name + '\n' for name in names)

    return subsets


if __name__ == '__main__':
    split_voc(xmlfilepath, txtsavepath, trainval_percent, train_percent)

  • 3
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值