split

最新推荐文章于 2024-09-23 14:24:34 发布

PrincessSaladin

最新推荐文章于 2024-09-23 14:24:34 发布

阅读量45

点赞数

文章标签： python

本文链接：https://blog.csdn.net/tianxianyua/article/details/133819224

版权

# coding:utf-8

import os
import random
import argparse

import pdb

# Root folder of the dataset; every default path below is derived from it.
dataroot = r'D:\projects\iron\bls\blsR1\dataroot\data'

supported_extension = ('.png', '.bmp')  # image file types that are allowed

parser = argparse.ArgumentParser()
# Folder holding the label files; adjust it to your own data
# (XML labels are usually stored under "Annotations").
parser.add_argument('--annotation_path', default=os.path.join(dataroot, 'labels'), type=str, help='input xml label path')
# Folder receiving the split lists; usually ImageSets/Main of your own dataset.
parser.add_argument('--txt_path', default=os.path.join(dataroot, r'ImageSets/Segmentation'), type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 0.8  # share of all samples that goes into train + val
train_percent = 0.7  # share of the train + val samples that goes into train
annotationfilepath = opt.annotation_path
txtsavepath = opt.txt_path
total_annotation = os.listdir(annotationfilepath)
# exist_ok avoids the check-then-create race of "if not exists: makedirs".
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_annotation)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
# Indices that are not drawn into trainval end up in the test split.
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)

# Lists of sample names (one per line), stored inside txtsavepath.
file_trainval = open(os.path.join(txtsavepath, 'trainval.txt'), 'w')
file_test = open(os.path.join(txtsavepath, 'test.txt'), 'w')
file_train = open(os.path.join(txtsavepath, 'train.txt'), 'w')
file_val = open(os.path.join(txtsavepath, 'val.txt'), 'w')

# Second set of lists, stored one level above txtsavepath; the split loop
# further down writes full image paths into them.
# abspath() drops a trailing separator and never yields '' for dirname():
# without it '--txt_path out/' would reopen the very same files as above and
# '--txt_path out' would target the filesystem root ('' + '/test.txt').
txtsavepath2 = os.path.dirname(os.path.abspath(txtsavepath))
file_test2 = open(os.path.join(txtsavepath2, 'test.txt'), 'w')
file_train2 = open(os.path.join(txtsavepath2, 'train.txt'), 'w')
file_val2 = open(os.path.join(txtsavepath2, 'val.txt'), 'w')

image_dir = os.path.join(dataroot, 'JPEGImages')
test_dir = os.path.join(dataroot, 'test')


def get_suffix(root, extensions=None):
    """Return the file extension of the images stored in ``root/JPEGImages``.

    The folder entries are scanned in sorted name order, so the result does
    not depend on the arbitrary order of ``os.listdir``. The extension of the
    first entry whose lower-cased name ends with an accepted extension is
    returned exactly as spelled on disk (``'.PNG'`` stays upper-case).

    Args:
        root: folder that contains the ``JPEGImages`` sub-folder (expected to
            hold only '.bmp' or '.png' images, per the original 20230922 note).
        extensions: accepted extensions, lower-case with the leading dot,
            given as a single string or an iterable of strings. ``None``
            (the default) falls back to the module-level
            ``supported_extension``.

    Returns:
        str, suffix of the images (the original note says it is meant for
        the dataloader that reads the image data).

    Raises:
        AssertionError: if no entry has an accepted extension. It is raised
            explicitly instead of through ``assert`` so the check is not
            stripped when Python runs with ``-O``.
    """
    if extensions is None:
        extensions = supported_extension
    # str.endswith() accepts only a str or a tuple of str.
    if isinstance(extensions, str):
        extensions = (extensions,)
    else:
        extensions = tuple(extensions)

    img_path = os.path.join(root, r'JPEGImages')
    for eachfile in sorted(os.listdir(img_path)):
        if eachfile.lower().endswith(extensions):
            return os.path.splitext(eachfile)[-1]

    raise AssertionError('Error: no expected format for image in the folder!')


# Write every sample into its split lists. The work is wrapped in try/finally
# so the list files opened earlier are closed even if the suffix detection or
# a write fails.
try:
    suffix = get_suffix(dataroot)
    # Sets give O(1) membership tests; scanning the sampled lists for every
    # index made the loop quadratic in the number of samples.
    trainval_ids = set(trainval)
    train_ids = set(train)
    for i in list_index:
        # splitext() handles label extensions of any length; slicing off the
        # last four characters mangled names such as 'a.json' or 'a.jpeg'.
        name = os.path.splitext(total_annotation[i])[0]
        # Every split, test included, points at the image in image_dir.
        # os.path.join uses the separator of the running platform instead of
        # a hard-coded backslash.
        img_path = os.path.join(image_dir, name + suffix) + '\n'
        if i in trainval_ids:
            file_trainval.write(name + '\n')
            if i in train_ids:
                file_train.write(name + '\n')
                file_train2.write(img_path)
            else:
                file_val.write(name + '\n')
                file_val2.write(img_path)
        else:
            file_test.write(name + '\n')
            file_test2.write(img_path)
finally:
    for handle in (file_trainval, file_train, file_val, file_test,
                   file_train2, file_val2, file_test2):
        handle.close()