一、简介
前一节介绍了如何将 xml 格式文件转换为 txt 文件；本节将这些批量保存的 txt 文件的路径写入汇总的 txt 文件中，便于后续读取。
二、说明
代码按约 8:2 的比例将数据随机划分为训练集和测试集，并分别写入 trainlist.txt 和 testlist.txt。
三、完整代码
import os
import random
def write_split_lists(root_path, label_dir, train_name="trainlist.txt",
                      test_name="testlist.txt", train_ratio=0.8):
    """Write train/test index files listing every label file under a root.

    The expected layout is ``root_path/label_dir/<class>/<sub_folder>/...``.
    For each class, the sub-folders are shuffled and the leading
    ``train_ratio`` share of them goes to the training list, the rest to
    the test list, so all files of one sub-folder land in the same split.
    Each output line is a ``/``-separated path relative to the label
    directory.

    Args:
        root_path: Directory holding ``label_dir``; the two index files
            are created (or overwritten) directly inside it.
        label_dir: Name of the label directory, relative to ``root_path``.
        train_name: File name of the training index.
        test_name: File name of the test index.
        train_ratio: Share of each class's sub-folders used for training.

    Returns:
        A ``(train_count, test_count)`` tuple with the number of lines
        written to each index file.
    """
    label_root = os.path.join(root_path, label_dir)
    count_train = 0
    count_test = 0
    # ``with`` guarantees both index files are closed even if the
    # directory scan raises part-way through.
    with open(os.path.join(root_path, train_name), 'w') as f_train, \
            open(os.path.join(root_path, test_name), 'w') as f_test:
        for label in os.listdir(label_root):
            # Skip hidden entries such as ".DS_Store".
            if label.startswith('.'):
                continue
            class_dir = os.path.join(label_root, label)
            # A stray regular file next to the class folders cannot be
            # listed; ignore it instead of crashing in os.listdir.
            if not os.path.isdir(class_dir):
                continue
            sub_folders = os.listdir(class_dir)
            # Randomise which sub-folders end up in which split.
            random.shuffle(sub_folders)
            segment = len(sub_folders) * train_ratio
            for seg, sub_folder in enumerate(sub_folders):
                folder = os.path.join(class_dir, sub_folder)
                print(folder)
                to_train = seg < segment
                out = f_train if to_train else f_test
                written = 0
                for walk_root, _dirs, files in os.walk(folder):
                    # os.walk recurses, so keep the intermediate directories
                    # in the written path; otherwise nested files would be
                    # listed under a location where they do not exist.
                    rel_dir = os.path.relpath(walk_root, folder)
                    parts = [label, sub_folder]
                    if rel_dir != os.curdir:
                        parts.extend(rel_dir.split(os.sep))
                    prefix = '/'.join(parts)
                    for name in files:
                        out.write(prefix + '/' + name + '\n')
                        written += 1
                if to_train:
                    count_train += written
                else:
                    count_test += written
    return count_train, count_test


if __name__ == "__main__":
    # Root directory under which images and labels are stored.
    dst_root_path = "E:/dataSet/meng_data/DATA"
    # Directory (relative to the root) that holds the label files.
    dst_label_path = "train_labels"
    # Names of the index files written into the root directory.
    train_list = "trainlist.txt"
    test_list = "testlist.txt"
    count_train, count_test = write_split_lists(
        dst_root_path, dst_label_path, train_list, test_list)
    print('train_count:', count_train)
    print('test_count', count_test)
执行结果如下: