转换标签文件:xml格式转为txt格式
import os.path
import xml.etree.ElementTree as ET
import os
import random
# Class labels in YOLO index order: the position in this list is the class id
# written to the label files.
class_names = ['hat', 'person']
# Directory holding the Pascal-VOC style XML annotations (input).
xmlpath = './VOC2028/Annotations/'
# Directory receiving the YOLO style txt labels (output).
txtpath = './labels/'


def voc_to_yolo_lines(root, names=None):
    """Build the YOLO label lines for one parsed VOC annotation.

    Each ``<object>`` becomes ``"<class> <cx> <cy> <w> <h>"`` where the box
    centre and size are divided by the image width/height read from
    ``<size>``.

    Args:
        root: Root element of the parsed annotation XML.
        names: Sequence of class labels; the index of a label is its class
            id. Defaults to the module-level ``class_names``.

    Returns:
        A list with one formatted string per converted object.

    Raises:
        ValueError: If the annotation declares a non-positive image size.
    """
    if names is None:
        names = class_names
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    if w <= 0 or h <= 0:
        # Fail with a readable message instead of a ZeroDivisionError below.
        raise ValueError("invalid image size %sx%s in annotation" % (w, h))

    lines = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        if label not in names:
            # One stray label should not abort the whole batch; report it.
            print("skipping object with unknown class: %s" % label)
            continue
        class_num = names.index(label)
        xmlbox = obj.find('bndbox')
        # float() accepts both "12" and "12.0"; the divisions below produce
        # floats either way, so integer inputs format exactly as before.
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)
        values = (
            class_num,
            (x1 + x2) / 2 / w,
            (y1 + y2) / 2 / h,
            (x2 - x1) / w,
            (y2 - y1) / h,
        )
        lines.append(" ".join(str(v) for v in values))
    return lines


def convert_annotation(xml_file_name, txt_file_name, names=None):
    """Convert one VOC XML annotation file into a YOLO txt label file.

    The XML is parsed completely before the output file is opened, so a
    malformed annotation does not leave an empty label file behind.

    Args:
        xml_file_name: Path of the XML annotation to read (UTF-8).
        txt_file_name: Path of the txt label file to write.
        names: Optional class label sequence, see ``voc_to_yolo_lines``.

    Returns:
        The text written to ``txt_file_name`` (lines joined by ``"\\n"``,
        no trailing newline).
    """
    with open(xml_file_name, encoding='utf-8') as xml_file:
        root = ET.parse(xml_file).getroot()
    content = "\n".join(voc_to_yolo_lines(root, names))
    with open(txt_file_name, 'w') as f_txt:
        f_txt.write(content)
    return content


os.makedirs(txtpath, exist_ok=True)

# Only the annotation files that sit directly in xmlpath are converted; the
# paths below are built relative to xmlpath, so nested files could not be
# opened anyway. Anything that is not an .xml file is ignored.
files = []
if os.path.isdir(xmlpath):
    files = sorted(
        entry for entry in os.listdir(xmlpath)
        if entry.lower().endswith('.xml')
        and os.path.isfile(os.path.join(xmlpath, entry))
    )
print(xmlpath)
number = len(files)
print(number)

for xml_name in files:
    txt_name = os.path.splitext(xml_name)[0] + ".txt"
    content = convert_annotation(
        os.path.join(xmlpath, xml_name),
        os.path.join(txtpath, txt_name),
    )
    print(content)
print("done!")
数据集划分
'''
脚本功能:划分数据集
'''
import os
import random
import shutil
import time
def copyFile(fileDir, origion_path1, class_name,
             rate=None, image_dirs=None, label_dirs=None):
    """Randomly split images and their label files into train/val folders.

    The files found directly in ``fileDir`` are sampled without replacement:
    ``int(count * rate)`` of them go to the first destination pair, the rest
    to the second. For every image the label file with the same stem and a
    ``.txt`` extension is copied from ``origion_path1``.

    Args:
        fileDir: Directory containing the image files.
        origion_path1: Directory containing the ``.txt`` label files.
        class_name: Unused; kept so existing callers keep working.
        rate: Fraction of images put in the first split. Defaults to the
            module-level ``train_rate``.
        image_dirs: Destination image directories, one per split. Defaults
            to the module-level ``save_dir``.
        label_dirs: Destination label directories, one per split. Defaults
            to the module-level ``save_dir1``.

    Raises:
        FileNotFoundError: If an image has no matching label file (raised
            by ``shutil.copy`` after the image itself has been copied).
    """
    # Fall back to the globals set by the script entry point.
    if rate is None:
        rate = train_rate
    if image_dirs is None:
        image_dirs = save_dir
    if label_dirs is None:
        label_dirs = save_dir1

    # Sub-directories cannot be copied with shutil.copy, so keep files only.
    image_list = [
        entry for entry in os.listdir(fileDir)
        if os.path.isfile(os.path.join(fileDir, entry))
    ]
    train_number = int(len(image_list) * rate)
    train_sample = random.sample(image_list, train_number)
    picked = set(train_sample)
    test_sample = [entry for entry in image_list if entry not in picked]

    splits = zip(image_dirs, label_dirs, (train_sample, test_sample))
    for image_dest, label_dest, members in splits:
        # exist_ok handles the case where only one of the two folders exists.
        os.makedirs(image_dest, exist_ok=True)
        os.makedirs(label_dest, exist_ok=True)
        for image_name in members:
            # splitext keeps dots inside the stem ("a.v2.jpg" -> "a.v2.txt").
            label_name = os.path.splitext(image_name)[0] + '.txt'
            shutil.copy(os.path.join(fileDir, image_name),
                        os.path.join(image_dest, image_name))
            shutil.copy(os.path.join(origion_path1, label_name),
                        os.path.join(label_dest, label_name))
if __name__ == '__main__':
    # Script entry point: split ./images and ./labels into train/val folders
    # under ./coco and report the elapsed wall-clock time.
    time_start = time.time()

    # Source folders: images and their label files.
    origion_path = './images'
    origion_path1 = './labels'

    # Destination folders, one image/label pair per split.
    save_train_img = './coco/images/train'
    save_val_img = './coco/images/val'
    save_train_labels = './coco/labels/train'
    save_val_labels = './coco/labels/val'

    # copyFile reads these three module-level names.
    save_dir = [save_train_img, save_val_img]
    save_dir1 = [save_train_labels, save_val_labels]
    train_rate = 0.7

    # Split exactly once. copyFile already walks every image in the folder;
    # calling it once per image would redo the random split each time and
    # leave the same images in both train and val.
    copyFile(origion_path, origion_path1, None)

    print('划分完毕!')
    time_end = time.time()
    print('---------------')
    print('训练集和测试集划分共耗时%s!' % (time_end - time_start))