VOC 格式数据集的标注文件以 .xml 为后缀,而 YOLO 网络需要 .txt 格式的标签文件,因此需要用下面的脚本将 .xml 标注转换为 .txt 标签。
import xml.etree.ElementTree as ET
import os
def convert(size, box):
    """Convert a pixel bounding box to normalized center/size form.

    Args:
        size: Image size indexed as ``size[0]`` = width, ``size[1]`` = height.
        box: Box edges indexed as ``(xmin, xmax, ymin, ymax)``. Note the
            order: both x values come first, then both y values.

    Returns:
        A tuple ``(x, y, w, h)``: the box center and the box extent, each
        divided by the matching image dimension.

    Raises:
        ZeroDivisionError: If the image width or height is 0.
    """
    img_w = size[0]
    img_h = size[1]

    # Box center in pixels along the horizontal and vertical axes.
    x_center = (box[0] + box[1]) / 2.0
    y_center = (box[2] + box[3]) / 2.0

    # Normalize the center and the extent by the image dimensions.
    x = x_center / img_w
    y = y_center / img_h
    w = (box[1] - box[0]) / img_w
    h = (box[3] - box[2]) / img_h
    return (x, y, w, h)
def convert_annotation(xml_files_path, save_txt_files_path, classes):
    """Convert every VOC XML annotation in a directory to a YOLO label file.

    For each ``.xml`` file found in ``xml_files_path`` a ``.txt`` file with
    the same base name is written to ``save_txt_files_path``. Every object
    that is kept becomes one line of the form
    ``<class index> <x> <y> <w> <h>``, where the four box values are the
    result of ``convert``.

    An object is skipped when its ``name`` is not in ``classes`` or when its
    ``difficult`` element is ``1``. A missing or empty ``difficult`` element
    is treated as "not difficult".

    Args:
        xml_files_path: Directory containing the VOC XML annotation files.
        save_txt_files_path: Directory that receives the label files; it is
            created if it does not exist yet.
        classes: Sequence of class names; the position of a name in this
            sequence is the class index written to the label file.
    """
    os.makedirs(save_txt_files_path, exist_ok=True)

    # Only XML files are annotations; any other directory entry would make
    # ET.parse fail. Sorting makes the processing order deterministic.
    xml_files = sorted(
        name for name in os.listdir(xml_files_path)
        if name.lower().endswith('.xml')
    )
    print(xml_files)

    for xml_name in xml_files:
        print(xml_name)
        xml_file = os.path.join(xml_files_path, xml_name)
        # splitext only strips the final extension, so dots inside the base
        # name are preserved ("a.b.xml" -> "a.b.txt").
        base_name = os.path.splitext(xml_name)[0]
        out_txt_path = os.path.join(save_txt_files_path, base_name + '.txt')

        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        label_lines = []
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue

            # The difficult flag must be checked BEFORE the label is emitted,
            # otherwise the skip has no effect. findtext yields '' when the
            # element is absent (via default) or has no text.
            difficult = obj.findtext('difficult', default='').strip()
            if difficult and int(difficult) == 1:
                continue

            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            label_lines.append(
                str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
            )

        # The context manager guarantees the label file is flushed and closed.
        with open(out_txt_path, 'w', encoding='utf-8') as out_txt_f:
            out_txt_f.writelines(label_lines)
if __name__ == "__main__":
    # Class names of the dataset. The position of a name in this list is the
    # class index written to the label files.
    classes1 = ["Car", "Cyclist", "Pedestrian"]
    # Directory that holds the VOC XML annotation files.
    xml_files1 = r'D:\programsoftware\PyCharm\YOLOv8\ultralytics-main\datasets\mydata\Annotations'
    # Directory that receives the generated label files.
    # NOTE(review): this path uses "my_data" while the input path above uses
    # "mydata" -- confirm the difference is intentional.
    save_txt_files1 = r'D:\programsoftware\PyCharm\YOLOv8\ultralytics-main\datasets\my_data\labels\train_labels'

    convert_annotation(xml_files1, save_txt_files1, classes1)

    # Store the class list next to the labels, one name per line. An explicit
    # encoding keeps non-ASCII class names independent of the system locale.
    with open(os.path.join(save_txt_files1, 'classes.txt'), 'w', encoding='utf-8') as file:
        file.writelines(class_name + '\n' for class_name in classes1)
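使用时需要更改的地方为:classes1、xml_files1 和 save_txt_files1。
其中,classes1 是自己数据集的类别列表,xml_files1 是存放 VOC 格式 .xml 标注文件的目录,save_txt_files1 是保存转换后 .txt 标签文件的目录。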