只需要将 XML 标注文件转换成 TXT 格式即可。
import xml.etree.ElementTree as ET
import os
# Names of every category seen so far, shared across all xml2txt() calls.
category_set = set()
# Maps category_id -> category name; IDs are handed out in first-seen order.
category_dict = {}


def _category_id(category):
    """Return the stable integer ID for *category*, registering it if new."""
    # Look the name up in category_dict instead of indexing into the set:
    # set iteration order is arbitrary and may change as members are added,
    # so list(category_set).index(...) does not yield a repeatable ID.
    for known_id, known_name in category_dict.items():
        if known_name == category:
            return known_id
    new_id = len(category_dict)
    category_dict[new_id] = category
    category_set.add(category)
    return new_id


def xml2txt(xml_path: str) -> None:
    """Convert one XML annotation file into a plain-text label file.

    For every ``objects/object`` element, the first four ``points/point``
    entries (each an ``"x,y"`` string) are divided by the image width and
    height read from ``size/width`` and ``size/height``. One line per object
    is written as ``<category_id> x1 y1 x2 y2 x3 y3 x4 y4``.

    The output file has the same base name with a ``.txt`` extension and is
    placed in the XML file's directory with ``labelXml`` replaced by
    ``labeltxt``; that directory is created when it does not exist.

    Side effects: new category names (from ``possibleresult/name``) are
    registered in the module-level ``category_set`` and ``category_dict``,
    and a confirmation message is printed.

    Args:
        xml_path: Path of the XML annotation file to convert.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        AttributeError: If a required element (size or category name) is
            missing, because ``find`` then returns ``None``.
        ZeroDivisionError: If the recorded width or height is zero.
    """
    # Parse the XML file.
    root = ET.parse(xml_path).getroot()

    # Image dimensions used to normalise the point coordinates.
    width = int(root.find('size/width').text)
    height = int(root.find('size/height').text)

    # Build one output line per annotated object.
    lines = []
    for obj in root.findall('objects/object'):
        coords = []
        # Only the first four points are used.
        for point in obj.findall('points/point')[:4]:
            parts = point.text.split(',')
            coords.append(float(parts[0]) / width)
            coords.append(float(parts[1]) / height)
        category = obj.find('possibleresult/name').text
        category_id = _category_id(category)
        points_str = ' '.join(map(str, coords))
        lines.append(f'{category_id} {points_str}\n')

    # Work out where the text file goes and make sure the folder exists.
    txt_save_dir = os.path.dirname(xml_path).replace('labelXml', 'labeltxt')
    # splitext swaps only the real extension, so the output can never share
    # the source file's name (e.g. for "a.XML" or names containing ".xml").
    txt_name = os.path.splitext(os.path.basename(xml_path))[0] + '.txt'
    txt_file_path = os.path.join(txt_save_dir, txt_name)
    if txt_save_dir:
        # An empty dirname means "current directory"; makedirs('') would fail.
        os.makedirs(txt_save_dir, exist_ok=True)

    with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
        txt_file.writelines(lines)
    print(f'提取的信息已保存到文件: {txt_file_path}')
if __name__ == '__main__':
    # Directory holding the XML annotation files to convert.
    xmls_dir = r'E:\project\Dataset\fairm\train\total\labelXml'
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # in which categories are first seen (and thus their IDs) reproducible.
    for xml in sorted(os.listdir(xmls_dir)):
        # Skip anything that is not an XML file (other files, sub-folders),
        # which would otherwise make the XML parser fail.
        if not xml.lower().endswith('.xml'):
            continue
        xml_path = os.path.join(xmls_dir, xml)
        xml2txt(xml_path)
    # Show the final category_id -> category name mapping.
    print(category_dict)