# 导入os和xml.etree.ElementTree模块
import os
import posixpath
import xml.etree.ElementTree as ET
def _update_annotation(root, xml_file, new_path_prefix, classes_count):
    """Rewrite one parsed VOC annotation in place and tally its class names.

    Args:
        root: Root element of the parsed annotation XML.
        xml_file: File name of the annotation (e.g. ``'0001.xml'``); its stem
            plus ``'.jpg'`` becomes the ``<filename>`` text.
        new_path_prefix: POSIX directory that is joined with the image file
            name to form the ``<path>`` text.
        classes_count: Dict mapping class name -> number of ``<object>``
            elements seen so far; updated in place.
    """
    # ---- <filename>: create it if missing, then point it at "<stem>.jpg" ----
    filename_element = root.find('filename')
    if filename_element is None:
        filename_element = ET.SubElement(root, "filename")
    filename_text = os.path.splitext(xml_file)[0] + '.jpg'
    filename_element.text = filename_text

    # ---- <path>: create it if missing, then rewrite it under the prefix ----
    # posixpath.join keeps '/' as the separator even when the script runs on
    # Windows; os.path.join would produce '/root/dataset/smoking\\x.jpg' there.
    path_element = root.find('path')
    if path_element is None:
        path_element = ET.SubElement(root, "path")
    path_element.text = posixpath.join(new_path_prefix, filename_text)

    # NOTE: to drop <path> (or any other element) instead of rewriting it,
    # remove every match:  for el in root.findall('path'): root.remove(el)

    # ---- <object> elements: rename 'smoke' and count class names ----
    for obj in root.findall('object'):
        name_element = obj.find('name')
        if name_element is None:
            # A malformed object must not abort the whole batch.
            print(xml_file, '<object> without <name>, skipped')
            continue
        name = name_element.text
        if name == 'smoke':
            name_element.text = 'smoking'
        else:
            # Report every class name other than 'smoke' so it can be reviewed.
            print(xml_file, name)
        # Counted under the name found in the file (before renaming).
        # The first occurrence counts as 1, not 0.
        classes_count[name] = classes_count.get(name, 0) + 1


def process_annotations(folder_path, new_path_prefix):
    """Rewrite every ``.xml`` annotation in ``folder_path`` in place.

    For each file the ``<filename>`` and ``<path>`` elements are set, objects
    named ``'smoke'`` are renamed to ``'smoking'``, and the file is written
    back to the same location. A text progress bar is printed while running.

    Args:
        folder_path: Directory containing the annotation ``.xml`` files.
        new_path_prefix: POSIX directory prefix used for the ``<path>`` text.

    Returns:
        Dict mapping each class name (as found in the files before renaming)
        to the number of ``<object>`` elements carrying it.
    """
    # Only files ending in '.xml' are processed; everything else is ignored.
    xml_files = [f for f in os.listdir(folder_path) if f.endswith('.xml')]
    print('Start.')
    classes_count = {}
    total = len(xml_files)
    for i, xml_file in enumerate(xml_files):
        xml_path = os.path.join(folder_path, xml_file)
        tree = ET.parse(xml_path)
        _update_annotation(tree.getroot(), xml_file, new_path_prefix,
                           classes_count)
        # Save the modified annotation over the original file.
        tree.write(xml_path)
        # Show the current progress as a 100-character bar.
        progress = int(100 * (i + 1) / total)
        finish = "▓" * progress
        need_do = "-" * (100 - progress)
        print(f"\r{progress}% |{finish}{need_do}| {i + 1}/{total}.", end="")
    print()
    print(f'img number: {total}, classes count:', classes_count)
    print('Done.')
    return classes_count


if __name__ == '__main__':
    # Annotation folder and the new prefix for the <path> element.
    folder_path = r'D:\datasets\smoking\Annotations'
    new_path_prefix = '/root/dataset/smoking'
    process_annotations(folder_path, new_path_prefix)
# Modify and delete elements in the XML files of a VOC dataset.
# (Source page note: latest recommended article published 2024-06-21 03:37:38.)