xml文件解析
导入xml的解析包
import xml.etree.ElementTree as ET
读取xml内容代码
def read_xml(xml_path):
    """Count the objects in one annotation file that belong to ``useful_class``.

    Parses ``xml_path`` and, for every ``<object>`` element directly under the
    root, reads the text of its ``<name>`` child. When that text is listed in
    the module-level ``useful_class``, the matching counter in the
    module-level ``obj_dict`` is incremented in place.

    Args:
        xml_path: Path of the XML file to parse.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    root = ET.parse(xml_path).getroot()
    for obj in root.findall('object'):
        # findtext() yields None when <name> is absent, so a malformed
        # <object> is skipped instead of raising AttributeError on `.text`.
        name = obj.findtext('name')
        if name in useful_class:
            # .get() tolerates a class that is listed in useful_class but has
            # no counter in obj_dict yet.
            obj_dict[name] = obj_dict.get(name, 0) + 1
文件结构
--- /home/shares/myproj/dir1
    --- 1.xml
    --- 2.xml
    ...
--- /home/shares/myproj/dir2
    --- 1.xml
    --- 2.xml
    ...
...
需要统计dir1与dir2下所有xml文件中属于useful_class类别的目标框数量
完整代码
'''
hqauto:
Description:
version:
Author: YYQQ.
Date: 2023-03-06 14:50:45
LastEditors: Please set LastEditors
LastEditTime: 2023-03-06 15:22:27
'''
import os
import xml.etree.ElementTree as ET
# Class names whose annotated objects should be counted.
useful_class = ['normalface', 'callface', 'smokeface', 'cigaretteface']
# Per-class counters, all starting at zero. dict.fromkeys keeps the order of
# useful_class and avoids leaving a stray loop variable at module level.
obj_dict = dict.fromkeys(useful_class, 0)
def read_xml(xml_path):
    """Count the objects in one annotation file that belong to ``useful_class``.

    Parses ``xml_path`` and, for every ``<object>`` element directly under the
    root, reads the text of its ``<name>`` child. When that text is listed in
    the module-level ``useful_class``, the matching counter in the
    module-level ``obj_dict`` is incremented in place.

    Args:
        xml_path: Path of the XML file to parse.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    root = ET.parse(xml_path).getroot()
    for obj in root.findall('object'):
        # findtext() yields None when <name> is absent, so a malformed
        # <object> is skipped instead of raising AttributeError on `.text`.
        name = obj.findtext('name')
        if name in useful_class:
            # .get() tolerates a class that is listed in useful_class but has
            # no counter in obj_dict yet.
            obj_dict[name] = obj_dict.get(name, 0) + 1
def main(xml_dir_list=None):
    """Run read_xml() on every ``.xml`` file found directly in each directory.

    For each directory the path is printed, then a single-line progress
    counter is redrawn after each file has been handed to read_xml().

    Args:
        xml_dir_list: Optional iterable of directory paths to scan. When
            omitted, the two project directories listed below are used.

    Raises:
        FileNotFoundError: If one of the directories does not exist.
    """
    if xml_dir_list is None:
        # Default directories to scan.
        xml_dir_list = [
            '/home/shares/myproj/dir1',
            '/home/shares/myproj/dir2',
        ]

    for xml_dir in xml_dir_list:
        # Raise explicitly: an `assert` is removed under `python -O`, and
        # `assert cond, print(...)` attaches None (print's return value) as
        # the error message instead of the text.
        if not os.path.exists(xml_dir):
            raise FileNotFoundError(
                'input xml_dir does not exist: {}'.format(xml_dir))
        print(xml_dir + ':')

        # Only entries whose name ends in '.xml' are processed.
        xml_files = [x for x in os.listdir(xml_dir) if x.endswith('.xml')]
        xml_nums = len(xml_files)
        for xml_index, xml_file in enumerate(xml_files, start=1):
            read_xml(os.path.join(xml_dir, xml_file))
            # '\r' with end="" rewrites the counter on the same console line.
            print('\rprocessing xml : [{} / {}]'.format(xml_index, xml_nums), end="")
if __name__ == "__main__":
    # Script entry point: scan the directories, then print the per-class
    # totals on a fresh line below the progress counter.
    main()
    print('\n', obj_dict)