删除不包含 object 节点的 xml 标注文件及其对应的图片

小泽学编程

于 2022-11-23 11:08:30 发布

阅读量296

点赞数

分类专栏：图片标签与图片对应文章标签： xml python 数据库

本文链接：https://blog.csdn.net/weixin_52476063/article/details/127996571

版权

图片标签与图片对应专栏收录该内容

1 篇文章 0 订阅

订阅专栏

调用其他库解析 xml 时可能出现无法解析的情况，因此这里根据 xml 文件中 annotation 标签下是否存在 object 子节点进行判断：如果不存在 object 节点，就把该 xml 文件连同其对应的图片一并删除。

import xml.dom.minidom
import os
# Remove annotation files that contain no <object> element, together with the
# image each of them refers to, and report how many annotations were removed.
path = r'E:\busandstation_data\Annotations'  # directory holding the XML annotation files
image_path = r'E:\busandstation_data\JPEGImages'  # directory holding the image files
img_name = os.listdir(image_path)
files = os.listdir(path)
count = 0  # number of annotation files removed so far
for xmlFile in files:
    # Skip stray non-XML entries (e.g. Thumbs.db) instead of crashing the parser.
    if not xmlFile.lower().endswith('.xml'):
        continue
    xml_full_path = os.path.join(path, xmlFile)
    # Parse the XML file into an in-memory DOM tree.
    dom = xml.dom.minidom.parse(xml_full_path)
    # getElementsByTagName always returns a list-like NodeList, hence the [0] below.
    annotation = dom.getElementsByTagName('annotation')
    if not annotation:
        # No <annotation> element at all: leave the file untouched.
        continue
    if annotation[0].getElementsByTagName('object'):
        # The annotation has at least one <object>; keep it.
        print(False)
        continue
    filename = annotation[0].getElementsByTagName('filename')
    if not filename or filename[0].firstChild is None:
        # Without a usable <filename> the image cannot be identified, so
        # nothing is deleted for this annotation.
        print('skipped, no <filename> in ' + xmlFile)
        continue
    address = filename[0].firstChild.data
    print(address)
    # <filename> may be stored with or without its extension: prefer the
    # original "<name>.jpg" form and fall back to the name exactly as written.
    new_name = None
    for candidate in (address + '.jpg', address):
        if os.path.isfile(os.path.join(image_path, candidate)):
            new_name = candidate
            break
    os.remove(xml_full_path)
    if new_name is None:
        # A missing image must not abort the run after the XML is already gone.
        print('image not found for ' + xmlFile)
    else:
        os.remove(os.path.join(image_path, new_name))
    count += 1
print(count)