xml迭代标签内容

最新推荐文章于 2022-09-06 16:57:24 发布

只想整天学习

最新推荐文章于 2022-09-06 16:57:24 发布

阅读量478

点赞数

分类专栏： Python 文章标签： xml搜索text的值

本文链接：https://blog.csdn.net/zZzZzZ__/article/details/101381186

版权

Python 专栏收录该内容

26 篇文章 0 订阅

订阅专栏

迭代 xml 文件，找出二级子标签中 height 和 width 同时为 300 的文件，打印其文件名，并统计这些文件的个数。

from xml.etree import ElementTree as ET
import os
# Directory holding the annotation XML files to scan.
path = '/home/ly/Desktop/enhance_process/train/xml'


def _has_300x300_size(xml_file):
    """Return True if *xml_file* declares both width and height as '300'.

    Only second-level children of an ``annotation`` element are inspected
    (e.g. ``annotation/size/width``), and the element text must be exactly
    the string ``'300'``.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    width_ok = False
    height_ok = False
    root = ET.parse(xml_file).getroot()
    # root.iter() also yields the root itself when its tag is 'annotation'.
    for annotation in root.iter('annotation'):
        for child in annotation:  # first-level tags
            for grandchild in child:  # second-level tags
                if grandchild.text != '300':
                    continue
                if grandchild.tag == 'width':
                    width_ok = True
                elif grandchild.tag == 'height':
                    height_ok = True
    return width_ok and height_ok


def find_300x300_xml_files(xml_dir):
    """Return the paths of the XML files in *xml_dir* sized 300x300.

    Entries whose name does not end in ``.xml`` (other files, sub-directories)
    are skipped instead of being handed to the XML parser.  Names are
    processed in sorted order so the result is deterministic; ``os.listdir``
    alone returns entries in arbitrary order.
    """
    matches = []
    for name in sorted(os.listdir(xml_dir)):
        if not name.lower().endswith('.xml'):
            continue
        xml_file = os.path.join(xml_dir, name)
        if _has_300x300_size(xml_file):
            matches.append(xml_file)
    return matches


if __name__ == '__main__':
    matched_files = find_300x300_xml_files(path)
    for matched_file in matched_files:
        print(matched_file)  # XML file whose width and height are both '300'
    print(len(matched_files))  # number of such XML files

找出 xml 文件中二级子标签 height 和 width 同时为 300 的文件，并找出与其同名的 jpg 文件，然后删除对应的 xml 和 jpg 文件。

from xml.etree import ElementTree as ET
import os
# Directory holding the annotation XML files to scan.
path = '/home/ly/Desktop/enhance_process/train/xml'
# Directory holding the images that share a base name with the XML files.
jpgpath = '/home/ly/Desktop/enhance_process/train/images'


def _annotation_is_300x300(xml_file):
    """Return True if *xml_file* declares both width and height as '300'.

    Only second-level children of an ``annotation`` element are inspected
    (e.g. ``annotation/size/width``), and the element text must be exactly
    the string ``'300'``.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    width_ok = False
    height_ok = False
    root = ET.parse(xml_file).getroot()
    # root.iter() also yields the root itself when its tag is 'annotation'.
    for annotation in root.iter('annotation'):
        for child in annotation:  # first-level tags
            for grandchild in child:  # second-level tags
                if grandchild.text != '300':
                    continue
                if grandchild.tag == 'width':
                    width_ok = True
                elif grandchild.tag == 'height':
                    height_ok = True
    return width_ok and height_ok


def remove_300x300_pairs(xml_dir, jpg_dir):
    """Delete every 300x300 annotation in *xml_dir* and its image in *jpg_dir*.

    The image for ``<name>.xml`` is ``<name>.jpg`` inside *jpg_dir*.  Entries
    of *xml_dir* whose name does not end in ``.xml`` are ignored.  A missing
    image is reported and skipped rather than aborting the run, so the
    remaining images and all matching XML files are still removed.

    Returns:
        The list of paths actually removed: images first, then XML files.
    """
    xml_matches = []
    for name in sorted(os.listdir(xml_dir)):
        if not name.lower().endswith('.xml'):
            continue
        xml_file = os.path.join(xml_dir, name)
        if _annotation_is_300x300(xml_file):
            print(xml_file)  # XML file whose width and height are both '300'
            xml_matches.append(xml_file)
    print(len(xml_matches))  # number of such XML files

    removed = []
    # Images are removed first, then the annotations, as two separate passes.
    for xml_file in xml_matches:
        stem = os.path.splitext(os.path.basename(xml_file))[0]
        jpgfile = os.path.join(jpg_dir, stem + '.jpg')
        try:
            os.remove(jpgfile)
        except FileNotFoundError:
            # An annotation without an image must not stop the clean-up.
            print('missing--->' + jpgfile)
            continue
        removed.append(jpgfile)
        print('remove--->' + jpgfile)
    for xml_file in xml_matches:
        os.remove(xml_file)
        removed.append(xml_file)
        print('remove--->' + xml_file)
    return removed


if __name__ == '__main__':
    remove_300x300_pairs(path, jpgpath)