xml迭代标签内容

 遍历目录下的xml文件,找出二级子标签中height和width同时为300的文件,打印出文件名,并统计这些文件的个数。

from xml.etree import ElementTree as ET
import os
# Directory that holds the annotation XML files to scan.
path = '/home/ly/Desktop/enhance_process/train/xml'


def _has_fixed_size(xml_path, size='300'):
    """Return True if the XML file has second-level width AND height equal to size.

    Every element tagged ``annotation`` (the root included) is visited; only
    the children of its children ("second-level" tags) are inspected.

    Args:
        xml_path: Path of the XML file to parse.
        size: Text value that both ``width`` and ``height`` must carry.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()
    width_ok = False
    height_ok = False
    for node in root.iter('annotation'):
        for child in node:  # first-level tags
            for leaf in child:  # second-level tags
                # Tolerate whitespace around the number and empty elements
                # (whose .text is None).
                text = (leaf.text or '').strip()
                if text != size:
                    continue
                if leaf.tag == 'width':
                    width_ok = True
                elif leaf.tag == 'height':
                    height_ok = True
    return width_ok and height_ok


def find_fixed_size_xml(xml_dir=path, size='300'):
    """Return the sorted paths of XML files in xml_dir whose width and height equal size.

    Entries that are not regular ``.xml`` files (other file types,
    sub-directories) are skipped instead of being handed to the XML parser.

    Args:
        xml_dir: Directory to scan (not recursive).
        size: Text value that both ``width`` and ``height`` must carry.

    Returns:
        A list of full paths, sorted by file name so the output is stable.
    """
    matches = []
    for name in sorted(os.listdir(xml_dir)):
        xml_path = os.path.join(xml_dir, name)
        if not name.lower().endswith('.xml') or not os.path.isfile(xml_path):
            continue
        if _has_fixed_size(xml_path, size):
            matches.append(xml_path)
    return matches


def report_fixed_size_xml(xml_dir=path, size='300'):
    """Print every matching XML path followed by the number of matches.

    Returns:
        The number of matching files.
    """
    matches = find_fixed_size_xml(xml_dir, size)
    for xml_path in matches:
        print(xml_path)
    print(len(matches))
    return len(matches)


if __name__ == '__main__':
    report_fixed_size_xml()

找出二级子标签中height和width同时为300的xml文件,再找出与其同名的jpg文件,然后把对应的xml文件和jpg文件一并删除。

from xml.etree import ElementTree as ET
import os
# Directory that holds the annotation XML files to scan.
path = '/home/ly/Desktop/enhance_process/train/xml'
# Directory that holds the JPG images named after the XML files.
jpgpath = '/home/ly/Desktop/enhance_process/train/images'


def _is_fixed_size(xml_path, size='300'):
    """Return True if the XML file has second-level width AND height equal to size.

    Every element tagged ``annotation`` (the root included) is visited; only
    the children of its children ("second-level" tags) are inspected.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()
    width_ok = False
    height_ok = False
    for node in root.iter('annotation'):
        for child in node:  # first-level tags
            for leaf in child:  # second-level tags
                # Tolerate whitespace around the number and empty elements
                # (whose .text is None).
                text = (leaf.text or '').strip()
                if text != size:
                    continue
                if leaf.tag == 'width':
                    width_ok = True
                elif leaf.tag == 'height':
                    height_ok = True
    return width_ok and height_ok


def collect_fixed_size_xml(xml_dir=path, size='300'):
    """Return the sorted paths of XML files in xml_dir whose width and height equal size.

    Entries that are not regular ``.xml`` files are skipped instead of being
    handed to the XML parser.
    """
    matches = []
    for name in sorted(os.listdir(xml_dir)):
        xml_path = os.path.join(xml_dir, name)
        if not name.lower().endswith('.xml') or not os.path.isfile(xml_path):
            continue
        if _is_fixed_size(xml_path, size):
            matches.append(xml_path)
    return matches


def remove_xml_and_images(xml_files, image_dir=jpgpath):
    """Delete the same-named ``.jpg`` of every XML file, then the XML files.

    Images are removed first and XML files second. A file that does not
    exist is reported and skipped, so one missing image can no longer abort
    the run and leave the remaining files behind.

    Args:
        xml_files: Full paths of the XML files to delete.
        image_dir: Directory holding ``<xml stem>.jpg`` for each XML file.

    Returns:
        The list of paths that were actually removed, in removal order.
    """
    xml_files = list(xml_files)
    image_files = [
        os.path.join(
            image_dir,
            os.path.splitext(os.path.basename(xml_path))[0] + '.jpg',
        )
        for xml_path in xml_files
    ]
    removed = []
    for target in image_files + xml_files:
        try:
            os.remove(target)
        except FileNotFoundError:
            print('skip (not found)--->' + target)
            continue
        print('remove--->' + target)
        removed.append(target)
    return removed


def delete_fixed_size_pairs(xml_dir=path, image_dir=jpgpath, size='300'):
    """Print the matching XML files and their count, then delete each XML/JPG pair.

    The whole directory is scanned before anything is deleted, so a parse
    error aborts the run without having removed any file.

    Returns:
        The list of paths that were actually removed.
    """
    destfiles = collect_fixed_size_xml(xml_dir, size)
    for xml_path in destfiles:
        print(xml_path)
    print(len(destfiles))
    return remove_xml_and_images(destfiles, image_dir)


if __name__ == '__main__':
    delete_fixed_size_pairs()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值