迭代找出xml文件中二级子标签height和width同时为300的文件,打印出文件名,并统计这些文件的个数
from xml.etree import ElementTree as ET
import os
# Directory holding the annotation XML files to scan.
path = '/home/ly/Desktop/enhance_process/train/xml'


def _size_is_300(root):
    """Return True if the tree has both a width and a height of '300'.

    Only second-level children of each ``annotation`` element are examined
    (e.g. ``annotation/size/width``).  The comparison is an exact string
    match against ``'300'``.
    """
    width_ok = False
    height_ok = False
    # root.iter() also yields the root itself when its tag is 'annotation'.
    for annotation in root.iter('annotation'):
        for child in annotation:            # first-level tags
            for grandchild in child:        # second-level tags
                if grandchild.text != '300':
                    continue
                if grandchild.tag == 'width':
                    width_ok = True
                elif grandchild.tag == 'height':
                    height_ok = True
    return width_ok and height_ok


def find_300x300_annotations(xml_dir):
    """Return the paths of XML files in *xml_dir* whose size is 300x300.

    Args:
        xml_dir: Directory containing the annotation ``.xml`` files.

    Returns:
        A list of full file paths, in sorted file-name order.

    Raises:
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is malformed.
        OSError: If *xml_dir* cannot be listed or a file cannot be read.
    """
    matches = []
    for name in sorted(os.listdir(xml_dir)):
        # Skip anything that is not an XML file (other files, subfolders).
        if not name.lower().endswith('.xml'):
            continue
        xml_file = os.path.join(xml_dir, name)
        root = ET.parse(xml_file).getroot()
        # The match state is computed per file, so a width found in one file
        # can never be combined with a height found in a different file.
        if _size_is_300(root):
            matches.append(xml_file)
    return matches


if __name__ == '__main__':
    matched_files = find_300x300_annotations(path)
    for xml_file in matched_files:
        # Print every XML file whose width and height are both '300'.
        print(xml_file)
    # Print how many such files were found.
    print(len(matched_files))
找出xml文件中二级子标签height和width同时为300的文件,并找出与其同名的jpg文件,然后删除对应的xml和jpg文件
from xml.etree import ElementTree as ET
import os
# Directory holding the annotation XML files to scan.
path = '/home/ly/Desktop/enhance_process/train/xml'
# Directory holding the JPG images that share the XML files' base names.
jpgpath = '/home/ly/Desktop/enhance_process/train/images'


def _annotation_is_300x300(xml_file):
    """Return True if *xml_file* has both a width and a height of '300'.

    Only second-level children of each ``annotation`` element are examined
    (e.g. ``annotation/size/width``).  The comparison is an exact string
    match against ``'300'``.
    """
    root = ET.parse(xml_file).getroot()
    width_ok = False
    height_ok = False
    # root.iter() also yields the root itself when its tag is 'annotation'.
    for annotation in root.iter('annotation'):
        for child in annotation:            # first-level tags
            for grandchild in child:        # second-level tags
                if grandchild.text != '300':
                    continue
                if grandchild.tag == 'width':
                    width_ok = True
                elif grandchild.tag == 'height':
                    height_ok = True
    return width_ok and height_ok


def remove_300x300_pairs(xml_dir, jpg_dir):
    """Delete every 300x300 annotation in *xml_dir* and its JPG in *jpg_dir*.

    The directory is scanned completely before anything is deleted, so a
    malformed XML file aborts the run without removing any file.  Matching
    paths, their count and each removal are printed as the work proceeds.

    Args:
        xml_dir: Directory containing the annotation ``.xml`` files.
        jpg_dir: Directory containing the ``.jpg`` images with the same
            base names as the XML files.

    Returns:
        The list of XML file paths that were removed, in sorted order.

    Raises:
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is malformed.
        OSError: If a directory cannot be listed or a file cannot be removed
            for a reason other than the image being absent.
    """
    destfiles = []
    for name in sorted(os.listdir(xml_dir)):
        # Skip anything that is not an XML file (other files, subfolders).
        if not name.lower().endswith('.xml'):
            continue
        xml_file = os.path.join(xml_dir, name)
        # The match state is computed per file, so a width found in one file
        # can never be combined with a height found in a different file and
        # cause an unrelated annotation/image pair to be deleted.
        if _annotation_is_300x300(xml_file):
            print(xml_file)
            destfiles.append(xml_file)
    # Number of XML files whose width and height are both '300'.
    print(len(destfiles))

    # Phase 1: remove the images that share a base name with a matched XML.
    for xml_file in destfiles:
        stem = os.path.splitext(os.path.basename(xml_file))[0]
        jpgfile = os.path.join(jpg_dir, stem + '.jpg')
        try:
            os.remove(jpgfile)
        except FileNotFoundError:
            # A missing image must not abort the run and leave the remaining
            # images and all annotations behind.
            print('missing--->' + jpgfile)
        else:
            print('remove--->' + jpgfile)

    # Phase 2: remove the matched annotation files themselves.
    for xml_file in destfiles:
        os.remove(xml_file)
        print('remove--->' + xml_file)
    return destfiles


if __name__ == '__main__':
    remove_300x300_pairs(path, jpgpath)