# 在训练时发现数据集有问题,经过 debug 发现是 xml 标注文件有问题,于是逐一排查是文件名(filename)字段还是尺寸(size)字段的问题。
import os.path
import xml.dom.minidom as xmldom
#coding=utf-8
import xml.etree.ElementTree as ET
# Directory that holds the XML annotation files.
path = '/mnt/mdisk/pyq/Annotation'
# Keep only *.xml entries (os.listdir also returns sub-directories and any
# stray files, which would make ET.parse fail later) and process them in
# sorted order.
xml = sorted(
    entry for entry in os.listdir(path)
    if entry.lower().endswith('.xml')
)
# 修改节点的内容:直接给 Element.text 赋值(例如 Element.text = '')即可改变字段内容
# Make the <filename> element of every annotation match the annotation
# file's own base name with a ".jpg" extension, rewriting the file in place
# only when a correction is needed.
for xmlfile in xml:
    xml_path = os.path.join(path, xmlfile)
    tree = ET.parse(xml_path)  # load the annotation to inspect
    root = tree.getroot()
    # splitext copes with any extension length, unlike slicing off the last
    # three characters of the name.
    expected = os.path.splitext(xmlfile)[0] + '.jpg'
    filename = root.find('filename')  # first <filename> element, or None
    if filename is None:
        # No <filename> element at all: create one instead of crashing, so
        # it gets filled in by the "revise" branch below.
        filename = ET.SubElement(root, 'filename')
    if filename.text == expected:
        print('right')
    else:
        filename.text = expected
        tree.write(xml_path)  # write the corrected tree back to the same file
        print('revise filename')
# Check whether any <size> field (width / height / depth) is missing or
# empty, and delete the annotation files where that is the case. For valid
# files the three values are printed.
for xmlFile in xml:
    xml_path = os.path.join(path, xmlFile)
    root = ET.parse(xml_path).getroot()
    size = root.find('size')  # first <size> element, or None if absent
    # findtext() gives None for a missing element and '' for an element with
    # no text, so the "empty" cases are detected *before* calling int()
    # (int(None) would raise and the emptiness check would never run).
    texts = [
        None if size is None else size.findtext(tag)
        for tag in ('width', 'height', 'depth')
    ]
    if any(text is None or not text.strip() for text in texts):
        # os.remove() takes one full path, not (directory, name).
        os.remove(xml_path)
        print("remove" + xmlFile + " is OK!")
        continue
    width_text, height_text, depth_text = texts
    width = int(width_text)
    print(width)
    height = int(height_text)
    print(height)
    depth = int(depth_text)
    print(depth)