过小的标注框可能是错误标注,或者会对训练造成干扰。运行下面的 Python 代码即可批量删除 xml 中宽和高均不超过 15 的标注框(即 xmax-xmin<=15 且 ymax-ymin<=15),阈值可自行调节。
一、删除 xml 中宽和高均不超过 15 的标注框
import os
import xml.etree.ElementTree as ET
from xml.dom.minidom import parse
from xml.etree.ElementTree import ElementTree,Element
import numpy as np
# Annotation folders: XML files are read from xml_ano and the filtered
# documents are written to xml_ano1 (the same folder here, i.e. in place).
xml_ano = r'./Annotations'
xml_ano1 = r'./Annotations'


def remove_small_boxes(src_dir, dst_dir, max_size=15):
    """Remove tiny bounding boxes from every annotation XML in ``src_dir``.

    An ``object`` element (direct child of the document root) is removed
    when one of its ``bndbox`` children has both a width (xmax - xmin) and
    a height (ymax - ymin) that are less than or equal to ``max_size``.
    The coordinates of each removed box are printed.

    Args:
        src_dir: Folder whose ``*.xml`` files are parsed.
        dst_dir: Folder the (possibly modified) documents are written to,
            under the same file names. May equal ``src_dir``.
        max_size: Inclusive upper bound applied to both width and height.

    Entries of ``src_dir`` that do not end in ``.xml`` are ignored so a
    stray file in the folder does not abort the whole run.
    """
    for xml_name in os.listdir(src_dir):
        if not xml_name.lower().endswith('.xml'):
            continue

        tree = ET.parse(os.path.join(src_dir, xml_name))
        root = tree.getroot()

        # findall() returns a list, so removing children of root while
        # walking it is safe.
        for obj in root.findall('object'):
            for box in obj.findall('bndbox'):
                xmin = box.find('xmin').text
                ymin = box.find('ymin').text
                xmax = box.find('xmax').text
                ymax = box.find('ymax').text
                width = float(xmax) - float(xmin)
                height = float(ymax) - float(ymin)
                if width <= max_size and height <= max_size:
                    root.remove(obj)
                    print(xmin, ymin, xmax, ymax)
                    # obj is no longer a child of root; a second remove()
                    # for another small bndbox would raise ValueError.
                    break

        tree.write(os.path.join(dst_dir, xml_name),
                   encoding="utf-8", xml_declaration=True)


if __name__ == '__main__':
    remove_small_boxes(xml_ano, xml_ano1)
二、再删除所有不含标注框(没有 object 节点)的 xml 文件,运行下面的 Python 代码:
import xml.etree.ElementTree as ET
import os
# Delete every annotation XML in a folder that contains no <object> element.
def delete_all_xml(xml_path):
    """Remove annotation files that have no ``object`` child under the root.

    Args:
        xml_path: Folder holding the annotation XML files. A trailing path
            separator is accepted but not required.

    Entries that do not end in ``.xml`` are skipped instead of being handed
    to the XML parser.
    """
    for xmlfile in os.listdir(xml_path):
        if not xmlfile.lower().endswith('.xml'):
            continue
        # os.path.join works with or without a trailing separator, whereas
        # plain concatenation silently built a wrong path without one.
        full_path = os.path.join(xml_path, xmlfile)
        root = ET.parse(full_path).getroot()
        if not root.findall('object'):
            os.remove(full_path)
if __name__ == '__main__':
    # Script entry point: prune the default annotation folder.
    delete_all_xml(xml_path="./Annotations/")
三、再运行下面的代码,同步删除已被删掉的空 xml 所对应的图片(即 JPEGImages 中找不到同名 xml 的图片)即可:
import os
images_dir = './JPEGImages/'
xml_dir = './Annotations/'


def remove_orphan_images(image_root, annotation_root):
    """Delete images that have no annotation file with the same base name.

    The base name is the file name with only its last extension stripped
    (``os.path.splitext``), so names containing dots such as ``img.v2.jpg``
    are matched against ``img.v2.xml`` rather than being cut at the first
    dot.

    Args:
        image_root: Folder containing the image files.
        annotation_root: Folder containing the annotation files; every
            entry in it contributes its base name.

    Returns:
        The file names (not full paths) of the images that were removed.
    """
    # A set gives O(1) membership tests while scanning the images.
    annotated = {os.path.splitext(name)[0]
                 for name in os.listdir(annotation_root)}
    print(sorted(annotated))

    removed = []
    for image_name in os.listdir(image_root):
        image_path = os.path.join(image_root, image_name)
        if not os.path.isfile(image_path):
            continue  # os.remove cannot delete sub-directories
        if os.path.splitext(image_name)[0] in annotated:
            continue
        print(image_name)
        # Remove the entry that was actually listed instead of rebuilding
        # the name with a hard-coded '.jpg' suffix.
        os.remove(image_path)
        removed.append(image_name)
    return removed


if __name__ == '__main__':
    remove_orphan_images(images_dir, xml_dir)