做 DL 实验时需要统计 VOC 数据集中 GT(ground truth)的数量,找了一圈没找到现成的工具,只能自己写一个,枯了
这段代码用来统计数据集中所有 GT 框的大小,前提是标注数据必须是 VOC 格式的 xml 文件
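这段代码主要用于图像识别任务中的数据统计。比如说,
目标检测中,ground truth 有大有小,我们想统计 dataset 中所有 GT 的大小类别。COCO 数据集按面积把目标分为 small、medium、large(s/m/l)三类:面积小于 32×32=1024 为 small,介于 1024 和 96×96=9216 之间为 medium,大于 9216 为 large。这段代码就是按这个标准做分类统计的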
"""
Read VOC-style XML annotation files and count the number of small, medium and large objects.
"""
import os
import xml.etree.ElementTree as ET
from xml.dom.minidom import parse
# Directory that holds the VOC-style XML annotation files.
xml_ano = 'D:\\make_datasets\\bagage\\Annotations'

# Area thresholds in pixels^2 (32*32 and 96*96, the COCO small/medium/large
# split). Change these, or pass other limits to count_gt_sizes, to re-bucket.
SMALL_AREA_LIMIT = 32 * 32    # areas strictly below this count as small
MEDIUM_AREA_LIMIT = 96 * 96   # areas up to and including this count as medium


def _read_coord(bndbox, tag):
    """Return the value of the first <tag> element under ``bndbox`` as an int.

    Plain integer text is parsed exactly. Float-formatted text such as
    "48.0" is accepted as well and truncated toward zero, instead of
    aborting the whole run with a ValueError.
    """
    text = bndbox.getElementsByTagName(tag)[0].childNodes[0].data
    try:
        return int(text)
    except ValueError:
        return int(float(text))


def classify_area(area, small_limit=SMALL_AREA_LIMIT,
                  medium_limit=MEDIUM_AREA_LIMIT):
    """Return 's', 'm' or 'l' for a bounding-box area.

    Args:
        area: Box area in pixels^2.
        small_limit: Areas strictly below this value are small.
        medium_limit: Areas from ``small_limit`` up to and including this
            value are medium; anything larger is large.
    """
    if area < small_limit:
        return 's'
    if area <= medium_limit:
        return 'm'
    return 'l'


def count_gt_sizes(xml_dir, small_limit=SMALL_AREA_LIMIT,
                   medium_limit=MEDIUM_AREA_LIMIT):
    """Count small, medium and large ground-truth boxes in a directory.

    Every ``*.xml`` file directly inside ``xml_dir`` is parsed; for each
    <object> element the first <bndbox> is read and its area
    ``(xmax - xmin) * (ymax - ymin)`` is bucketed with ``classify_area``.

    Args:
        xml_dir: Directory containing the XML annotation files.
        small_limit: Upper (exclusive) area bound of the small bucket.
        medium_limit: Upper (inclusive) area bound of the medium bucket.

    Returns:
        A tuple ``(num_small, num_medium, num_large)``.

    Raises:
        OSError: If ``xml_dir`` cannot be listed.
        IndexError: If an <object> lacks a <bndbox> or one of its
            coordinate elements.
        xml.parsers.expat.ExpatError: If an ``.xml`` file is not
            well-formed.
    """
    counts = {'s': 0, 'm': 0, 'l': 0}
    for xml_pa in os.listdir(xml_dir):
        # Annotation folders often pick up stray files (Thumbs.db, notes,
        # ...); only real annotation files are handed to the XML parser.
        if not xml_pa.lower().endswith('.xml'):
            continue
        xml_path = os.path.join(xml_dir, xml_pa)
        rootNode = parse(xml_path).documentElement
        for node in rootNode.getElementsByTagName("object"):
            # getElementsByTagName searches recursively; index 0 is the
            # first <bndbox> in document order under this <object>.
            bndbox = node.getElementsByTagName("bndbox")[0]
            xmin = _read_coord(bndbox, "xmin")
            ymin = _read_coord(bndbox, "ymin")
            xmax = _read_coord(bndbox, "xmax")
            ymax = _read_coord(bndbox, "ymax")
            mult = (xmax - xmin) * (ymax - ymin)
            counts[classify_area(mult, small_limit, medium_limit)] += 1
    return counts['s'], counts['m'], counts['l']


def main():
    """Count the boxes under ``xml_ano`` and print the three totals."""
    num_s, num_m, num_l = count_gt_sizes(xml_ano)
    print("small package num :", num_s)
    print("middle package num :", num_m)
    print("large package num :", num_l)


if __name__ == "__main__":
    main()