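# Reference link:
# Either approach (xml.dom.minidom or xml.etree.ElementTree) can be used to look up tags in the XML.
# Count the number of objects of each class from the XML annotations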
import os
import xml.dom.minidom
import xml.etree.ElementTree as ET
def count_class(AnnoPath, cls):
    """Count the ``<name>`` elements whose text equals ``cls`` in a folder of XML files.

    Each entry of ``AnnoPath`` whose file name ends in ``.xml``
    (case-insensitive) is parsed with ``xml.dom.minidom``. Every ``<name>``
    element found anywhere in the document is inspected, not only those
    directly under ``<object>``. The total is printed as ``cls : total`` and
    is also returned.

    Args:
        AnnoPath: Directory containing the XML annotation files.
        cls: Class label compared against the first child node's data of
            each ``<name>`` element.

    Returns:
        The number of matching ``<name>`` elements over all parsed files.
    """
    total_count = 0
    for annotation in os.listdir(AnnoPath):
        # Skip stray non-annotation entries; feeding them to the parser
        # would abort the whole run.
        if not annotation.lower().endswith('.xml'):
            continue
        filename = os.path.join(AnnoPath, annotation)
        dom = xml.dom.minidom.parse(filename)  # open the XML file
        collection = dom.documentElement  # root element of the document
        # All elements whose tag name is 'name'
        for ob in collection.getElementsByTagName('name'):
            first = ob.firstChild
            # An empty <name/> has no child node, and a non-text first child
            # has no ``data``; neither can match the label.
            if first is not None and getattr(first, 'data', None) == cls:
                total_count += 1
    print(cls, ":", total_count)
    return total_count
def count_class_withsize(AnnoPath, cls, small, medium):
    """Count objects of class ``cls`` and bucket them by relative box area.

    Each entry of ``AnnoPath`` whose file name ends in ``.xml``
    (case-insensitive) is parsed with ``xml.etree.ElementTree``. For every
    ``<object>`` whose ``<name>`` text equals ``cls``, the bounding-box area
    divided by the image area (``cal_size``) is placed in one bucket:

    * small:  ``0 <= ratio <= small``
    * medium: ``small < ratio <= medium``
    * large:  ``medium < ratio <= 1``

    Objects whose ratio falls outside ``[0, 1]`` are not counted at all.
    The four totals are printed and also returned.

    Args:
        AnnoPath: Directory containing the XML annotation files.
        cls: Class label compared against each object's ``<name>`` text.
        small: Upper bound (inclusive) of the "small" ratio bucket.
        medium: Upper bound (inclusive) of the "medium" ratio bucket.

    Returns:
        A tuple ``(total_count, total_large_count, total_medium_count,
        total_small_count)``.
    """
    total_count = 0
    total_large_count = 0
    total_medium_count = 0
    total_small_count = 0
    for annotation in os.listdir(AnnoPath):
        # Skip stray non-annotation entries; feeding them to the parser
        # would abort the whole run.
        if not annotation.lower().endswith('.xml'):
            continue
        filename = os.path.join(AnnoPath, annotation)
        root = ET.parse(filename).getroot()
        size = root.find('size')
        W = int(size.find('width').text)
        H = int(size.find('height').text)

        large_count = 0
        medium_count = 0
        small_count = 0
        for obj in root.iter('object'):
            name_node = obj.find('name')
            # An object without a <name> child cannot match any class.
            if name_node is None or name_node.text != cls:
                continue
            xmlbox = obj.find('bndbox')
            # Order expected by cal_size: (xmin, xmax, ymin, ymax)
            box = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            ratio = cal_size((W, H), box)
            # elif keeps the buckets mutually exclusive even when the
            # thresholds are passed in the wrong order (small > medium).
            if 0 <= ratio <= small:
                small_count += 1
            elif small < ratio <= medium:
                medium_count += 1
            elif medium < ratio <= 1:
                large_count += 1

        total_large_count += large_count
        total_medium_count += medium_count
        total_small_count += small_count
        total_count += small_count + medium_count + large_count

    print(cls, ":", total_count)
    print(cls, "total_large_count:", total_large_count)
    print(cls, "total_medium_count:", total_medium_count)
    print(cls, "total_small_count:", total_small_count)
    return total_count, total_large_count, total_medium_count, total_small_count
def cal_size(Size, box):
    """Return the area of ``box`` as a fraction of the image area.

    Args:
        Size: Sequence whose first two items are multiplied to give the
            image area.
        box: Sequence read as ``(x_min, x_max, y_min, y_max)``: the width is
            item 1 minus item 0 and the height is item 3 minus item 2.

    Returns:
        The box area divided by the image area.
    """
    box_width = box[1] - box[0]
    box_height = box[3] - box[2]
    image_area = Size[0] * Size[1]
    return box_width * box_height / image_area
if __name__ == '__main__':
    # Size buckets, expressed as box area / image area:
    #   small:  0    <= ratio <= 0.05
    #   medium: 0.05 <  ratio <= 0.15
    #   large:  0.15 <  ratio <= 1
    small = 0.05
    medium = 0.15

    AnnoPath = 'D:/folder/'
    classes = [str(label) for label in range(1, 8)]

    # Per-class totals together with the small/medium/large breakdown.
    # For plain per-class totals only, call count_class(AnnoPath, cls)
    # for each class instead.
    for cls in classes:
        count_class_withsize(AnnoPath, cls, small, medium)
        print("\n")