python根据xml统计各类型个数

本文介绍了两种统计XML标注文件中特定类别对象数量的方法:一种是单纯按类别计数,另一种是根据目标框面积占整幅图像面积的比例,将对象划分为大、中、小三类分别计数。作者使用了Python的xml.dom.minidom和xml.etree.ElementTree库进行操作。
摘要由CSDN通过智能技术生成

参考链接:

http://t.csdnimg.cn/KIICz

xml.dom.minidom 和 xml.etree.ElementTree 这两种方式都可以按标签名检索XML中的节点,下面的两个函数各用了其中一种。

# 根据xml统计类型个数
import os
import xml.dom.minidom
import xml.etree.ElementTree as ET

def count_class(AnnoPath, cls):
    """Count the ``<name>`` elements equal to ``cls`` in a folder of XML files.

    Each regular file in ``AnnoPath`` whose name ends in ``.xml`` is parsed
    with ``xml.dom.minidom``. Other directory entries (images, sub-folders,
    ...) are skipped rather than aborting the run with a parse error. Every
    ``<name>`` element in a document is inspected, whatever its parent is.

    Args:
        AnnoPath: Directory that holds the annotation XML files.
        cls: Label to look for. It is compared for exact equality with the
            data of the first child node of each ``<name>`` element.

    Returns:
        The total number of matching ``<name>`` elements over all files.
        The same number is also printed as ``cls : total``.
    """
    # Sentinel that compares unequal to any label, used for <name/> elements
    # that have no child node at all.
    missing = object()
    total_count = 0

    for annotation in os.listdir(AnnoPath):
        filename = os.path.join(AnnoPath, annotation)
        # os.listdir returns every entry; only regular .xml files are parsed.
        if not (annotation.lower().endswith('.xml') and os.path.isfile(filename)):
            continue

        dom = xml.dom.minidom.parse(filename)
        name_nodes = dom.documentElement.getElementsByTagName('name')
        # firstChild is None for an empty <name/>; getattr avoids the
        # AttributeError that a direct ``.data`` access would raise.
        total_count += sum(
            1 for node in name_nodes
            if getattr(node.firstChild, 'data', missing) == cls
        )

    print(cls, ":" ,total_count)
    return total_count


def count_class_withsize(AnnoPath, cls, small, medium):
    """Count objects of class ``cls`` and bucket them by relative box area.

    Each regular ``.xml`` file in ``AnnoPath`` is parsed with ElementTree.
    Other directory entries are skipped instead of raising a parse error.
    For every ``<object>`` whose ``<name>`` text equals ``cls``, the ratio of
    its ``<bndbox>`` area to the image area (``<size>`` width * height) is
    computed with ``cal_size`` and sorted into one bucket:

    * small:  ``0 <= ratio <= small``
    * medium: ``small < ratio <= medium``
    * large:  ``medium < ratio <= 1``

    An object lands in at most one bucket. Objects whose ratio is outside
    ``[0, 1]`` are not counted in any bucket nor in the total.

    Args:
        AnnoPath: Directory that holds the annotation XML files.
        cls: Label to look for, compared exactly with the ``<name>`` text.
        small: Upper bound (inclusive) of the small bucket.
        medium: Upper bound (inclusive) of the medium bucket.

    Returns:
        A tuple ``(total, large, medium, small)`` of counts. The same four
        numbers are also printed.

    Raises:
        ZeroDivisionError: Propagated from ``cal_size`` when a matching
            object sits in a file whose width * height is zero.
    """
    total_large_count = 0
    total_medium_count = 0
    total_small_count = 0

    for annotation in os.listdir(AnnoPath):
        filename = os.path.join(AnnoPath, annotation)
        # os.listdir returns every entry; only regular .xml files are parsed.
        if not (annotation.lower().endswith('.xml') and os.path.isfile(filename)):
            continue

        root = ET.parse(filename).getroot()
        size = root.find('size')
        W = int(size.find('width').text)
        H = int(size.find('height').text)

        for obj in root.iter('object'):
            name = obj.find('name')
            # An <object> without a <name> child cannot match any class.
            if name is None or name.text != cls:
                continue

            xmlbox = obj.find('bndbox')
            # Order is (xmin, xmax, ymin, ymax), which is what cal_size expects.
            box = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            ratio = cal_size((W, H), box)

            # elif keeps the buckets mutually exclusive even if the caller
            # passes small > medium.
            if 0 <= ratio <= small:
                total_small_count += 1
            elif small < ratio <= medium:
                total_medium_count += 1
            elif medium < ratio <= 1:
                total_large_count += 1

    total_count = total_small_count + total_medium_count + total_large_count

    print(cls, ":" , total_count)
    print(cls, "total_large_count:" , total_large_count)
    print(cls, "total_medium_count:" ,total_medium_count)
    print(cls, "total_small_count:" , total_small_count)
    return total_count, total_large_count, total_medium_count, total_small_count
    
        

def cal_size(Size, box):
    """Return the fraction of the image area covered by a bounding box.

    Args:
        Size: Indexable pair ``(width, height)`` of the image.
        box: Indexable ``(xmin, xmax, ymin, ymax)`` of the bounding box.

    Returns:
        Box area divided by image area.
    """
    image_area = Size[0] * Size[1]
    box_width = box[1] - box[0]
    box_height = box[3] - box[2]
    return box_width * box_height / image_area

if __name__ == '__main__':

    AnnoPath = 'D:/folder/'
    classes = [str(label) for label in range(1, 8)]

    # Area-ratio thresholds used to split objects into three size buckets:
    #   small:  0    <= ratio <= 0.05
    #   medium: 0.05 <  ratio <= 0.15
    #   large:  0.15 <  ratio <= 1
    small = 0.05
    medium = 0.15

    # Per-class totals plus the small/medium/large breakdown.
    # For plain per-class totals only, call count_class(AnnoPath, cls) in
    # this loop instead.
    for cls in classes:
        count_class_withsize(AnnoPath, cls, small, medium)
        print("\n")


  • 8
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值