使用xml.dom.minidom对xml进行解析

最新推荐文章于 2024-07-12 15:57:17 发布

拾光123

最新推荐文章于 2024-07-12 15:57:17 发布

阅读量2.8k

点赞数

分类专栏： python基础

本文链接：https://blog.csdn.net/kkkxiong1/article/details/106418526

版权

python基础专栏收录该内容

13 篇文章 0 订阅

订阅专栏

使用xml.dom.minidom对xml进行解析

<?xml version="1.0" encoding="utf-8"?>
<annotations>
	<version>1.1</version>
  <meta>
    <task>
      ...
  </meta>
  <image id="0" name="pose_0420_task9_0000.jpg" width="1080" height="1920">
    <box label="person" occluded="0" xtl="438.90" ytl="170.92" xbr="715.86" ybr="1317.40">
    </box>
    <polyline label="right_side" occluded="0" points="596.00,808.10;564.00,631.00;533.00,410.00;543.00,790.00;543.00,1011.00;514.40,1261.30">
    </polyline>
    <polyline label="left_side" occluded="1" points="578.90,798.10;543.00,621.00;522.00,399.00;533.00,779.00;554.00,1011.00;524.40,1219.80">
    </polyline>
    <polyline label="head"
<image id="0" name="pose_0420_task9_0000.jpg" width="1080" height="1920">
    <box label="person" occluded="0" xtl="438.90" ytl="170.92" xbr="715.86" ybr="1317.40">
    </box>
    <polyline label="right_side" occluded="0" points="596.00,808.10;564.00,631.00;533.00,410.00;543.00,790.00;543.00,1011.00;514.40,1261.30">
    </polyline>
    <polyline label="left_side" occluded="1" points="578.90,798.10;543.00,621.00;522.00,399.00;533.00,779.00;554.00,1011.00;524.40,1219.80">
    </polyline>
    <polyline label="head" occluded="0" points="668.49,199.19;578.33,360.01">
    </polyline>
  </image>
  <image id="1" name="pose_0420_task9_0001.jpg" width="1080" height="1920">
    <box label="person" occluded="0" xtl="408.40" ytl="174.55" xbr="742.27" ybr="1312.90">
    </box>
    <polyline label="right_side" occluded="0" points="591.10,816.10;556.40,625.50;536.90,404.60;533.00,758.00;533.00,1019.00;512.00,1248.00">
    </polyline>
    <polyline label="head" occluded="0" points="688.20,217.90;585.50,353.30">
    </polyline>
    <polyline label="left_side" occluded="1" points="544.00,780.00;544.00,617.00;512.00,393.20;501.00,758.00;696.00,878.00;495.20,972.90">
    </polyline>
  </image>
  <image id="2" name="pose_0420_task9_0002.jpg" width="1080" height="1920">
    <box label="person" occluded="0" xtl="415.80" ytl="213.73" xbr="713.20" ybr="1265.60">
    </box>
    <polyline label="left_side" occluded="1" points="583.00,745.00;523.00,576.00;493.00,406.00;483.00,735.00;533.00,955.00;513.30,1211.50">
    </polyline>
    <polyline label="head" occluded="0" points="658.90,251.30;569.52,377.23">
    </polyline>
    <polyline label="right_side" occluded="0" points="603.00,755.00;533.00,596.00;533.00,416.00;533.00,745.00;673.00,925.00;496.40,1110.10">
    </polyline>
  </image>
</annotations>

上面是一个xml文件，解析的思路是重写xml。先直接读取整个xml文件，将头信息写入到新的xml，之后用xmldom.parse对xml进行解析。需要注意的是：

getElementsByTagName会返回调用它的节点之下所有标签名匹配的元素（对文档根节点调用时，即在整个xml中查找），返回的结果可以用遍历的方式逐个访问；
getAttribute用于读取元素节点上指定属性的值，返回值为字符串；

整个解析比较简单，代码如下：


import cv2
import os
import xml.dom.minidom as xmldom
import numpy as np

# %-format templates for the XML fragments emitted while rewriting an
# annotation file. Every template ends with a newline so fragments can be
# written back to back.

# Opening tag of an <image> element: (id, name, width, height).
image_info = '  <image id="%d" name="%s" width="%d" height="%d">\n'
# Closing tag of an <image> element.
image_info_end = '  </image>\n'

# <box> element: (label, xtl, ytl, xbr, ybr); occluded is fixed to "0".
box_info = (
    '    <box label="%s" occluded="0" xtl="%.2f" ytl="%.2f" xbr="%.2f" ybr="%.2f">\n'
    '    </box>\n'
)
# <points> element: (label, occluded, points).
points_info = (
    '    <points label="%s" occluded="%s" points="%s">\n'
    '    </points>\n'
)

# <polyline> element: (label, occluded, points).
ployline_info = (
    '    <polyline label="%s" occluded="%s" points="%s">\n'
    '    </polyline>\n'
)

# <polygon> element: (label, points); occluded is fixed to "0".
ploygon_info = (
    '    <polygon label="%s" occluded="0" points="%s">\n'
    '    </polygon>\n'
)
# Closing tag of the root <annotations> element.
end_info = '</annotations>\n'

def get_image_info(obj):
    """Read the identifying attributes of an ``<image>`` element.

    Args:
        obj: A DOM element exposing ``getAttribute``.

    Returns:
        A tuple ``(id, name, width, height)`` where ``id``, ``width`` and
        ``height`` are converted to ``int`` and ``name`` is left as a string.

    Raises:
        ValueError: If ``id``, ``width`` or ``height`` is not an integer string.
    """
    read = obj.getAttribute
    image_id = int(read("id"))
    image_name = read("name")
    image_width = int(read("width"))
    image_height = int(read("height"))
    return image_id, image_name, image_width, image_height

def get_box_info(obj):
    """Read the corner coordinates of a ``<box>`` element.

    Args:
        obj: A DOM element exposing ``getAttribute``.

    Returns:
        A list ``[xtl, ytl, xbr, ybr]`` of floats, in that order.

    Raises:
        ValueError: If any of the four attributes is not a valid float string.
    """
    corner_names = ("xtl", "ytl", "xbr", "ybr")
    return [float(obj.getAttribute(corner)) for corner in corner_names]

def get_polyline_info(obj):
    """Read the label and the flattened integer coordinates of a polyline.

    The ``points`` attribute has the form ``"x1,y1;x2,y2;..."``. Both
    separators are turned into whitespace and the string is then split on
    any run of whitespace, so an empty attribute yields an empty list and
    stray spaces around separators are tolerated instead of raising.

    Args:
        obj: A DOM element exposing ``getAttribute``.

    Returns:
        A tuple ``(label, coords)`` where ``coords`` is the flat list
        ``[x1, y1, x2, y2, ...]``; each value is parsed as a float and then
        truncated to ``int``.

    Raises:
        ValueError: If a coordinate token is not a valid float string.
    """
    raw_points = obj.getAttribute("points")
    # split() without an argument drops empty tokens; split(' ') would hand
    # '' to float() for an empty attribute or for "x, y" style spacing.
    tokens = raw_points.replace(';', ' ').replace(',', ' ').split()
    return obj.getAttribute("label"), [int(float(token)) for token in tokens]

def get_polyline_info_new(obj):
    """Read the raw ``label``, ``occluded`` and ``points`` attributes.

    Unlike ``get_polyline_info`` no parsing is done: all three values are
    returned exactly as the strings stored on the element.

    Args:
        obj: A DOM element exposing ``getAttribute``.

    Returns:
        A tuple ``(label, occluded, points)`` of strings.
    """
    wanted = ("label", "occluded", "points")
    return tuple(obj.getAttribute(attribute) for attribute in wanted)


def write(cvat_xml_path, rewrite_file_path):
    """Rewrite an annotation XML file, adding derived point elements.

    The leading header lines of the input are copied verbatim. Every
    ``<image>`` element is then re-emitted with:

    * its first ``<box>``, written with the label ``person``;
    * each ``<polyline>`` with its original label, occluded flag and points,
      followed by a ``<points>`` element holding that polyline's last point;
    * one extra ``<polyline label="person">`` joining the 4th point of
      ``left_side`` and ``right_side``. It is written just before the third
      polyline of a group, and only when both sides had exactly 6 points.

    Annotation inconsistencies (a side without 6 points, repeated labels in
    a group of three) are reported with ``print``; they are not raised.

    Args:
        cvat_xml_path: Path of the XML file to read.
        rewrite_file_path: Path of the XML file to create or overwrite.

    Raises:
        IndexError: If an ``<image>`` element contains no ``<box>``.
        xml.parsers.expat.ExpatError: If the input is not well-formed XML.
    """
    # Both files are handled as UTF-8 instead of the platform default, so
    # non-ASCII text survives the copy unchanged.
    # NOTE(review): assumes the input is UTF-8 encoded - confirm for other exports.
    with open(cvat_xml_path, "r", encoding="utf-8") as source_file:
        cvat_xml = source_file.readlines()

    # NOTE(review): the header length is hard-coded (58 lines for files of at
    # least 59 lines, otherwise 55); verify it matches the files being processed.
    line_end = 58 if len(cvat_xml) >= 59 else 55

    # Parse before touching the output so a malformed input does not leave a
    # truncated, header-only result file behind.
    domobj = xmldom.parse(cvat_xml_path)
    print("xmldom.parse:", type(domobj))
    image_nodes = domobj.documentElement.getElementsByTagName("image")

    # The context manager closes the output even if a write or lookup fails.
    with open(rewrite_file_path, "w", encoding="utf-8") as rewrite_file:
        rewrite_file.writelines(cvat_xml[:line_end])

        for image_obj in image_nodes:
            image_id, filename, w, h = get_image_info(image_obj)
            rewrite_file.write(image_info % (image_id, filename, w, h))

            # Only the first box of the image is kept.
            x1, y1, x2, y2 = get_box_info(image_obj.getElementsByTagName("box")[0])
            rewrite_file.write(box_info % ('person', x1, y1, x2, y2))

            count = 0
            left_waist_point = ''
            right_waist_point = ''
            label_list = []
            for sub_ployline in image_obj.getElementsByTagName("polyline"):
                count += 1
                label, occluded, pre_points = get_polyline_info_new(sub_ployline)
                label_list.append(label)
                list_point = pre_points.split(';')

                # The 4th point of each 6-point side line is the waist point.
                if label == 'left_side':
                    if len(list_point) == 6:
                        left_waist_point = list_point[3]
                    else:
                        print(filename, image_id, 'left_side points is not 6')
                elif label == 'right_side':
                    if len(list_point) == 6:
                        right_waist_point = list_point[3]
                    else:
                        print(filename, image_id, 'right_side points is not 6')

                # On the third polyline of a group, emit the waist line and
                # reset the per-group state.
                if left_waist_point and right_waist_point and count == 3:
                    waist_point = left_waist_point + ";" + right_waist_point
                    rewrite_file.write(ployline_info % ('person', '0', waist_point))
                    left_waist_point = ''
                    right_waist_point = ''
                    count = 0
                    if len(set(label_list)) != 3:
                        print(filename, image_id, 'name is repeated')
                    label_list.clear()

                # Re-emit the polyline, then its last point as a <points> element.
                rewrite_file.write(ployline_info % (label, occluded, pre_points))
                rewrite_file.write(points_info % (label, occluded, list_point[-1]))

            rewrite_file.write(image_info_end)
        rewrite_file.write(end_info)


if __name__ == '__main__':
    # Script entry point: rewrite task3.xml from the working directory into
    # rewrite_task3.xml.
    cvat_xml_path, rewrite_file_path = r"task3.xml", r"rewrite_task3.xml"
    write(cvat_xml_path, rewrite_file_path)

拾光123

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
使用xml.dom.minidom对xml进行解析

使用xml.dom.minidom对xml进行解析<?xml version="1.0" encoding="utf-8"?></annotations> <version>1.1</version> <meta> <task> ... </meta> <image id="0" name="pose_0420_task9_0000.jpg" width="1080" height
复制链接

扫一扫

专栏目录