python解析xml

空弹壳

已于 2022-08-12 22:54:56 修改

阅读量324

点赞数

分类专栏： python解析文件

文章标签： xml python

于 2022-05-21 22:05:22 首次发布

本文链接：https://blog.csdn.net/weixin_47046791/article/details/124904145

版权

python解析文件专栏收录该内容

1 篇文章 0 订阅

订阅专栏

写入xml：

import xml.dom.minidom as minidom

# Build an in-memory DOM document whose root element is <Root zoom="1.0">.
dom = minidom.getDOMImplementation().createDocument(None, "Root", None)
root = dom.documentElement
root.setAttribute("zoom", "1.0")

# Append five <name> children. Each one carries an empty text node (so it is
# serialized as an open/close tag pair) and two attributes holding its index.
for index_text in map(str, range(5)):
    name_node = root.appendChild(dom.createElement("name"))
    name_node.appendChild(dom.createTextNode(""))
    for attr_name in ("age", "ages"):
        name_node.setAttribute(attr_name, index_text)

# Serialize with one tab per nesting level and one element per line.
with open("default.xml", "w", encoding="utf-8") as out_file:
    dom.writexml(out_file, addindent="\t", newl="\n", encoding="utf-8")

运行结果如下：

<?xml version="1.0" encoding="utf-8"?>
<Root zoom="1.0">
   <name age="0" ages="0"></name>
   <name age="1" ages="1"></name>
   <name age="2" ages="2"></name>
   <name age="3" ages="3"></name>
   <name age="4" ages="4"></name>
</Root>

一：通用的方法

# -*- coding: UTF-8 -*-
# 从文件中读取数据
import os
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
# Create an empty DOM document with a <root zoom="1.0"> element.
# NOTE(review): dom/root1 are not referenced by the functions visible in this
# snippet.
dom = minidom.getDOMImplementation().createDocument(None,'root',None)
root1 = dom.documentElement
root1.setAttribute('zoom', "1.0")
# Global unique identifier; walkData increments it once per visited node.
unique_id = 1
# NOTE(review): filename and alldata are not referenced by the code visible
# in this snippet.
filename=[]
alldata=[]
# 遍历所有的节点
def walkData(root_node, level, result_list):
    """Recursively walk an element tree and print the attributes of <rect> nodes.

    Every descendant of ``root_node`` is visited in document order;
    ``root_node`` itself is never tested against the tag. For each descendant
    whose tag is ``"rect"`` its attribute dict is printed.

    Args:
        root_node: ``xml.etree.ElementTree.Element`` whose subtree is walked.
        level: Depth of ``root_node``; only forwarded (plus one) to the
            recursive calls.
        result_list: Forwarded unchanged to the recursive calls; this
            function never appends to it.

    Side effects:
        Increments the module-level ``unique_id`` once per visited node
        (including ``root_node``) and writes to stdout.
    """
    global unique_id
    unique_id += 1

    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9. A childless element simply yields nothing, so no explicit
    # emptiness check is needed.
    for child in root_node:
        if child.tag == "rect":
            print(child.attrib)
        # To dump labelImg bounding boxes instead, match on "bndbox" and
        # collect [coord.text for coord in child].

        walkData(child, level + 1, result_list)

def getXmlData(file_name):
    """Parse an XML file, print its root tag and walk the whole tree.

    Args:
        file_name: Path (or file object) accepted by ``ET.parse``.

    Returns:
        The list handed to ``walkData`` as its accumulator. It is created
        empty here and returned after the walk.
    """
    tree_root = ET.parse(file_name).getroot()
    print(tree_root.tag)

    collected = []
    # The root element is handed to the walker with depth 0.
    walkData(tree_root, 0, collected)
    return collected


if __name__ == '__main__':
    # Example input: a hard-coded local XML file.
    file_name = r'C:\Users\Administrator\Desktop\9c.xml'
    # getXmlData prints while it walks the tree; R receives the list it returns.
    R = getXmlData(file_name)

二：解析labelImg中xml的坐标信息方法一

# -*- coding: UTF-8 -*-
# 从文件中读取数据
import os
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
# Create an empty DOM document with a <root zoom="1.0"> element.
# NOTE(review): dom/root1 are not referenced by the functions visible in this
# snippet.
dom = minidom.getDOMImplementation().createDocument(None,'root',None)
root1 = dom.documentElement
root1.setAttribute('zoom', "1.0")
# Global unique identifier; walkData increments it once per visited node.
unique_id = 1
# NOTE(review): filename and alldata are not referenced by the code visible
# in this snippet.
filename=[]
alldata=[]
# Filled by walkData: object name -> list of the <bndbox> child texts.
label_result={}
# 遍历所有的节点
def walkData(root_node, level, result_list):
    """Recursively walk an annotation tree and collect object bounding boxes.

    Every descendant of ``root_node`` is visited in document order:

    * the text of each ``<path>`` element is printed;
    * for each ``<object>`` element that has both a ``<name>`` and a
      ``<bndbox>`` direct child, ``label_result[name_text]`` is set to the
      list of the ``<bndbox>`` children's texts. If a tag occurs more than
      once inside one object, the last occurrence wins. Objects sharing the
      same name overwrite each other because the name is the dict key.

    Objects missing either child are skipped, so they neither raise
    ``UnboundLocalError`` nor inherit values left over from a previous object.

    Args:
        root_node: ``xml.etree.ElementTree.Element`` whose subtree is walked.
        level: Depth of ``root_node``; only forwarded (plus one) to the
            recursive calls.
        result_list: Forwarded unchanged to the recursive calls; this
            function never appends to it.

    Side effects:
        Increments the module-level ``unique_id`` once per visited node,
        mutates the module-level ``label_result`` dict and writes to stdout.
    """
    global unique_id
    unique_id += 1

    # Iterate elements directly: Element.getchildren() was removed in
    # Python 3.9.
    for child in root_node:
        if child.tag == "path":
            print(child.text)
        if child.tag == "object":
            name_node = None
            box_node = None
            for part in child:
                if part.tag == "name":
                    name_node = part
                if part.tag == "bndbox":
                    box_node = part
            # Compare against None explicitly: a childless Element is falsy.
            if name_node is not None and box_node is not None:
                label_result[name_node.text] = [coord.text for coord in box_node]

        walkData(child, level + 1, result_list)

def getXmlData(file_name):
    """Parse an annotation XML file, print its root tag and walk the tree.

    Args:
        file_name: Path (or file object) accepted by ``ET.parse``.

    Returns:
        The list handed to ``walkData`` as its accumulator. It is created
        empty here and returned after the walk.
    """
    tree_root = ET.parse(file_name).getroot()
    print(tree_root.tag)

    collected = []
    # The root element is handed to the walker with depth 0.
    walkData(tree_root, 0, collected)
    return collected


if __name__ == '__main__':
    # Example input: a hard-coded local annotation file.
    file_name =  r'C:\Users\Administrator\Desktop\ocr_easy\code_xml\1.xml'
    # R receives the list returned by getXmlData.
    R = getXmlData(file_name)
    # label_result is the module-level dict populated during the walk.
    print(label_result)

三：解析labelImg中xml的坐标信息方法二

import xml.etree.ElementTree as ET
import os
import cv2
import numpy as np

sorce = r'C:\Users\Administrator\Desktop\ocr_easy\code_xml'  # folder path
# pic_path=r"D:\iflytek_load\北部湾商机\电汇凭证训练集100"
# Output folder; only referenced by the commented-out crop/save code below.
save_path=r"C:\Users\Desktop\cut_pics"
# Names of all entries in the annotation folder.
# NOTE(review): this module-level name shadows the builtin dir().
dir = os.listdir(sorce)
def rotate_bound(image, angle):
    """Rotate an image about its centre on a canvas enlarged to fit the result.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The warped image of size (new_width, new_height); border pixels are
        filled with (255, 255, 255).
    """
    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2

    # 2x3 affine matrix for a rotation about the image centre, scale 1.0.
    matrix = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Bounding dimensions of the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the translation so the old centre lands on the new canvas centre.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(
        image, matrix, (new_width, new_height), borderValue=(255, 255, 255)
    )

# For every labelImg annotation in the folder: load the image it points to,
# rotate it by 90 degrees, and print each object's label and bounding box.
for xml_name in dir:
    # The folder listing may contain non-annotation entries; handing those to
    # ET.parse would abort the whole run with a ParseError.
    if not xml_name.lower().endswith(".xml"):
        continue
    print(xml_name)
    # img_path=os.path.join(pic_path,xml_name.replace("xml","jpg"))

    tree = ET.parse(os.path.join(sorce, xml_name))
    rect = {}
    line = ""
    root = tree.getroot()
    # Image path recorded in the annotation (the last <path> element wins).
    for name in root.iter('path'):
        rect['path'] = name.text
    if not rect.get('path'):
        print("skip " + xml_name + ": no <path> element")
        continue
    # Read the image from raw bytes and decode it unchanged (flag -1).
    img = cv2.imdecode(np.fromfile(rect['path'], dtype=np.uint8), -1)
    if img is None:
        # imdecode returns None when the bytes cannot be decoded as an image.
        print("skip " + xml_name + ": cannot decode " + rect['path'])
        continue
    img1 = rotate_bound(img, 90)
    for ob in root.iter('object'):
        for xmin in ob.iter('name'):
            type_name = xmin.text
            print(type_name)
        for bndbox in ob.iter('bndbox'):
            print()
            # Bounding-box coordinates, kept as the strings found in the XML.
            for xmin in bndbox.iter('xmin'):
                rect['xmin'] = xmin.text
            for ymin in bndbox.iter('ymin'):
                rect['ymin'] = ymin.text
            for xmax in bndbox.iter('xmax'):
                rect['xmax'] = xmax.text
            for ymax in bndbox.iter('ymax'):
                rect['ymax'] = ymax.text
            print(type(rect['xmin']))
            coords = ' '.join([rect['xmin'], rect['ymin'], rect['xmax'], rect['ymax']])
            print(coords)
            line = coords + " "
            # Crop the annotated region:
            # img_cut = img1[int(rect['ymin']):int(rect['ymax']), int(rect['xmin']):int(rect['xmax'])]
            # save_img_path=os.path.join(save_path,"{}_{}.jpg".format(type_name,os.path.splitext(xml_name)[0]))
            # # Save the generated image:
            # cv2.imencode('.jpg', img_cut, )[1].tofile(save_img_path)
            # cv2.imencode('.jpg', img1, )[1].tofile(save_img_path)

四：常用xml模式

<?xml version="1.0" encoding="utf-8"?>
<Root zoom="1.0">
	<name age="0" ages="0"></name>
	<name age="1" ages="1"></name>
	<name age="2" ages="2"></name>
	<name age="3" ages="3"></name>
	<name age="4" ages="4"></name>
</Root>

解析方法

# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
# NOTE(review): module-level list; not referenced by the code visible in this
# snippet.
all_xml_list=[]
def et_parse(xml_file):
    """Print the attribute dict of every direct child of the XML root.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        None; the attributes are only written to stdout, one dict per line.
    """
    top_element = ET.parse(xml_file).getroot()
    for node in top_element:
        print(node.attrib)


if __name__ == "__main__":
    # Example input: a hard-coded local XML file.
    file_name = r'C:\Users\Administrator\Desktop\29c.xml'

    # Prints the attribute dict of each direct child of the root element.
    et_parse(file_name)