【数据处理】xml的创建、读写问题

本文介绍了XML的基本操作,包括如何创建、读取和修改XML文件。示例中展示了VOC数据标注格式的XML结构,并提供了删除特定节点的代码。此外,还提供了XML读取的示例,展示如何从XML文件中提取电影信息。这些技能对于处理图像识别和机器学习数据集至关重要。
摘要由CSDN通过智能技术生成

一、VOC格式数据标注范例

<annotation>
    <folder>VOC2007</folder>
    <filename>DSC00142.JPG</filename>
    <source>
        <database>ezi</database>
    </source>
    <size>
        <width>760</width>
        <height>700</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>mlc</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>483</xmin>
            <ymin>446</ymin>
            <xmax>582</xmax>
            <ymax>549</ymax>
        </bndbox>
    </object>
    <object>
        <name>mlc</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>131</xmin>
            <ymin>442</ymin>
            <xmax>221</xmax>
            <ymax>532</ymax>
        </bndbox>
    </object>
</annotation>

二、关于xml的创建、读写问题

(1)xml的创建

# -*- coding:utf-8 -*-
# https://zhuanlan.zhihu.com/p/54269963


# from xml.etree import ElementTree as  etree
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
from xml.etree.ElementTree import ElementTree
from xml.dom import minidom

# Build the document skeleton first: <root> with <head> and <body> children.
root = Element('root')
head = SubElement(root, 'head')
body = SubElement(root, 'body')

# Fill in the content: <title> is nested under <head>, <body> carries text.
title = SubElement(head, 'title')
title.text = "Well Dola!"
body.text = "I Love Dola!"

# Wrap the root element in a tree object and serialize it to disk as UTF-8.
tree = ElementTree(root)
tree.write('result.xml', encoding='utf-8')

  • 结果:result.xml(注意:tree.write 不会自动缩进,实际文件内容为单行;下面是为便于阅读而格式化后的样子)
<root>
    <head>
        <title>Well Dola!</title>
    </head>
    <body>I Love Dola!</body>
</root>

(2)在xml有根元素时写入

  • customer.xml
<customer ID="C003">
    <name>kavin</name>
    <phone>32467</phone>
    <comments>
        <![CDATA[A small but healthy company.]]>
    </comments>
</customer>
  • 在 customer.xml 中写入的范例代码
from xml.dom.minidom import parse
import xml.dom.minidom
import os


def writeXML():
    """Append a new <customer> element to ./customer.xml and save a copy.

    Parses ``./customer.xml``, builds a ``<customer ID="C003">`` element with
    ``<name>``, ``<phone>`` and a CDATA ``<comments>`` child, appends it to
    the document's root element, and writes the whole document to
    ``added_customer.xml`` in the current working directory.
    """
    domTree = parse("./customer.xml")
    # Root element of the parsed document
    rootNode = domTree.documentElement

    # Create the new customer node
    customer_node = domTree.createElement("customer")
    customer_node.setAttribute("ID", "C003")

    # Create the name node and give it a text value
    name_node = domTree.createElement("name")
    name_text_value = domTree.createTextNode("kavin")
    name_node.appendChild(name_text_value)  # attach the text node to name_node
    customer_node.appendChild(name_node)

    # Create the phone node and give it a text value
    phone_node = domTree.createElement("phone")
    phone_text_value = domTree.createTextNode("32467")
    phone_node.appendChild(phone_text_value)  # attach the text node to phone_node
    customer_node.appendChild(phone_node)

    # Create the comments node; its content is a CDATA section
    comments_node = domTree.createElement("comments")
    cdata_text_value = domTree.createCDATASection("A small but healthy company.")
    comments_node.appendChild(cdata_text_value)
    customer_node.appendChild(comments_node)

    rootNode.appendChild(customer_node)

    # writexml's ``encoding`` argument only sets the encoding named in the XML
    # header; the actual bytes are produced by the file object. Open the file
    # as UTF-8 explicitly so the content matches the declaration on every
    # platform instead of depending on the locale's default encoding.
    with open('added_customer.xml', 'w', encoding='utf-8') as f:
        # addindent sets the per-level indentation of child nodes
        domTree.writexml(f, addindent='  ', encoding='utf-8')


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    writeXML()

  • 结果
<?xml version="1.0" encoding="utf-8"?>
<customer ID="C003">
    <name>kavin</name>
    <phone>32467</phone>
    <comments>
        <![CDATA[A small but healthy company.]]>
    </comments>
    <customer ID="C003">
        <name>kavin</name>
        <phone>32467</phone>
        <comments><![CDATA[A small but healthy company.]]>    </comments>
    </customer>
</customer>

(3)xml的读取

  • movie.xml
<collection shelf="New Arrivals">
    <movie title="Enemy Behind">
        <type>War, Thriller</type>
        <format>DVD</format>
        <year>2003</year>
        <rating>PG</rating>
        <stars>10</stars>
        <description>Talk about a US-Japan war</description>
    </movie>
    <movie title="Transformers">
        <type>Anime, Science Fiction</type>
        <format>DVD</format>
        <year>1989</year>
        <rating>R</rating>
        <stars>8</stars>
        <description>A scientific fiction</description>
    </movie>
    <movie title="Trigun">
        <type>Anime, Action</type>
        <format>DVD</format>
        <episodes>4</episodes>
        <rating>PG</rating>
        <stars>10</stars>
        <description>Vash the Stampede!</description>
    </movie>
    <movie title="Ishtar">
        <type>Comedy</type>
        <format>VHS</format>
        <rating>PG</rating>
        <stars>2</stars>
        <description>Viewable boredom</description>
    </movie>
</collection>
  • 读取代码
# https://www.cnblogs.com/smart-zihan/p/12015192.html

from xml.dom.minidom import parse
import xml.dom.minidom
import os


def is_xml_exist(xml_path):
    """Return True if ``xml_path`` exists on the filesystem, otherwise False."""
    return os.path.exists(xml_path)


def read_movie_xml(path="movie.xml"):
    """Print the shelf attribute and the details of every movie in an XML file.

    Args:
        path: Location of the movie collection XML. Defaults to ``movie.xml``
            in the current working directory.

    Prints a message and returns without parsing when ``path`` does not
    exist. For each ``<movie>`` element the title attribute (when present)
    and the text of its ``<type>``, ``<format>``, ``<rating>`` and
    ``<description>`` children are printed; a child that is missing or has
    no content is skipped instead of raising ``IndexError``.
    """
    if not is_xml_exist(path):
        print("%s is not exist" % path)
        return

    # Open the XML document with the minidom parser
    open_xml = parse(path)
    root_node = open_xml.documentElement

    shelf_attrib = "shelf"
    if root_node.hasAttribute(shelf_attrib):
        print("Lable: %s\tAttrib: %s\t\tValue: %s" % (
            root_node.nodeName, shelf_attrib, root_node.getAttribute(shelf_attrib)))
    print("")

    # (printed label, child tag) pairs, in output order
    fields = (
        ("Type", "type"),
        ("Format", "format"),
        ("Rating", "rating"),
        ("Description", "description"),
    )

    # Print the details of every movie in the collection
    for movie in root_node.getElementsByTagName("movie"):
        print("**** Movie ****")
        if movie.hasAttribute("title"):
            print("Title: %s" % movie.getAttribute("title"))

        for label, tag in fields:
            matches = movie.getElementsByTagName(tag)
            # Skip children that are absent or empty rather than crashing
            if not matches or not matches[0].childNodes:
                continue
            print("%s: %s" % (label, matches[0].childNodes[0].data))

        print("")


# Run the reader only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    read_movie_xml()

(4)删除path节点

  • 原xml
<annotation>
	<folder>117_PANA</folder>
	<filename>P1160997.JPG</filename>
	<path>E:\样本库\0814草地贪夜蛾\117_PANA\P1160997.JPG</path>
	<source>
		<database>Unknown</database>
	</source>
	<size>
		<width>4608</width>
		<height>3456</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>cdtye</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>1832</xmin>
			<ymin>2196</ymin>
			<xmax>2258</xmax>
			<ymax>2628</ymax>
		</bndbox>
	</object>
</annotation>

  • 删除path节点代码
# -*- coding=utf-8 -*-
import os

try:
    import xml.etree.cElementTree as ET
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree offers the same API.
    import xml.etree.ElementTree as ET


def deal_xml(xml_path):
    """Remove every top-level <path> element from an XML file, in place.

    Args:
        xml_path: Path of the XML file to rewrite. The file is parsed, all
            direct children of the root whose tag is ``path`` are removed,
            and the result is written back to the same location.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    # findall returns a separate list of the matching direct children, so
    # removing them does not disturb the iteration. Removing while iterating
    # over ``root`` itself skips the element that follows each removed node.
    for path_node in root.findall('path'):
        root.remove(path_node)
    tree.write(xml_path)


if __name__ == '__main__':
    # Directory that holds the annotation files to clean up.
    path = r"D:\pycharm\test\处理xml\xml"
    for i in os.listdir(path):
        # Only XML files can be parsed; skip other files and sub-directories
        # so one stray entry does not abort the whole batch.
        if not i.lower().endswith('.xml'):
            continue
        xml_path = os.path.join(path, i)
        deal_xml(xml_path)

  • 处理后
<annotation>
	<folder>117_PANA</folder>
	<filename>P1160997.JPG</filename>
	<source>
		<database>Unknown</database>
	</source>
	<size>
		<width>4608</width>
		<height>3456</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>cdtye</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>1832</xmin>
			<ymin>2196</ymin>
			<xmax>2258</xmax>
			<ymax>2628</ymax>
		</bndbox>
	</object>
</annotation>

参考

Python创建、修改、保存XML文件——xml.etree.ElementTree模块

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值