下图是需要读的xml文档
下图是需要写入的目标xml文档
代码实现
# -*- coding:utf-8 -*-
import xml.etree.ElementTree as ET
from lxml import etree
import os
# Add line breaks by writing a newline-plus-space string into the .text and
# .tail attributes of the elements.
def __indent(elems, level="\n"):
    """Insert newline/indent whitespace into a two-level element tree, in place.

    Args:
        elems: Root element; its direct children are placed one per line.
        level: Prefix written before the single-space indent. Defaults to a
            newline.

    Returns:
        None. ``elems`` and its descendants are modified in place.

    The root's ``text`` is set whenever it has at least one child, rather than
    only for one specific child count, and children without sub-elements are
    skipped instead of raising ``IndexError``.
    """
    pad = level + " "
    # Whitespace between the root's opening tag and its first child.
    if len(elems):
        elems.text = pad
    for elem in elems:
        # Whitespace after this child's closing tag.
        elem.tail = pad
        if len(elem):
            # Whitespace after the first grandchild's closing tag.
            elem[0].tail = pad
            if len(elem) == 1:
                # Whitespace between the child's opening tag and its only
                # sub-element.
                elem.text = pad
# Directory that holds the per-image source XML files.
dir_path = r"D:\data\20220711\VP_Train_LQ\0bec9e81a866a0b0d4ac3ed5fc9b3d49"
# Attribute-less wrapper node that collects one <image> child per source file.
roots = ET.Element('root')
# Keep only the XML files. os.listdir() returns entries in arbitrary order, so
# sort them to make the id assignment below reproducible.
xml_names = sorted(item for item in os.listdir(dir_path) if item.endswith("xml"))
# enumerate() gives every image its own id; a counter that is never advanced
# would label every image "0".
for i, item in enumerate(xml_names):
    xml_root = etree.parse(os.path.join(dir_path, item)).getroot()
    # Key values read from the source document.
    vp_x = xml_root.findall("vp_x")[0].text
    vp_y = xml_root.findall("vp_y")[0].text
    filename = xml_root.findall("filename")[0].text
    # Descend into the <size> node.
    size_node = xml_root.findall("size")[0]
    height = size_node.findall("height")[0].text
    width = size_node.findall("width")[0].text

    # Create the <image> node; the order of its attributes does not matter.
    image = ET.Element('image')
    image.set("id", str(i))
    image.set("name", filename)
    image.set("width", str(width))
    image.set("height", str(height))

    # Create the <points> node and fill in its attributes.
    points = ET.Element("points")
    points.set("label", "vp")
    points.set("occluded", "0")
    points.set("source", "manual")
    points.set("points", f"{vp_x},{vp_y}")
    points.set("z_order", "0")
    points.text = "\n" + " "
    image.append(points)

    # Attach the finished child to the wrapper node.
    roots.append(image)

# Add the line-break whitespace to the assembled tree.
__indent(roots)
# Wrap the tree in a document and write it out.
tree = ET.ElementTree(roots)
tree.write('default.xml', encoding='utf-8', xml_declaration=True)
注意,实际生成的xml有二十五个子节点,在第二张图中我们只截取了其中一个
下面是几个操作案例或者注意事项
# 如果一个节点有内容,是下面这个样子,可以用.text修改。此时内容为两个空格
<image>  </image>
# 下面是节点没有内容时的样子(自闭合标签)
<image/>
# 下面这个xml文档只有一个根节点annotations,现在给它加上一个子节点,并且加入换行操作
<?xml version='1.0' encoding='UTF-8'?>
<annotations>
</annotations>
from lxml import etree
# The same file is read and then overwritten with the modified tree.
dir_path = "..xml"
tree = etree.parse(dir_path)
root = tree.getroot()
# Append an <image> child to the root node.
image = etree.SubElement(root, "image")
# A newline as the node's content keeps the opening and closing tags on
# separate lines; a newline as its tail puts the parent's closing tag on a
# line of its own.
image.text = image.tail = "\n"
tree.write(dir_path, encoding="utf-8", xml_declaration=True)
# 如果根节点下有多个子节点,可以遍历逐个处理
# 操作结果
<?xml version='1.0' encoding='UTF-8'?>
<annotations>
<image>
</image>
</annotations>
将一个xml中的信息写入到另一个xml
from lxml import etree
dir_path = r"C:\Users\17230\Downloads\4\annotations.xml"
img_path = r"C:\Users\17230\Downloads\4\images"
xml_path = r"D:\data\fda0b5c69e1035afc5b34ccd8b01088d_000226.xml"
# Load the large annotations document.
tree = etree.parse(dir_path)
root = tree.getroot()
# Load the small XML document; its nodes are overwritten and the result is
# saved once per matching image.
xml_tree = etree.parse(xml_path)
xml_root = xml_tree.getroot()
for img in root:
    # Select by tag name. Searching the serialized XML for the substring
    # "image" would also match any other element that merely contains it.
    if img.tag != "image":
        continue
    name = img.attrib["name"]
    # Strip only the final extension so names that contain dots stay intact.
    xml_name = name.rsplit(".", 1)[0]
    width = img.attrib["width"]
    height = img.attrib["height"]
    for vp in img:
        # NOTE(review): matches on label == "vp" instead of a substring search
        # over the serialized element; confirm the annotations use exactly
        # this label.
        if vp.get("label") != "vp":
            continue
        points = vp.attrib["points"]
        vp_x, vp_y = points.split(",")[:2]
        xml_root.findall("vp_x")[0].text = vp_x
        xml_root.findall("vp_y")[0].text = vp_y
        xml_root.findall("filename")[0].text = name
        # Descend into the <size> node.
        size_root = xml_root.findall("size")
        size_root[0].findall("height")[0].text = height
        size_root[0].findall("width")[0].text = width
        # "\\" is an explicit backslash; "\{" is an invalid escape sequence.
        xml_tree.write(f"{img_path}\\{xml_name}.xml", encoding='utf-8', xml_declaration=True)