写入xml:
import xml.dom.minidom as minidom
# Build a document whose root element is <Root zoom="1.0"> and give it five
# <name> children carrying the loop index in two attributes.
dom = minidom.getDOMImplementation().createDocument(None, "Root", None)
root = dom.documentElement
root.setAttribute("zoom", "1.0")
for i in range(5):
    element = dom.createElement("name")
    # Empty text child — presumably so the element is serialized with an
    # explicit closing tag, as shown in the sample output.
    element.appendChild(dom.createTextNode(""))
    for attribute in ("age", "ages"):
        element.setAttribute(attribute, str(i))
    root.appendChild(element)

# Serialize the document, tab-indented with one node per line.
with open("default.xml", "w", encoding="utf-8") as f:
    dom.writexml(f, addindent="\t", newl="\n", encoding="utf-8")
运行结果如下:
<?xml version="1.0" encoding="utf-8"?>
<Root zoom="1.0">
<name age="0" ages="0"></name>
<name age="1" ages="1"></name>
<name age="2" ages="2"></name>
<name age="3" ages="3"></name>
<name age="4" ages="4"></name>
</Root>
一:通用的方法
# -*- coding: UTF-8 -*-
# 从文件中读取数据
import os
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
# Empty DOM document whose root element is <root zoom="1.0">.
_dom_impl = minidom.getDOMImplementation()
dom = _dom_impl.createDocument(None, 'root', None)
root1 = dom.documentElement
root1.setAttribute('zoom', "1.0")

# Global counter, incremented once per node visited by walkData.
unique_id = 1

# Module-level lists; nothing in this section appends to them.
filename, alldata = [], []
# 遍历所有的节点
def walkData(root_node, level, result_list):
    """Recursively walk an ElementTree node and print every ``rect`` element's attributes.

    Args:
        root_node: ``xml.etree.ElementTree.Element`` whose descendants are visited.
        level: Depth of ``root_node``; passed on as ``level + 1`` to children.
        result_list: Passed through the recursion unchanged; nothing is
            appended to it here.

    Side effects:
        Increments the module-level ``unique_id`` once per visited node and
        prints the attribute dict of each descendant whose tag is ``rect``.
    """
    global unique_id
    unique_id += 1
    # Element.getchildren() was removed in Python 3.9; iterating an Element
    # yields its direct children and works on every Python 3 version.
    for child in root_node:
        if child.tag == "rect":
            print(child.attrib)
        walkData(child, level + 1, result_list)
def getXmlData(file_name):
    """Parse ``file_name``, print the root tag, walk the tree, and return the result list.

    The list is created here and handed to ``walkData``; it is returned as
    ``walkData`` leaves it.
    """
    tree_root = ET.parse(file_name).getroot()
    print(tree_root.tag)
    collected = []
    start_depth = 0  # the root element is passed to walkData as depth 0
    walkData(tree_root, start_depth, collected)
    return collected
if __name__ == "__main__":
    # Hard-coded input path; change it to an XML file that exists locally.
    file_name = r'C:\Users\Administrator\Desktop\9c.xml'
    R = getXmlData(file_name)
二:解析labelImg中xml的坐标信息方法一
# -*- coding: UTF-8 -*-
# 从文件中读取数据
import os
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
# Empty DOM document whose root element is <root zoom="1.0">.
_dom_impl = minidom.getDOMImplementation()
dom = _dom_impl.createDocument(None, 'root', None)
root1 = dom.documentElement
root1.setAttribute('zoom', "1.0")

# Global counter, incremented once per node visited by walkData.
unique_id = 1

# Module-level lists; nothing in this section appends to them.
filename, alldata = [], []

# Filled by walkData: object name -> list of the <bndbox> children's texts.
label_result = dict()
# 遍历所有的节点
def walkData(root_node, level, result_list):
    """Recursively walk a labelImg annotation tree and collect bounding boxes.

    For every descendant ``<path>`` element the text is printed. For every
    descendant ``<object>`` element, the texts of the children of each of
    its ``<bndbox>`` elements are stored in the module-level
    ``label_result`` dict under the object's ``<name>`` text.

    Args:
        root_node: ``xml.etree.ElementTree.Element`` whose descendants are visited.
        level: Depth of ``root_node``; passed on as ``level + 1`` to children.
        result_list: Passed through the recursion unchanged; nothing is
            appended to it here.

    Side effects:
        Increments the module-level ``unique_id`` once per visited node and
        mutates ``label_result``. Objects sharing the same name overwrite
        each other because the name is the dict key.
    """
    global unique_id
    unique_id += 1
    # Element.getchildren() was removed in Python 3.9; iterating an Element
    # yields its direct children and works on every Python 3 version.
    for child in root_node:
        if child.tag == "path":
            print(child.text)
        if child.tag == "object":
            # Look the name up per object so it neither depends on <name>
            # preceding <bndbox> nor leaks over from a previous object.
            name = child.findtext("name")
            if name is not None:
                for box in child.findall("bndbox"):
                    label_result[name] = [coord.text for coord in box]
        walkData(child, level + 1, result_list)
def getXmlData(file_name):
    """Parse ``file_name``, print the root tag, walk the tree, and return the result list.

    The list is created here and handed to ``walkData``; it is returned as
    ``walkData`` leaves it.
    """
    tree_root = ET.parse(file_name).getroot()
    print(tree_root.tag)
    collected = []
    start_depth = 0  # the root element is passed to walkData as depth 0
    walkData(tree_root, start_depth, collected)
    return collected
if __name__ == "__main__":
    # Hard-coded input path; change it to an annotation file that exists locally.
    file_name = r'C:\Users\Administrator\Desktop\ocr_easy\code_xml\1.xml'
    R = getXmlData(file_name)
    # Show the name -> bounding-box mapping that walkData filled in.
    print(label_result)
三:解析labelImg中xml的坐标信息方法二
import xml.etree.ElementTree as ET
import os
import cv2
import numpy as np
# Folder whose entries are parsed as XML annotation files.
sorce = r'C:\Users\Administrator\Desktop\ocr_easy\code_xml'
# pic_path=r"D:\iflytek_load\北部湾商机\电汇凭证训练集100"
# Output folder; referenced only by the disabled cropping code.
save_path = r"C:\Users\Desktop\cut_pics"
# Names of all entries found in the annotation folder.
dir = os.listdir(path=sorce)
def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` about its centre on a canvas resized to hold the result.

    The output size is computed from the absolute cosine and sine terms of
    the rotation matrix, the matrix translation is shifted to the centre of
    that new canvas, and uncovered pixels are filled with (255, 255, 255).
    """
    height, width = image.shape[:2]
    centre_x, centre_y = width // 2, height // 2

    matrix = cv2.getRotationMatrix2D((centre_x, centre_y), angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Bounding dimensions of the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Move the rotation centre to the centre of the enlarged canvas.
    matrix[0, 2] += (new_width / 2) - centre_x
    matrix[1, 2] += (new_height / 2) - centre_y

    return cv2.warpAffine(
        image, matrix, (new_width, new_height), borderValue=(255, 255, 255)
    )
# Coordinate tags read from each <bndbox>, in output order.
_BOX_KEYS = ('xmin', 'ymin', 'xmax', 'ymax')

# For every annotation file: print its name, load and rotate the image it
# references, then print each labelled object's name and bounding box.
for xml_name in dir:
    print(xml_name)
    # img_path=os.path.join(pic_path,xml_name.replace("xml","jpg"))
    tree = ET.parse(os.path.join(sorce, xml_name))
    rect = {}
    line = ""
    root = tree.getroot()

    # Image path recorded in the annotation; the last <path> element wins.
    for path_node in root.iter('path'):
        rect['path'] = path_node.text
    if not rect.get('path'):
        # Without a path there is no image to load; the original crashed
        # here with KeyError.
        print("skip {}: no <path> element".format(xml_name))
        continue

    # Read the image through np.fromfile + imdecode — presumably so that
    # paths with non-ASCII characters can be opened; TODO confirm.
    img = cv2.imdecode(np.fromfile(rect['path'], dtype=np.uint8), -1)
    if img is None:
        # imdecode yields None for data it cannot decode.
        print("skip {}: cannot decode image {}".format(xml_name, rect['path']))
        continue
    img1 = rotate_bound(img, 90)

    for ob in root.iter('object'):
        # Reset per object so a missing <name> never reuses a previous one.
        type_name = None
        for name_node in ob.iter('name'):
            type_name = name_node.text
        print(type_name)

        for bndbox in ob.iter('bndbox'):
            print()
            # Read all four coordinates of this box together so no value
            # leaks over from a previous box.
            coords = {key: bndbox.findtext(key) for key in _BOX_KEYS}
            if None in coords.values():
                print("skip incomplete <bndbox> in {}".format(xml_name))
                continue
            rect.update(coords)
            print(type(rect['xmin']))
            box_text = ' '.join(rect[key] for key in _BOX_KEYS)
            print(box_text)
            line = box_text + " "
            # Disabled: crop the box from the rotated image and save it.
            # img_cut = img1[int(rect['ymin']):int(rect['ymax']), int(rect['xmin']):int(rect['xmax'])]
            # save_img_path=os.path.join(save_path,"{}_{}.jpg".format(type_name,os.path.splitext(xml_name)[0]))
            # cv2.imencode('.jpg', img_cut, )[1].tofile(save_img_path)
            # cv2.imencode('.jpg', img1, )[1].tofile(save_img_path)
四:常用xml模式
<?xml version="1.0" encoding="utf-8"?>
<Root zoom="1.0">
<name age="0" ages="0"></name>
<name age="1" ages="1"></name>
<name age="2" ages="2"></name>
<name age="3" ages="3"></name>
<name age="4" ages="4"></name>
</Root>
解析方法
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
# Module-level list; nothing in this section appends to it.
all_xml_list = list()
def et_parse(xml_file):
    """Parse ``xml_file`` and print the attributes of each direct child of the root.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A list with one attribute dict (a copy) per direct child of the
        root element, in document order. Earlier versions returned nothing,
        so callers that ignore the result are unaffected.
    """
    root = ET.parse(xml_file).getroot()
    attributes = []
    for child in root:
        print(child.attrib)
        # Copy so callers can modify the result without touching the tree.
        attributes.append(dict(child.attrib))
    return attributes
if __name__ == '__main__':
    # Hard-coded input path; change it to an XML file that exists locally.
    file_name = r'C:\Users\Administrator\Desktop\29c.xml'
    et_parse(file_name)