python导出xml文件_Python应用基础-根据指定文件生成XML

因项目需要,需根据指定格式的文件生成XML标注文件,以便使用LabelImg打开进行编辑和查看。原始文件的每一行默认使用逗号分隔各个字段,如下所示:

第1个值:由原始图片切出的小图文件名,名称中包含AIpng_x,其中x代表该原始图片的第几个切图文件

第2~5个值:分别对应于ymin, xmin, ymax, xmax

第6个值:代表对应的标签标注

在生成XML文件时,需要对其进行汇总,即将属于同一个原始文件的切图小文件的标注汇总到一起,其实现代码如下所示:

import os

from Logger import MyLogger

from xml.dom.minidom import Document,parse

from collections import defaultdict

import re

class OpeateXML:
    """Generate LabelImg-style XML annotation files from a location file.

    Every line of the location file is expected to hold six comma-separated
    fields: ``crop_name,ymin,xmin,ymax,xmax,label``. Crop names contain
    ``.AIpng_<n>``; lines whose crop names differ only in ``_<n>`` are
    grouped together and written into one XML file.
    """

    def __init__(self, srcPath: str, targetPath: str, srcFileName: str):
        """Store the input directory, output directory and input file name."""
        self._srcPath = srcPath
        self._targetPath = targetPath
        self._srcFileName = srcFileName

    def readSrcFileName(self, fileEncoding="utf8") -> dict:
        """Read the location file and group its lines by original image.

        The group key is the first field of a line with ``.AIpng_<digits>``
        replaced by ``.AIpng`` (case-insensitive match).

        Args:
            fileEncoding: Encoding used to read the file; undecodable bytes
                are ignored.

        Returns:
            A ``defaultdict(list)`` mapping group key to the stripped lines
            of that group. It is empty when the file does not exist. An
            empty ``dict`` is returned when reading fails (the error is
            logged).
        """
        data = defaultdict(list)
        # Raw string: "\." and "\d" are not valid str escape sequences.
        cropSuffix = re.compile(r"\.AIpng_\d+", re.IGNORECASE)
        srcFileFullPath = os.path.join(self._srcPath, self._srcFileName)
        try:
            if os.path.exists(srcFileFullPath):
                with open(srcFileFullPath, mode="r", encoding=fileEncoding, errors="ignore") as fr:
                    for content in fr:
                        line = content.strip()
                        if not line:
                            # A blank line would otherwise create a "" group.
                            continue
                        data[cropSuffix.sub(".AIpng", line.split(",")[0])].append(line)
        except Exception as ex:
            MyLogger().error(f"OperateXML:read file error:\n{ex}")
            return {}
        else:
            return data

    def getCreateXMLData(self, srcData: dict, mnlData: list) -> dict:
        """Mark the lines of manually confirmed crops.

        A line is marked when any entry of ``mnlData`` is a substring of the
        line's first field; marking replaces every ``auto`` in the line with
        ``mnl``. ``srcData`` is modified in place.

        Args:
            srcData: Grouped lines as returned by ``readSrcFileName``.
            mnlData: Names of the manually confirmed crops.

        Returns:
            ``srcData`` itself, or an empty ``dict`` when processing fails
            (the error is logged).
        """
        try:
            for lines in srcData.values():
                # enumerate gives the position directly instead of repeated
                # list.index() scans.
                for pos, line in enumerate(lines):
                    cropName = line.strip().split(",")[0]
                    if any(item in cropName for item in mnlData):
                        # NOTE(review): the replacement covers the whole
                        # line, not only the label field - confirm intent.
                        lines[pos] = line.replace("auto", "mnl")
        except Exception as ex:
            MyLogger().error(f"OperateXML: get data from location and mnldata interaction error\n{ex}")
            return {}
        else:
            return srcData

    def operateXML(self, data: defaultdict) -> None:
        """Write one XML file per group, merging into files that already exist.

        Args:
            data: Mapping of group key to its lines; the XML file name is the
                key with its extension replaced by ``.xml``.
        """
        for k, lines in data.items():
            xmlFileFullPath = os.path.join(self._targetPath, os.path.splitext(k)[0] + ".xml")
            if os.path.exists(xmlFileFullPath):
                self.appendExistXML(data={k: lines}, xmlFileFullPath=xmlFileFullPath)
            else:
                self.createNewXML({k: lines})

    def appendExistXML(self, data: defaultdict, xmlFileFullPath: str, fileEncoding="utf8"):
        """Merge the objects of an existing XML file with new lines and rewrite it.

        Existing objects are converted back to
        ``existData,ymin,xmin,ymax,xmax,label`` lines. A line is dropped when
        its box and label (fields 2-6) were already seen, so running the
        same input twice does not duplicate objects. The caller's lists are
        not modified.

        Args:
            data: Mapping of group key to new lines (normally one key).
            xmlFileFullPath: Path of the XML file to merge with.
            fileEncoding: Encoding passed on to ``createNewXML``.
        """
        try:
            doc = parse(xmlFileFullPath)
            rootNode = doc.documentElement
            xmlKey = rootNode.getElementsByTagName("filename")[0].childNodes[0].data

            existLines = []
            for obj in rootNode.getElementsByTagName("object"):
                name = obj.getElementsByTagName("name")[0].childNodes[0].data.strip()
                for bndbox in obj.getElementsByTagName("bndbox"):
                    xmin = bndbox.getElementsByTagName("xmin")[0].childNodes[0].data.strip()
                    ymin = bndbox.getElementsByTagName("ymin")[0].childNodes[0].data.strip()
                    xmax = bndbox.getElementsByTagName("xmax")[0].childNodes[0].data.strip()
                    ymax = bndbox.getElementsByTagName("ymax")[0].childNodes[0].data.strip()
                    existLines.append(f"existData,{ymin},{xmin},{ymax},{xmax},{name}")

            # The <filename> stored in the XML may differ from the key the
            # caller used; fall back to the caller's key instead of failing.
            key = xmlKey if xmlKey in data else next(iter(data), xmlKey)

            mergedLines = []
            seen = set()
            for line in list(data.get(key, [])) + existLines:
                fields = line.strip().split(",")
                # The first field differs between new and existing lines, so
                # identity is decided by box and label only.
                identity = tuple(fields[1:]) if len(fields) == 6 else (line,)
                if identity in seen:
                    continue
                seen.add(identity)
                mergedLines.append(line)

            mergedData = dict(data)
            mergedData[key] = mergedLines
        except Exception as ex:
            MyLogger().error(f"OperateXML:append content to {xmlFileFullPath} error\n{ex}")
            return
        # The old file is not removed first: createNewXML opens the target in
        # "w" mode, and the existing file survives if building the new
        # document fails.
        self.createNewXML(data=mergedData, fileEncoding=fileEncoding)

    @staticmethod
    def _appendTextElement(doc, parent, tagName, text):
        """Append ``<tagName>text</tagName>`` to ``parent`` and return the new element."""
        node = doc.createElement(tagName)
        node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)
        return node

    def createNewXML(self, data: dict, fileEncoding="utf8") -> None:
        """Write one XML annotation file per key of ``data``.

        Lines that do not have exactly six comma-separated fields, or whose
        label does not contain ``/``, are skipped. A failure while writing
        one file is logged and does not stop the remaining files.

        Args:
            data: Mapping of group key to its lines.
            fileEncoding: Encoding of the written XML file.
        """
        # Local import keeps this method self-contained; the file's import
        # header does not provide codecs.
        import codecs

        if not data:
            return
        for k, v in data.items():
            try:
                # "utf8" is not an encoding name known to the XML parser;
                # declare the canonical "utf-8" so the file can be re-read.
                declaredEncoding = fileEncoding
                if codecs.lookup(fileEncoding).name == "utf-8":
                    declaredEncoding = "utf-8"

                doc = Document()
                rootNode = doc.createElement("annotation")
                doc.appendChild(rootNode)
                self._appendTextElement(doc, rootNode, "folder", self._targetPath)
                self._appendTextElement(doc, rootNode, "filename", k)
                self._appendTextElement(doc, rootNode, "path", os.path.join(self._targetPath, k))

                for i in v:
                    tmpData = i.strip().split(",")
                    if len(tmpData) != 6:
                        continue
                    _, ymin, xmin, ymax, xmax, labelName = tmpData
                    if "/" not in labelName:
                        continue
                    objectObj = doc.createElement("object")
                    rootNode.appendChild(objectObj)
                    self._appendTextElement(doc, objectObj, "name", labelName)
                    objectBndBox = doc.createElement("bndbox")
                    objectObj.appendChild(objectBndBox)
                    for tagName, value in (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax)):
                        self._appendTextElement(doc, objectBndBox, tagName, value)

                # save xml
                xmlName = os.path.splitext(k)[0] + ".xml"
                targetPath = os.path.join(self._targetPath, xmlName)
                with open(targetPath, mode="w", encoding=fileEncoding, errors="ignore") as fw:
                    doc.writexml(fw, indent="\t", newl="\n", addindent="\t", encoding=declaredEncoding)
            except Exception as ex:
                MyLogger().error(f"OperateXML:Save xml error\n{ex}")

if __name__ == '__main__':
    # Example run: read the location file, mark the manually confirmed
    # crops, then create or update the XML annotation files.
    sourceDir = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs"
    locationFile = "locations.txt"
    outputDir = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs\in_number"

    # Crops that were confirmed by hand.
    confirmedCrops = [
        'slide1_cell420_image0_met.AIpng_36.jpg',
        'slide1_cell420_image0_met.AIpng_33.jpg',
        'slide1_cell420_image0_met.AIpng_10.jpg',
        'slide1_cell420_image0_met.AIpng_30.jpg',
    ]

    builder = OpeateXML(sourceDir, outputDir, locationFile)
    groupedLines = builder.readSrcFileName()
    markedLines = builder.getCreateXMLData(groupedLines, confirmedCrops)
    builder.operateXML(markedLines)

最终生成的XML效果如下所示:

在LabelImg中的效果如下所示:

本文同步在微信订阅号上发布,如各位小伙伴们喜欢我的文章,也可以关注我的微信订阅号:woaitest,或扫描下面的二维码添加关注:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值