修改xml中的标签(多个人标注,大小写难免不一致),在voc转换yolo时需要先转换为大小写一致的标签再进行训练。下面的代码包含两部分:统一大小写和更新某个类别的名称
更新xml中的大小写及类别
import os.path
import xml.dom.minidom
# Directory holding the XML annotation files. Put the XML files into this
# folder (the one the model-training code reads from) before running.
xmlFilePath = r"D:\xsy\removenodexmltext"
# Directory listing taken once, at module load time; the update functions
# below iterate over this list.
xmlFileNames = os.listdir(xmlFilePath)
def change1(joinXmlFilePath):
    """Lower-case the text of every ``<name>`` element in one XML file.

    The file is parsed with ``xml.dom.minidom``, each ``<name>`` element found
    anywhere below the document root has its text lower-cased, and the
    document is written back to the same path.

    Args:
        joinXmlFilePath: Path of the XML file to rewrite in place.
    """
    dom = xml.dom.minidom.parse(joinXmlFilePath)
    root = dom.documentElement

    for name_node in root.getElementsByTagName("name"):
        text_node = name_node.firstChild
        # An empty <name/> has no child node, and a non-text first child has
        # no ``data`` attribute; in both cases there is nothing to lower-case.
        if text_node is None or text_node.nodeType not in (
            text_node.TEXT_NODE,
            text_node.CDATA_SECTION_NODE,
        ):
            continue
        fileName = text_node.data
        print("原始class名:", fileName)
        newFileName = fileName.lower()
        text_node.data = newFileName
        print("修改后class名:", newFileName)

    # Write UTF-8 explicitly and declare it in the XML header so that
    # non-ASCII text survives regardless of the platform's default encoding.
    with open(joinXmlFilePath, "w", encoding="utf-8") as pn:
        dom.writexml(pn, encoding="utf-8")
    print("finish")
def updatexmldxx(xml_dir=None):
    """Lower-case the ``<name>`` text of every XML file in a directory.

    Args:
        xml_dir: Directory to process. When omitted, the module-level
            ``xmlFilePath`` directory and its ``xmlFileNames`` listing are
            used, which is what a call without arguments has always done.
    """
    if xml_dir is None:
        xml_dir, entries = xmlFilePath, xmlFileNames
    else:
        entries = os.listdir(xml_dir)

    for xmlFileName in entries:
        joinXmlFilePath = os.path.join(xml_dir, xmlFileName)
        # Test the joined path: a bare file name would be resolved against the
        # current working directory instead of xml_dir.
        if not os.path.isfile(joinXmlFilePath):
            continue
        # Only XML files can be parsed; skip anything else in the folder.
        if not xmlFileName.lower().endswith(".xml"):
            continue
        print(xmlFileName)  # show which file is being processed
        change1(joinXmlFilePath)
def change2(joinXmlFilePath, old_name="people", new_name="person"):
    """Rename one class label in a single XML file.

    The file is parsed with ``xml.dom.minidom``; every ``<name>`` element
    below the document root whose text equals ``old_name`` exactly is set to
    ``new_name``, and the document is written back to the same path.

    Args:
        joinXmlFilePath: Path of the XML file to rewrite in place.
        old_name: Label text to look for (exact, case-sensitive match).
        new_name: Replacement label text.
    """
    dom = xml.dom.minidom.parse(joinXmlFilePath)
    root = dom.documentElement

    for name_node in root.getElementsByTagName("name"):
        text_node = name_node.firstChild
        # An empty <name/> has no child node, and a non-text first child has
        # no ``data`` attribute; neither can match the label being renamed.
        if text_node is None or text_node.nodeType not in (
            text_node.TEXT_NODE,
            text_node.CDATA_SECTION_NODE,
        ):
            continue
        fileName = text_node.data
        if fileName == old_name:
            print("原始class名:", fileName)
            text_node.data = new_name
            print("修改后class名:", new_name)

    # Write UTF-8 explicitly and declare it in the XML header so that
    # non-ASCII text survives regardless of the platform's default encoding.
    with open(joinXmlFilePath, "w", encoding="utf-8") as pn:
        dom.writexml(pn, encoding="utf-8")
    print("finish")
# Rename one class label across all XML files.
def updatexmlpeople2person(xml_dir=None):
    """Apply ``change2`` to every XML file in a directory.

    Args:
        xml_dir: Directory to process. When omitted, the module-level
            ``xmlFilePath`` directory and its ``xmlFileNames`` listing are
            used, which is what a call without arguments has always done.
    """
    if xml_dir is None:
        xml_dir, entries = xmlFilePath, xmlFileNames
    else:
        entries = os.listdir(xml_dir)

    for xmlFileName in entries:
        joinXmlFilePath = os.path.join(xml_dir, xmlFileName)
        # Test the joined path: a bare file name would be resolved against the
        # current working directory instead of xml_dir.
        if not os.path.isfile(joinXmlFilePath):
            continue
        # Only XML files can be parsed; skip anything else in the folder.
        if not xmlFileName.lower().endswith(".xml"):
            continue
        print(xmlFileName)  # show which file is being processed
        change2(joinXmlFilePath)
if __name__ == '__main__':
    # Normalise case first so that variants such as "People" or "PEOPLE" are
    # lower-cased to "people" before the rename step looks for that exact text.
    updatexmldxx()  # lower-case every <name> in the XML files
    updatexmlpeople2person()  # rename <name> nodes "people" to "person"
剔除掉xml中没用到的object,并判断该xml中还有没有目标,没目标的话就把xml名称做记号,方便剔除
# xml.etree.cElementTree was removed in Python 3.9; ElementTree provides the
# same API under the same alias.
import xml.etree.ElementTree as ET
import os

path_root = [r'C:\Users\D\Desktop\0608\y(4)\y\fullxml']  # directories holding the source XML files
CLASSES = ["bus", "BUS"]  # class names to keep
OUTPUT_DIR = r'C:\Users\D\Desktop\0608\y(4)\y\fullxml1'  # where the processed files are saved


def keep_only_classes(root, classes):
    """Remove every direct ``<object>`` child whose class is not wanted.

    Args:
        root: Root element of a parsed annotation tree; modified in place.
        classes: Collection of class names to keep.

    Returns:
        The number of ``<object>`` children left under ``root``.
    """
    # findall() returns a list, so removing children while looping is safe.
    for obj in root.findall('object'):
        name_elem = obj.find('name')
        # An object without a <name> cannot match any wanted class.
        name = name_elem.text if name_elem is not None else None
        if name not in classes:
            root.remove(obj)
            print("remove no use label!")
    return len(root.findall('object'))


for anno_path in path_root:
    if not os.path.isdir(anno_path):
        print("skip missing directory:", anno_path)
        continue
    # Create the output folder on demand so tree.write() cannot fail on it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for axml in os.listdir(anno_path):
        if not axml.lower().endswith(".xml"):
            continue
        path_xml = os.path.join(anno_path, axml)
        print(path_xml)
        tree = ET.parse(path_xml)
        root = tree.getroot()
        if root.findall("object"):
            print(axml)
        if keep_only_classes(root, CLASSES) == 0:
            # No wanted object is left: prefix the file name so these files
            # are easy to pick out and discard later.
            print("=0", axml)
            axml = "no-" + axml
        tree.write(os.path.join(OUTPUT_DIR, axml))