很久没有更新博客了,今天更新点更改xml文件的脚本,也是为了自己能保存下来吧。
1. 一个比较全的脚本,可以修改xml文件中任意节点的内容:按照自己的需要,把不用的替换语句注释掉、把要用的取消注释即可,可成批修改。
"""Batch-rewrite selected tags in every annotation file of a directory.

Each file under ``xml_dir`` is read, passed through ``rewrite_xml_text`` and
written back in place.
"""
import os
import re

xml_dir = "./Annotations1/"


def rewrite_xml_text(xml_data, xml_name_seperate):
    """Return *xml_data* with the enabled tag substitutions applied.

    Args:
        xml_data: Full text of one annotation file.
        xml_name_seperate: File name up to its first ``.``; only used by the
            optional ``<filename>`` / ``<path>`` substitutions below.

    Returns:
        The rewritten text. Tags that do not occur are left untouched.
    """
    # Optional substitutions -- uncomment the ones that are needed.
    # Patterns are raw strings; the original '\<' was an invalid string escape.
    # xml_data = re.sub(r'<folder>(.*?)</folder>', '<folder>JPEGImages</folder>', xml_data)
    # xml_data = re.sub(r'<filename>(.*?)</filename>', '<filename>' + xml_name_seperate + '</filename>', xml_data)
    # xml_data = re.sub(r'<path>(.*?)</path>',
    #                   '<path>/home/tsn/workspace/py-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/' +
    #                   xml_name_seperate + '.jpg</path>', xml_data)
    #
    # xml_data = re.sub(r'<name>(.*?)</name>', '<name>normal</name>', xml_data)
    xml_data = re.sub(r'<width>(.*?)</width>', '<width>1920</width>', xml_data)
    xml_data = re.sub(r'<height>(.*?)</height>', '<height>1080</height>', xml_data)
    # xml_data = re.sub(r'<truncated>1</truncated>', '<truncated>0</truncated>', xml_data)
    return xml_data


def rewrite_annotation_dir(directory=xml_dir):
    """Apply ``rewrite_xml_text`` in place to every file in *directory*.

    Args:
        directory: Folder to process; it is concatenated with each file name,
            so it must end with a path separator.
    """
    for xml_name in os.listdir(directory):
        print(directory + xml_name)
        xml_name_seperate = xml_name.split('.')[0]
        # ``with`` guarantees the handle is closed even if a step below fails.
        with open(directory + xml_name, 'r+') as f:
            xml_data = rewrite_xml_text(f.read(), xml_name_seperate)
            f.seek(0)          # rewind to the start of the file
            f.truncate()       # drop the old content
            f.write(xml_data)  # write the rewritten content


if __name__ == "__main__":
    rewrite_annotation_dir()
2. 修改单个节点:指定要修改的节点,以及要把它的值换成什么。
# Replace the text of the first child of a chosen node (here: <size>) in
# every xml file of a directory.
import os
import sys
import glob
from xml.etree import ElementTree as ET

# Directory holding the annotation files to process.
xml_dir = r'./VOC2018/Annotations/'


def replace_first_child_text(tree, node_name='size', old_text='960', new_text='1152'):
    """Replace the text of the first child of each matching node.

    Args:
        tree: Parsed ``ElementTree`` of one annotation file.
        node_name: Tag of the nodes to inspect (direct children of the root).
        old_text: Text the first child must currently have to be changed.
        new_text: Text to store instead.

    Returns:
        True if at least one child was modified, otherwise False.
    """
    changed = False
    # A relative path replaces the deprecated absolute '/size' form.
    for node in tree.findall(node_name):
        if len(node) == 0:
            # Node without children: nothing to inspect.
            continue
        # Indexing replaces Element.getchildren(), removed in Python 3.9.
        child = node[0]
        if child.text == old_text:
            child.text = new_text
            changed = True
        print(child.tag, ':', child.text)
    return changed


def update_annotation_dir(directory=xml_dir):
    """Run ``replace_first_child_text`` on every ``*.xml`` file in *directory*.

    A file is rewritten in place only when something in it changed.
    """
    for xml in glob.glob(directory + '/*.xml'):
        print(xml)
        per = ET.parse(xml)
        if replace_first_child_text(per):
            per.write(xml)


if __name__ == "__main__":
    update_annotation_dir()
# coding=utf-8
"""Rename one object label in every annotation file of a folder."""
import os
import os.path
import xml.dom.minidom

# Folder that is scanned for annotation files.
FindPath = './Annotations1/'
# Folder the modified files are written to (same folder: edit in place).
xml_path = './Annotations1/'


def rename_labels(dom, old_name='normal_bolt', new_name='norbolt'):
    """Rename every ``<name>`` whose text equals *old_name*.

    Args:
        dom: minidom ``Document`` of one annotation file; modified in place.
        old_name: Label text to look for.
        new_name: Label text to store instead.

    Returns:
        Number of ``<name>`` elements that were renamed.
    """
    name = dom.documentElement.getElementsByTagName('name')
    renamed = 0
    for node in name:
        print(len(name))
        text_node = node.firstChild
        if text_node is None:
            # Empty <name/> element: there is no text to compare.
            continue
        print(text_node.data)
        if text_node.data == old_name:
            text_node.data = new_name
            print('修改后的 name')
            print(text_node.data)
            renamed += 1
    return renamed


def rename_labels_in_dir(src_dir=FindPath, dst_dir=xml_path):
    """Apply ``rename_labels`` to each regular file in *src_dir*.

    Args:
        src_dir: Folder to read annotation files from.
        dst_dir: Folder to write the updated files to, under the same names.
    """
    for file_name in os.listdir(src_dir):
        src_file = os.path.join(src_dir, file_name)
        # Test the joined path: the bare name would be resolved against the
        # current working directory instead of the scanned folder.
        if os.path.isdir(src_file):
            continue
        print(file_name)
        dom = xml.dom.minidom.parse(src_file)
        rename_labels(dom)
        # Save the modified document.
        with open(os.path.join(dst_dir, file_name), 'w') as fh:
            dom.writexml(fh)
            print('write name/pose OK!')


if __name__ == "__main__":
    rename_labels_in_dir()
"""Point the <path> tag of every annotation file at a numbered JPEG."""
import os
import re

# Set this to the folder where your Annotations are stored.
_dir = "./VOC2018/Annotations/"


def set_image_path(xmldata, n):
    """Return *xmldata* with each ``<path>`` set to the *n*-th image path.

    Args:
        xmldata: Full text of one annotation file.
        n: Image index; rendered zero-padded to six digits.

    Returns:
        The rewritten text. The replacement includes the opening ``<path>``
        tag, which the pattern consumes, so the result stays well-formed.
    """
    # Change the directory below to the image path you want to store.
    return re.sub(
        r'<path>(.*?)</path>',
        '<path>/home/tsn/workspace/py-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/'
        + str(n).zfill(6) + '.jpg</path>',
        xmldata)


def renumber_paths(directory=_dir):
    """Rewrite ``<path>`` in every file of *directory*, numbering from 0.

    Args:
        directory: Folder to process; it is concatenated with each file name,
            so it must end with a path separator.
    """
    # os.listdir returns names in arbitrary order; sorting makes the index
    # assigned to each file reproducible.
    for n, xml in enumerate(sorted(os.listdir(directory))):
        with open(directory + xml, "r", encoding='utf-8') as f:
            xmldata = f.read()
        xmldata = set_image_path(xmldata, n)
        # Write with the same encoding that was used for reading.
        with open(directory + xml, "w", encoding='utf-8') as f:
            f.write(xmldata)


if __name__ == "__main__":
    renumber_paths()
以上三个脚本都可以对单个节点进行修改,任选其一使用即可。
3. 已经标注好的图片在修改了尺寸(resize)之后,标注文件里目标框的坐标也要相应修改。下面的代码按图片resize的比例成批修改x、y的值。该例子中原图大小为1152×864,改为1920×1080。
# -*- coding: utf-8 -*-
"""Rescale bounding-box coordinates after the images were resized."""
from xml.etree.ElementTree import ElementTree, Element
import xml.etree.ElementTree as ET
import os
import re

# Input folder (original annotations) and output folder (rescaled copies).
xml_dir = "./Annotations/"
xml_dir1 = "./Annotations1/"

# (width, height) of the images before and after resizing.
OLD_SIZE = (1152, 864)
NEW_SIZE = (1920, 1080)


def scale_coordinate(value, old_size, new_size):
    """Scale *value* by ``new_size / old_size``, truncating toward zero.

    Pure integer arithmetic is used: the float form
    ``int(value / old_size * new_size)`` can land just below the exact result
    (for example 147 / 1152 * 1920 truncates to 244 instead of 245).

    Args:
        value: Integer coordinate in the old image.
        old_size: Old image extent along this axis.
        new_size: New image extent along this axis.

    Returns:
        The integer coordinate in the resized image.
    """
    scaled = abs(value) * new_size // old_size
    return scaled if value >= 0 else -scaled


def rescale_boxes(root, old_size=OLD_SIZE, new_size=NEW_SIZE):
    """Rescale every xmin/ymin/xmax/ymax element under *root* in place.

    Args:
        root: Root element of one parsed annotation file.
        old_size: ``(width, height)`` before resizing.
        new_size: ``(width, height)`` after resizing.
    """
    # Tag -> axis index (0 = x / width, 1 = y / height).
    axis_of = {'xmin': 0, 'ymin': 1, 'xmax': 0, 'ymax': 1}
    for tag, axis in axis_of.items():
        for elem in root.iter(tag):
            elem.text = str(
                scale_coordinate(int(elem.text), old_size[axis], new_size[axis]))


def rescale_annotation_dir(src_dir=xml_dir, dst_dir=xml_dir1):
    """Rescale every annotation file in *src_dir* and save it to *dst_dir*.

    Args:
        src_dir: Folder with the original files; must end with a separator.
        dst_dir: Folder for the rescaled copies; created when missing.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for xml_name in os.listdir(src_dir):
        tree = ET.parse(src_dir + xml_name)
        root = tree.getroot()
        print(root.text)
        rescale_boxes(root)
        tree.write(dst_dir + xml_name)


if __name__ == "__main__":
    rescale_annotation_dir()