最近在对之前采集到的人脸深度数据做标注,需要从标注生成的xml文件中提取信息,并重新整合到一个txt文件内。这里写了一个简单的脚本,用于读取xml文件中的信息。
import os
import xml.dom.minidom
import re
def read_xml(filepath, out=None):
    """Extract face annotations from one XML file and write them as text.

    Nothing is written when the document contains no ``name`` element.
    Otherwise the following lines are written, in this order:

    1. The text of the first ``path`` element, reduced to the part that
       follows ``Desktop`` plus a backslash (the whole text is used when
       that marker is absent), with backslashes turned into ``/`` and
       ``_full.jpg`` replaced by ``.xml``.
    2. The number of ``name`` elements (one per annotated face).
    3. One line per face: ``xmax ymax xmin ymin`` as floats, separated by
       single spaces.

    The same values are echoed to stdout via ``print``.

    Args:
        filepath: Path of the annotation XML file to parse.
        out: Object with a ``write(str)`` method that receives the lines.
            When omitted, the module-level name ``list`` is used, which the
            driver script binds to the open output file.

    Raises:
        IndexError: If there is no ``path`` element, or fewer coordinate
            elements than ``name`` elements.
        AttributeError: If a required element has no text content.
        ValueError: If a coordinate is not a valid number.
    """
    dom = xml.dom.minidom.parse(filepath)
    root = dom.documentElement

    # Each 'name' element is counted as one annotated face.
    face_n = len(dom.getElementsByTagName('name'))
    if face_n == 0:
        return

    if out is None:
        # Backward compatibility: fall back to the global output handle.
        out = list

    # --- file name line ---
    image_path = root.getElementsByTagName('path')[0].firstChild.data
    # Keep the segment right after 'Desktop\'; plain string operations are
    # used so that '.' and '\' are matched literally, not as regex syntax.
    parts = image_path.split('Desktop\\')
    relative = parts[1] if len(parts) > 1 else image_path
    relative = relative.replace('\\', '/')
    print(relative)
    out.write(relative.replace('_full.jpg', '.xml') + '\n')

    # --- face count line ---
    print(face_n)
    out.write(str(face_n) + '\n')

    # --- one coordinate line per face ---
    xmins = root.getElementsByTagName('xmin')
    xmaxs = root.getElementsByTagName('xmax')
    ymins = root.getElementsByTagName('ymin')
    ymaxs = root.getElementsByTagName('ymax')
    for i in range(face_n):
        xmin = float(xmins[i].firstChild.data)
        xmax = float(xmaxs[i].firstChild.data)
        ymin = float(ymins[i].firstChild.data)
        ymax = float(ymaxs[i].firstChild.data)
        line = ' '.join(str(v) for v in (xmax, ymax, xmin, ymin))
        # Terminate every record; without the newline consecutive boxes and
        # the next file's name would run together in the output.
        out.write(line + '\n')
        print(line)
# Directory that is searched recursively for annotation XML files.
c_dir = '....'

# The handle is intentionally bound to the name `list` (even though it
# shadows the builtin) because read_xml writes through that module-level
# name. The `with` block guarantees the file is flushed and closed even if
# parsing one of the XML files raises.
with open('face.txt', 'w') as list:
    for root, dirs, files in os.walk(c_dir):
        for filename in files:
            if filename.endswith('.xml'):
                xml_path = os.path.join(root, filename)
                print(xml_path)
                read_xml(xml_path)