这里纯粹是一个数据集标签的格式转换,所以直接上代码:
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 4 15:10:06 2018
@author: kuangyongjian
"""
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 文件名:txt_to_xml.py
from xml.dom.minidom import Document
import os
import re
# Converts the ".txt" annotation files found in ANNOTATION_DIR into
# Pascal-VOC-style XML files written to the current working directory.

# Directory that is scanned for ".txt" annotation files.
ANNOTATION_DIR = "F:/za/Model/pedestrain/INRIAPerson/Train/annotations"

# Compiled once; raw strings avoid the invalid "\d" escape of a plain literal.
NUMBER_RE = re.compile(r'\d+')
# Captures the complete object number so that "object 1" cannot also match
# the lines of objects 10, 11, ... (a plain substring test does).
BBOX_RE = re.compile(r'Bounding box for object (\d+)')


def append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def read_annotation_lines(path):
    """Return the lines of the annotation file at *path* as text.

    The file is read as bytes and decoded as Latin-1: every byte maps to a
    character, so non-ASCII bytes can never raise a decode error, and the
    result does not depend on the platform's default encoding. Only ASCII
    keywords and digits are consumed from the text afterwards.
    """
    with open(path, "rb") as source_file:
        print('processing:' + source_file.name)
        text = source_file.read().decode("latin-1")
    # Normalise line endings by hand; only "\n", "\r" and "\r\n" split lines.
    return text.replace("\r\n", "\n").replace("\r", "\n").split("\n")


def build_annotation_document(lines, image_number):
    """Build the XML document for one annotation file.

    Args:
        lines: the text lines of the source annotation file.
        image_number: integer taken from the file name; the ``<filename>``
            entry becomes ``"00" + str(image_number) + ".jpg"``.

    Returns:
        An ``xml.dom.minidom.Document`` whose root ``<annotation>`` holds
        the fixed folder/source header, one ``<size>``/``<segmented>`` pair
        per "size" line seen before the "Objects" line, and one ``<object>``
        per bounding-box line whose object number lies in
        ``1..declared object count``.

    Raises:
        IndexError: if a matching bounding-box line contains fewer than
            five numbers.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    append_text_element(doc, annotation, 'folder', 'VOC2007')
    append_text_element(doc, annotation, 'filename',
                        "00" + str(image_number) + ".jpg")
    source = doc.createElement('source')
    annotation.appendChild(source)
    append_text_element(doc, source, 'database', 'PASperson Database')
    append_text_element(doc, source, 'annotation', 'PASperson')

    # Reset for every file: a file without a usable "Objects" line yields no
    # objects instead of silently reusing the previous file's count.
    object_count = 0
    for line in lines:
        if "size" in line:
            sizes = NUMBER_RE.findall(line)
            # Width, height and depth are all required; a line that merely
            # mentions "size" without three numbers is ignored.
            if len(sizes) >= 3:
                size = doc.createElement('size')
                annotation.appendChild(size)
                append_text_element(doc, size, 'width', sizes[0])
                append_text_element(doc, size, 'height', sizes[1])
                append_text_element(doc, size, 'depth', sizes[2])
                append_text_element(doc, annotation, 'segmented', '0')
        if 'Objects' in line:
            counts = NUMBER_RE.findall(line)
            if counts:
                object_count = int(counts[0])
            break

    # Single pass over the lines: group the numbers of every bounding-box
    # line by its exact object number.
    boxes_by_object = {}
    for line in lines:
        match = BBOX_RE.search(line)
        if match is None:
            continue
        numbers = NUMBER_RE.findall(line)
        boxes_by_object.setdefault(int(match.group(1)), []).append(numbers)

    for index in range(1, object_count + 1):
        for coordinate in boxes_by_object.get(index, ()):
            voc_object = doc.createElement('object')
            annotation.appendChild(voc_object)
            append_text_element(doc, voc_object, 'name', 'person')
            append_text_element(doc, voc_object, 'pose', 'Unspecified')
            append_text_element(doc, voc_object, 'truncated', '0')
            append_text_element(doc, voc_object, 'difficult', '0')
            bndbox = doc.createElement('bndbox')
            voc_object.appendChild(bndbox)
            # The numbers include the object's serial number first, so the
            # coordinates start at subscript 1.
            append_text_element(doc, bndbox, 'xmin', coordinate[1])
            append_text_element(doc, bndbox, 'ymin', coordinate[2])
            append_text_element(doc, bndbox, 'xmax', coordinate[3])
            append_text_element(doc, bndbox, 'ymax', coordinate[4])
    return doc


def main():
    """Convert every ".txt" entry of ANNOTATION_DIR into an XML file.

    Each output file is named ``"00" + <first number in the file name> +
    ".xml"`` and is written to the current working directory.
    """
    for entry in os.listdir(ANNOTATION_DIR):
        if ".txt" not in entry:
            continue
        print(entry)
        path = ANNOTATION_DIR + "/" + entry
        # Search the file name only, so digits in the directory part of the
        # path can never be mistaken for the image number.
        fileindex = NUMBER_RE.findall(entry)
        print(fileindex)
        if not fileindex:
            # No number in the name: there is nothing to name the output by.
            continue
        image_number = int(fileindex[0])
        print(str(image_number))
        newfilename = "00" + str(image_number) + ".xml"

        lines = read_annotation_lines(path)
        doc = build_annotation_document(lines, image_number)
        with open(newfilename, 'w') as target:
            target.write(doc.toprettyxml(indent=""))
        print(str(fileindex) + " compelete")
    print('process compelete')


if __name__ == "__main__":
    main()