INRIA DataSet数据集label转换为VOC2007数据集形式

    这里纯粹是一个数据集标签的格式转换,所以直接上代码:

# -*- coding: utf-8 -*-
"""
Created on Wed Apr  4 15:10:06 2018

@author: kuangyongjian
"""
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 文件名:txt_to_xml.py
 
from xml.dom.minidom import Document
import os
import re
 
# Directory holding the INRIA ``*.txt`` annotation files to convert.
ANNOTATION_DIR = "F:/za/Model/pedestrain/INRIAPerson/Train/annotations"

# Compiled once; raw strings avoid the invalid-escape warning of '\d+'.
_NUMBER_RE = re.compile(r'\d+')
_BOX_INDEX_RE = re.compile(r'Bounding box for object (\d+)')


def append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, append it to *parent* and return it."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def build_voc_document(lines, image_name):
    """Build a VOC2007-style XML document from INRIA annotation lines.

    Args:
        lines: The text lines of one INRIA annotation file.
        image_name: Value written to the ``<filename>`` element.

    Returns:
        An ``xml.dom.minidom.Document`` whose root is ``<annotation>``.

    Raises:
        IndexError: If a "size" line holds fewer than three numbers or a
            bounding-box line holds fewer than five.

    Every line containing "size" that appears before the first line
    containing "Objects" yields a ``<size>``/``<segmented>`` pair.  The first
    number on the "Objects" line is the object count; when no such line (or
    no number on it) exists the count is 0 and no ``<object>`` is emitted.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    append_text_element(doc, annotation, 'folder', 'VOC2007')
    append_text_element(doc, annotation, 'filename', image_name)

    source = doc.createElement('source')
    annotation.appendChild(source)
    append_text_element(doc, source, 'database', 'PASperson Database')
    append_text_element(doc, source, 'annotation', 'PASperson')

    # Header pass: image size and declared object count.
    object_count = 0
    for line in lines:
        if "size" in line:
            dims = _NUMBER_RE.findall(line)
            size = doc.createElement('size')
            annotation.appendChild(size)
            append_text_element(doc, size, 'width', dims[0])
            append_text_element(doc, size, 'height', dims[1])
            append_text_element(doc, size, 'depth', dims[2])
            append_text_element(doc, annotation, 'segmented', '0')
        if 'Objects' in line:
            counts = _NUMBER_RE.findall(line)
            if counts:
                object_count = int(counts[0])
            break

    # Group bounding-box lines by their exact object index.  A substring test
    # on "object 1" would also match "object 10", "object 11", ...
    boxes = {}
    for line in lines:
        match = _BOX_INDEX_RE.search(line)
        if match:
            boxes.setdefault(int(match.group(1)), []).append(
                _NUMBER_RE.findall(line))

    for index in range(1, object_count + 1):
        for numbers in boxes.get(index, ()):
            obj = doc.createElement('object')
            annotation.appendChild(obj)
            append_text_element(doc, obj, 'name', 'person')
            append_text_element(doc, obj, 'pose', 'Unspecified')
            append_text_element(doc, obj, 'truncated', '0')
            append_text_element(doc, obj, 'difficult', '0')

            bndbox = doc.createElement('bndbox')
            obj.appendChild(bndbox)
            # numbers[0] is the object index, so coordinates start at 1.
            append_text_element(doc, bndbox, 'xmin', numbers[1])
            append_text_element(doc, bndbox, 'ymin', numbers[2])
            append_text_element(doc, bndbox, 'xmax', numbers[3])
            append_text_element(doc, bndbox, 'ymax', numbers[4])

    return doc


def convert_annotation_file(txt_path, stem):
    """Convert one annotation file and write ``<stem>.xml`` to the CWD.

    Args:
        txt_path: Path of the INRIA ``.txt`` annotation file.
        stem: Base name (without extension) used for both the output XML
            file and the ``.jpg`` name stored in ``<filename>``.
    """
    # latin-1 maps every byte to a character, so decoding cannot fail on
    # non-ASCII header text; only ASCII keywords and digits are parsed.
    with open(txt_path, "r", encoding="latin-1") as src:
        print('processing:' + src.name)
        lines = src.readlines()

    doc = build_voc_document(lines, stem + ".jpg")

    with open(stem + ".xml", 'w') as out:
        out.write(doc.toprettyxml(indent=""))


def main(annotation_dir=ANNOTATION_DIR):
    """Convert every ``.txt`` annotation in *annotation_dir* to VOC XML."""
    for entry in os.listdir(annotation_dir):
        if not entry.endswith(".txt"):
            continue
        print(entry)

        txt_path = annotation_dir + "/" + entry

        # Take the index from the file name only, so digits that happen to
        # appear in the directory path can never be picked up.
        fileindex = _NUMBER_RE.findall(entry)
        if not fileindex:
            print('skipping (no numeric index): ' + entry)
            continue

        print(fileindex)
        print(str(int(fileindex[0])))

        convert_annotation_file(txt_path, "00" + str(int(fileindex[0])))

        print(str(fileindex) + " compelete")

    print('process compelete')


if __name__ == "__main__":
    main()
评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值