目录
最近要使用 Faster R-CNN 训练数据,但拿到的 XML 标注格式不太对,这里主要记录一下 VOC 数据集 XML 标注格式的转换方法。
1.xml转化成txt
# coding:utf-8
import os
import xml.etree.cElementTree as ET
import shutil
import sys
def test(ii):
    """Convert numbered VOC-style XML annotation files into text files.

    Starting at index ``ii`` and continuing through 1800 (inclusive), parses
    ``<xml_path>/<index zero-padded to 6 digits>.xml`` and writes one line per
    ``object`` element, formatted as ``name xmin ymin xmax ymax``, to
    ``<txt_path>/<stem>.txt``. ``stem`` is the text of the XML ``filename``
    element up to (not including) its first dot, or the whole text when it
    contains no dot.

    ``xml_path`` and ``txt_path`` are read from module-level variables and
    must be set before this function is called.

    Args:
        ii: First file index to process. Nothing is processed when it is
            greater than 1800.
    """
    i = ii
    while i <= 1800:
        # When the counter reaches 104 it jumps to 114, so indices 104-113
        # are never read on a run that passes through 104.
        if i == 104:
            i = 114

        tree = ET.parse(os.path.join(xml_path, str(i).zfill(6) + ".xml"))
        root = tree.getroot()  # root node of the annotation
        filename = root.find('filename').text

        # partition() keeps the whole name when there is no dot; slicing with
        # find(".") would return -1 there and silently drop the last character.
        stem = filename.partition(".")[0]
        out_path = os.path.join(txt_path, stem + ".txt")

        # Mode 'w' truncates an existing file, so no separate delete step is
        # needed (shutil.rmtree only works on directories and would raise
        # on a regular file). The with-block guarantees the file is closed.
        with open(out_path, 'w') as out_file:
            for obj in root.findall('object'):  # every object under root
                name = obj.find('name').text
                bndbox = obj.find('bndbox')
                coords = [bndbox.find(tag).text
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
                out_file.write(" ".join([name] + coords) + "\n")
        i += 1
if __name__ == '__main__':
    # Single-letter labels 'a' through 't'.
    names = list('abcdefghijklmnopqrst')

    # Input and output directories; test() reads these as module-level
    # variables, so they must be assigned before it is called.
    txt_path = "/home/liulinzhi/CV/dataset/ding/txt"
    xml_path = "/home/liulinzhi/CV/dataset/ding/xml/xml"

    # Start the conversion from file index 1.
    test(1)
2.txt转xml