最近在做 OCR 项目，需要将 Pascal VOC 格式的标注转换为 ICDAR 2015 格式，所以写了这个转换脚本。
import os
import xml.etree.ElementTree as ET
# Default locations: Pascal VOC XML annotations in, ICDAR 2015 gt files out.
xmldir = 'Annotations'
gtdir = 'ICDAR_gt'


def parse_voc_object(obj):
    """Extract the bounding box and label from one VOC ``<object>`` element.

    Args:
        obj: An ``xml.etree.ElementTree.Element`` with ``name`` and
            ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``) children.

    Returns:
        A tuple ``(xmin, ymin, xmax, ymax, label)`` with integer coordinates.

    Raises:
        AttributeError: If ``name``, ``bndbox`` or a coordinate tag is missing.
        ValueError: If a coordinate is not numeric.
    """
    label = obj.find('name').text
    bndbox = obj.find('bndbox')
    # Go through float() so coordinates written as "12.0" are accepted;
    # any fractional part is truncated.
    xmin, ymin, xmax, ymax = (
        int(float(bndbox.find(tag).text))
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    return xmin, ymin, xmax, ymax, label


def format_icdar_line(xmin, ymin, xmax, ymax, label):
    """Return one gt line: ``x1,y1,x2,y2,x3,y3,x4,y4,label`` plus newline.

    The axis-aligned box is expanded to its four corners in the order
    (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax).
    """
    corners = (xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax)
    return ','.join(str(value) for value in corners) + ',' + label + '\n'


def convert_voc_to_icdar(xml_dir=xmldir, gt_dir=gtdir, verbose=True):
    """Write a ``gt_<stem>.txt`` file in *gt_dir* for each XML in *xml_dir*.

    Args:
        xml_dir: Directory containing Pascal VOC ``.xml`` annotation files.
        gt_dir: Output directory; created if it does not exist.
        verbose: When true, print each box's coordinates while converting.
    """
    os.makedirs(gt_dir, exist_ok=True)
    for xmlfile in sorted(os.listdir(xml_dir)):
        # splitext keeps dots inside the stem ("img.001.xml" -> "img.001").
        stem, ext = os.path.splitext(xmlfile)
        if ext.lower() != '.xml':
            # Skip anything that is not an annotation file.
            continue
        root = ET.parse(os.path.join(xml_dir, xmlfile)).getroot()
        gt_path = os.path.join(gt_dir, 'gt_' + stem + '.txt')
        # 'w' rather than 'a': re-running must not duplicate existing lines.
        # UTF-8 so non-ASCII transcriptions do not depend on the locale.
        with open(gt_path, 'w', encoding='utf-8') as f:
            for obj in root.iter('object'):
                xmin, ymin, xmax, ymax, label = parse_voc_object(obj)
                if verbose:
                    print('xmin:', xmin)
                    print('xmax:', xmax)
                    print('ymin:', ymin)
                    print('ymax:', ymax)
                f.write(format_icdar_line(xmin, ymin, xmax, ymax, label))


if __name__ == '__main__':
    convert_voc_to_icdar(xmldir, gtdir)