现在很多训练样本都采用 VOC 格式，而这些样本数据往往还需要进一步分析和处理。
为此写了一个小脚本，用来对 VOC 格式的数据进行处理：
# -*- coding:utf-8 -*-
import cv2
import os
import argparse
import numpy as np
from xml.etree import ElementTree as ET
def parse_args(argv=None):
    """Parse the command-line arguments that locate the dataset folders.

    Three positional arguments are expected, in this order:
    ``Annotations``, ``ImageSets`` and ``JPEGImages``.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which
            is what the original zero-argument call did.

    Returns:
        An ``argparse.Namespace`` with the string attributes
        ``Annotations``, ``ImageSets`` and ``JPEGImages``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('Annotations', help='Annotations File')
    parser.add_argument('ImageSets', help='ImageSets File')
    parser.add_argument('JPEGImages', help='JPEGImages File')
    return parser.parse_args(argv)
def getbbox(xml):
    """Collect every bounding box stored in an annotation XML file.

    Args:
        xml: Path or file object of the annotation, handed to
            ``ElementTree.parse``.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` list of ints for each
        ``<bndbox>`` element found anywhere in the document, in document
        order. The list is empty when the file contains no ``<bndbox>``.
    """
    fields = ('xmin', 'ymin', 'xmax', 'ymax')
    root = ET.parse(xml).getroot()
    bboxes = []
    for bndbox in root.iter(tag="bndbox"):
        box = []
        for position, field in enumerate(fields):
            # Look the coordinate up by tag name so that files whose
            # children are written in a different order still yield the
            # right values; fall back to the positional child only when
            # the named tag is absent.
            node = bndbox.find(field)
            if node is None:
                node = bndbox[position]
            # Going through float() accepts values such as "48.0", which
            # int() alone rejects; integer text is unaffected.
            box.append(int(float(node.text)))
        bboxes.append(box)
    return bboxes
def fill(img, xml):
    """Display an image with small boxes blacked out and the rest outlined.

    Each box returned by ``getbbox(xml)`` is measured with inclusive
    coordinates (``xmax - xmin + 1``). A box narrower or shorter than 20
    pixels is painted over in black; any other box is outlined and
    labelled with its width. The result is shown in a window titled
    ``'fill'`` and the call blocks until a key is pressed.

    NOTE(review): the pasted source had lost its indentation. Placing
    ``putText`` inside the ``else`` branch and ``imshow``/``waitKey`` after
    the loop is the reconstructed reading - confirm against the original.

    Args:
        img: Path of the image file to load.
        xml: Path of the annotation file describing the boxes.

    Raises:
        IOError: If the image cannot be loaded.
    """
    im = cv2.imread(img)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside a drawing call.
        raise IOError('cannot read image: %s' % img)

    for xmin, ymin, xmax, ymax in getbbox(xml):
        w = xmax - xmin + 1
        h = ymax - ymin + 1
        if w < 20 or h < 20:
            # fillPoly only accepts 32-bit integer points; a plain np.array
            # of Python ints is int64 on most platforms and gets rejected.
            rect = np.array(
                [[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)]],
                dtype=np.int32)
            cv2.fillPoly(im, rect, (0, 0, 0))
        else:
            cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
            # Width label drawn just above the top-left corner of the box.
            cv2.putText(im, str(w), (xmin, ymin - 5), 0, 1, (0, 255, 0))

    cv2.imshow('fill', im)
    cv2.waitKey()
# Image-set lists that handle() should walk. Each key is the base name of a
# list file located at <ImageSets>/Main/<key>.txt; only keys mapped to True
# are opened.
kind = {'trainval': False, 'test': True}
def vision_label(img, xml):
    """Display an image with every annotated box outlined and labelled.

    Each box returned by ``getbbox(xml)`` is outlined and its width is
    written just above its top-left corner. The result is shown in a
    window titled ``'label'`` and the call blocks until a key is pressed.

    NOTE(review): the pasted source had lost its indentation. Drawing
    inside the loop and calling ``imshow``/``waitKey`` once after it is the
    reconstructed reading - confirm against the original.

    Args:
        img: Path of the image file to load.
        xml: Path of the annotation file describing the boxes.

    Raises:
        IOError: If the image cannot be loaded.
    """
    im = cv2.imread(img)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside a drawing call.
        raise IOError('cannot read image: %s' % img)

    for xmin, ymin, xmax, ymax in getbbox(xml):
        # Inclusive pixel width, matching the measurement used by fill().
        w = xmax - xmin + 1
        cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
        cv2.putText(im, str(w), (xmin, ymin - 5), 0, 1, (0, 255, 0))

    cv2.imshow('label', im)
    cv2.waitKey()
# Display switches consulted by handle() for every listed image:
#   vision  - when true, vision_label() is called (outline and label all boxes)
#   fillful - when true, fill() is called (black out small boxes, outline the rest)
vision = False
fillful = True
def handle(Annotations, JPEGImages, ImageSets):
    """Visualise every image named in the enabled image-set lists.

    For each key of the module-level ``kind`` dict whose value is true,
    the list file ``<ImageSets>/Main/<key>.txt`` is read line by line. Each
    non-blank line is taken as an image base name, and the matching
    ``.jpg`` / ``.xml`` pair is passed to ``vision_label`` and/or ``fill``
    depending on the module-level ``vision`` and ``fillful`` switches.

    Args:
        Annotations: Directory holding the ``<name>.xml`` annotation files.
        JPEGImages: Directory holding the ``<name>.jpg`` image files.
        ImageSets: Directory containing the ``Main`` sub-directory with the
            list files.
    """
    for key, enabled in kind.items():
        if not enabled:
            continue
        list_path = os.path.join(ImageSets, 'Main', key + '.txt')
        with open(list_path) as f:
            for line in f:
                # strip() instead of line[:-1]: the last line of a file may
                # lack a trailing newline, in which case slicing would cut
                # off the final character of the name.
                name = line.strip()
                if not name:
                    continue
                # os.path.join keeps the paths valid on every platform,
                # unlike a hard-coded backslash separator.
                img = os.path.join(JPEGImages, name + '.jpg')
                xml = os.path.join(Annotations, name + '.xml')
                if vision:
                    vision_label(img, xml)
                if fillful:
                    fill(img, xml)
if __name__ == "__main__":
    # Script entry point: read the three dataset directories from the
    # command line and hand them to handle() in the order its signature
    # expects (Annotations, JPEGImages, ImageSets).
    args = parse_args()
    handle(args.Annotations, args.JPEGImages, args.ImageSets)