# This script converts a VOC-format dataset into a COCO-format dataset.
# -*- coding=utf-8 -*-
import json
import os
import cv2
import datetime
import xml.etree.ElementTree as ET
import shutil
def parse_xml(xml_path):
    """Extract bounding boxes from an annotation XML file.

    Every ``<object>`` element under the document root contributes one entry
    built from its ``<name>`` text and the four coordinates stored in its
    ``<bndbox>`` child.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max, name]`` lists with integer
        coordinates; empty when the file contains no ``<object>`` element.

    Raises:
        Whatever ``ET.parse`` raises for a missing or malformed file, and
        ``AttributeError`` / ``TypeError`` / ``IndexError`` / ``ValueError``
        when an object lacks a name, a bndbox, or a numeric coordinate.
    """
    root = ET.parse(xml_path).getroot()
    coords = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        box = obj.find('bndbox')
        values = []
        for position, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
            # Look the coordinate up by tag name so the result does not depend
            # on the order in which the file lists the bndbox children.
            node = box.find(tag)
            if node is None:
                # Fall back to the positional layout for files that use
                # different tag names.
                node = box[position]
            # Go through float() so values written as "12.0" are accepted;
            # int() then truncates toward zero.
            values.append(int(float(node.text)))
        coords.append(values + [name])
    return coords
def parse_json(sjon_path):
    """Read boxes from a JSON file and convert them to corner coordinates.

    The file must contain a list of records, each providing the keys
    ``'category'``, ``'x'``, ``'y'``, ``'w'`` and ``'h'``. Half of ``w`` / ``h``
    is subtracted from ``x`` / ``y`` to obtain the minimum corner, and the full
    ``w`` / ``h`` is then added to obtain the maximum corner.

    Args:
        sjon_path: Path of the JSON file (the parameter name keeps its
            original spelling so keyword callers continue to work).

    Returns:
        A list of ``[x_min, y_min, x_max, y_max, category]`` lists, one per
        record, in file order.
    """
    # Decode explicitly as UTF-8: the platform default encoding may not be
    # UTF-8 and would fail on non-ASCII category names. The context manager
    # guarantees the handle is closed even if json.load raises.
    with open(sjon_path, encoding='utf-8') as f:
        records = json.load(f)

    coords = []
    for record in records:
        box_w = record['w']
        box_h = record['h']
        x_min = record['x'] - box_w / 2
        y_min = record['y'] - box_h / 2
        coords.append([x_min, y_min, x_min + box_w, y_min + box_h, record['category']])
    return coords
def convert(root_path, source_xml_root_path, target_xml_root_path, phase='train', split=80000):
    """Convert a VOC-style dataset into a COCO-style ``instances`` JSON file.

    Args:
        root_path: Dataset root. It must contain a ``JPEGImages`` folder with
            the images and a ``classes.txt`` file listing the class names
            separated by whitespace.
        source_xml_root_path: Folder holding the XML annotation files; each
            image ``<stem>.<ext>`` is matched with ``<stem>.xml``.
        target_xml_root_path: Output root. The result is written to
            ``<target_xml_root_path>/annotations/instances_<phase>2014.json``.
        phase: ``'train'`` keeps the images whose position in the sorted file
            list is ``<= split``; ``'val'`` keeps those whose position is
            ``> split``; any other value keeps every image.
        split: Index boundary between the train and val subsets.

    Raises:
        ValueError: If an annotation names a class missing from classes.txt.
    """
    now = datetime.datetime.now()
    dataset = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None, )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Load the class names and map each one to a 1-based category id.
    with open(os.path.join(root_path, 'classes.txt')) as f:
        classes = f.read().strip().split()
    class_ids = {}
    for cls_id, cls in enumerate(classes, 1):
        dataset['categories'].append({'id': cls_id, 'name': cls, 'supercategory': 'beverage'})  # mark
        # The first occurrence wins, matching what list.index() would return.
        class_ids.setdefault(cls, cls_id)

    # os.listdir returns entries in arbitrary order; sort them so the
    # train/val boundary is reproducible across runs.
    pics = sorted(os.listdir(os.path.join(root_path, 'JPEGImages')))

    # Honour the caller's phase (it must not be overwritten here, otherwise
    # the 'val' subset can never be produced).
    if phase == 'train':
        pics = [pic for idx, pic in enumerate(pics) if idx <= split]
    elif phase == 'val':
        pics = [pic for idx, pic in enumerate(pics) if idx > split]

    print('---------------- start convert ---------------')
    bnd_id = 1  # annotation ids start at 1
    for image_id, pic in enumerate(pics):
        # splitext copes with extensions of any length (".jpg", ".jpeg", ...).
        stem = os.path.splitext(pic)[0]
        xml_path = os.path.join(source_xml_root_path, stem + '.xml')
        pic_path = os.path.join(root_path, 'JPEGImages', pic)

        # Read the image with OpenCV to obtain its width and height.
        im = cv2.imread(pic_path)
        if im is None:
            # imread signals an unreadable / non-image file by returning None.
            print('skip unreadable image: ' + pic_path)
            continue
        img_height, img_width = im.shape[:2]

        dataset['images'].append({'file_name': pic,
                                  'id': image_id,
                                  'width': img_width,
                                  'height': img_height})

        try:
            coords = parse_xml(xml_path)
        except Exception:
            # Best effort: an image whose annotation file is missing or
            # unusable stays listed in 'images' but gets no annotations.
            continue

        for coord in coords:
            xn, yn, xa, ya = (int(value) for value in coord[:4])
            # Nudge zero-width / zero-height boxes so their area is positive.
            if xn == xa:
                xn = xn - 0.01
            if yn == ya:
                yn = yn - 0.01
            x1, x2 = min(xn, xa), max(xn, xa)
            y1, y2 = min(yn, ya), max(yn, ya)

            name = coord[4]
            if name not in class_ids:
                raise ValueError(
                    'class {!r} in {} is not listed in classes.txt'.format(name, xml_path))

            box_width = x2 - x1
            box_height = y2 - y1
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': class_ids[name],
                'id': bnd_id,
                'image_id': image_id,
                'iscrowd': 0,
                # mask: the rectangle's four corners, clockwise from top-left
                'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
            })
            bnd_id += 1

    # Create the output folder if needed, but never wipe it: the JSON of a
    # previously converted phase must survive converting another phase.
    folder = os.path.join(target_xml_root_path, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(folder, 'instances_{}2014.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)
if __name__ == '__main__':
    # Script entry point: convert the VOC2007 folder in the working directory
    # and write the result under ./data_coco, using convert()'s default
    # phase and split.
    convert(
        root_path="./VOC2007",
        source_xml_root_path='./VOC2007/Annotations',
        target_xml_root_path='./data_coco',
    )