如何将注释从PASCAL VOC XML转换为COCO JSON

最新推荐文章于 2023-10-31 18:49:40 发布

孙琪翔

最新推荐文章于 2023-10-31 18:49:40 发布

阅读量574

点赞数

本文链接：https://blog.csdn.net/m0_37909240/article/details/106654857

版权

计算机视觉问题需要带注释的数据集。具体来说，对象检测问题要求图像中的每个目标都用带标签的边界框标注出来。

随着对象检测的发展，描述对象注释的不同文件格式已经出现。这造成了令人沮丧的情况，团队需要投入大量时间从一种注释格式转换为另一种注释格式，而不是专注于更高价值的任务，例如改进深度学习模型架构。

数据科学家花费时间在注释格式之间进行转换就像作者花费时间将Word文档转换为PDF一样。

最常见的注释格式来自挑战和聚集的数据集。随着机器学习研究人员利用这些数据集构建更好的模型，其注释的格式成为非官方的标准协议。

PASCAL VOC XML

PASCAL（模式分析，统计建模和计算学习）是由欧盟资助的卓越网络。从2005年到2012年，PASCAL举办了视觉对象挑战赛（VOC）。PASCAL每年发布对象检测数据集并报告基准。

PASCAL VOC注释以XML格式发布，其中每个图像都有一个随附的XML文件，该文件描述了框架中包含的边界框。例如，在用于血液细胞检测的BCCD数据集中，一个XML注释示例如下所示：

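<annotation>
    <folder>JPEGImages</folder>
    <filename>BloodImage_00000.jpg</filename>
    <path>/home/pi/detection_dataset/JPEGImages/BloodImage_00000.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>640</width>
        <height>480</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>WBC</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>260</xmin>
            <ymin>177</ymin>
            <xmax>491</xmax>
            <ymax>376</ymax>
        </bndbox>
    </object>
    ...
    <object>
        ...
    </object>
</annotation>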

注意几个关键点：（1）被注释的图像文件以相对路径的形式被引用；（2）图像的元数据以 width、height 和 depth 的形式包含在内；（3）边界框的像素位置由左上角和右下角表示，即 xmin、ymin、xmax、ymax。

COCO JSON

COCO（Common Objects in Context，上下文中的常见物体）数据集源自 Microsoft 在2014年发表的一篇论文。该数据集“包含91种对象类型的照片，这些对象很容易被4岁的孩子识别”。在328,000张图像中，总共有250万个带标签的实例。考虑到开源数据的数量和质量，COCO已成为测试和证明新模型达到最新性能水平的标准数据集。

COCO注释以JSON格式发布。与PASCAL VOC中每个图像都有其自己的注释文件不同，COCO JSON使用一个JSON文件来描述整个图像集合。此外，COCO数据集支持多种类型的计算机视觉问题：关键点检测、对象检测、分割和图像描述（captioning）。因此，不同的任务对应不同的注释格式。这篇文章着重于对象检测。用于对象检测的COCO JSON示例注释如下所示：

{
    "info": {
        "year": "2020",
        "version": "1",
        "description": "Exported from roboflow.ai",
        "contributor": "",
        "url": "https://app.roboflow.ai/datasets/bccd-single-image-example/1",
        "date_created": "2020-01-30T23:05:21+00:00"
    },
    "licenses": [
        {
            "id": 1,
            "url": "",
            "name": "Unknown"
        }
    ],
    "categories": [
        {
            "id": 0,
            "name": "cells",
            "supercategory": "none"
        },
        {
            "id": 1,
            "name": "RBC",
            "supercategory": "cells"
        },
        {
            "id": 2,
            "name": "WBC",
            "supercategory": "cells"
        }
    ],
    "images": [
        {
            "id": 0,
            "license": 1,
            "file_name": "0bc08a33ac64b0bd958dd5e4fa8dbc43.jpg",
            "height": 480,
            "width": 640,
            "date_captured": "2020-02-02T23:05:21+00:00"
        }
    ],
    "annotations": [
        {
            "id": 0,
            "image_id": 0,
            "category_id": 2,
            "bbox": [
                260,
                177,
                231,
                199
            ],
            "area": 45969,
            "segmentation": [],
            "iscrowd": 0
        },
        {
            "id": 1,
            "image_id": 0,
            "category_id": 1,
            "bbox": [
                78,
                336,
                106,
                99
            ],
            "area": 10494,
            "segmentation": [],
            "iscrowd": 0
        },
        {
            "id": 2,
            "image_id": 0,
            "category_id": 1,
            "bbox": [
                63,
                237,
                106,
                99
            ],
            "area": 10494,
            "segmentation": [],
            "iscrowd": 0
        },
        ...
    ]
}

请注意此处的一些关键事项：（1）文件包含有关数据集本身及其许可证的信息；（2）所有用到的标签都定义在 categories 中；（3）边界框被定义为左上角的 x、y 坐标，后面跟着边界框的 width 和 height。

将VOC XML转换为COCO JSON

流行的注释工具（如LabelImg）在Pascal VOC XML中提供注释。诸如ImageNet之类的某些模型要求Pascal VOC。其他（例如Mask-RCNN）要求使用COCO JSON注释的图像。

使用Python脚本

import os

import argparse

import json

import xml.etree.ElementTree as ET

from typing import Dict, List

from tqdm import tqdm

import re

def get_label2id(labels_path: str) -> Dict[str, int]:
    """Map every label name listed in a text file to a 1-based integer id.

    The file content is split on any whitespace and the resulting names are
    numbered in order of appearance (1, 2, 3, ...).  When a name occurs more
    than once, the id of its last occurrence is the one that is kept.

    Args:
        labels_path: path of the text file holding the label names.

    Returns:
        Dictionary from label name to its integer id.
    """
    with open(labels_path, 'r') as labels_file:
        names = labels_file.read().split()
    return {name: index for index, name in enumerate(names, start=1)}

def get_annpaths(ann_dir_path: str = None,
                 ann_ids_path: str = None,
                 ext: str = '',
                 annpaths_list_path: str = None) -> List[str]:
    """Collect the annotation file paths that should be converted.

    Two input modes are supported:

    * ``annpaths_list_path``: a text file of whitespace-separated annotation
      paths.  Its entries are returned unchanged and every other argument is
      ignored.
    * ``ann_dir_path`` together with ``ann_ids_path``: a text file of
      whitespace-separated annotation ids.  Each id is joined to the
      directory and, when ``ext`` is non-empty, suffixed with ``'.' + ext``.

    Args:
        ann_dir_path: directory that contains the annotation files.
        ann_ids_path: text file listing the annotation ids.
        ext: extension (without the dot) appended to every id; ``''`` appends
            nothing.
        annpaths_list_path: text file listing complete annotation paths.

    Returns:
        List of annotation file paths, in the order they appear in the file.

    Raises:
        ValueError: if no paths list is given and the annotation directory or
            the ids file is missing as well.
    """
    # Mode 1: an explicit list of annotation paths takes precedence.
    if annpaths_list_path is not None:
        with open(annpaths_list_path, 'r') as f:
            return f.read().split()

    # Mode 2: directory + annotation ids list.  Fail early with a readable
    # message instead of the opaque TypeError that open(None) or
    # os.path.join(None, ...) would otherwise produce.
    if ann_dir_path is None or ann_ids_path is None:
        raise ValueError(
            'Either annpaths_list_path, or both ann_dir_path and '
            'ann_ids_path, must be provided.')

    ext_with_dot = '.' + ext if ext != '' else ''
    with open(ann_ids_path, 'r') as f:
        ann_ids = f.read().split()
    return [os.path.join(ann_dir_path, ann_id + ext_with_dot)
            for ann_id in ann_ids]

def get_image_info(annotation_root, extract_num_from_imgid=True):
    """Build the COCO ``images`` entry for one parsed VOC annotation.

    The file name comes from the base name of ``<path>`` when that element
    has text, otherwise from ``<filename>``.  The image id is derived from
    the file stem (base name without extension).

    Args:
        annotation_root: root element of the VOC XML tree; ``path``,
            ``filename`` and ``size`` (``width`` / ``height``) are read
            from it.
        extract_num_from_imgid: when true, the id is the first run of digits
            found in the file stem, converted to ``int``
            (``BloodImage_00012`` -> ``12``).  When false, the stem itself is
            used as the id.

    Returns:
        dict with the keys ``file_name``, ``height``, ``width`` and ``id``.

    Raises:
        ValueError: if the annotation names no image file, if a numeric id
            is requested but the stem has no digits, or if ``<size>`` /
            ``<width>`` / ``<height>`` is missing or not an integer.
    """
    path = annotation_root.findtext('path')
    if path:
        filename = os.path.basename(path)
    else:
        # <path> is absent or empty: fall back to <filename>.
        filename = annotation_root.findtext('filename')
    if not filename:
        raise ValueError(
            'Annotation has neither a usable <path> nor a <filename>.')

    img_id = os.path.splitext(os.path.basename(filename))[0]
    if extract_num_from_imgid:
        digits = re.search(r'\d+', img_id)
        if digits is None:
            # Previously surfaced as a bare IndexError with no context.
            raise ValueError(
                f'Cannot extract a numeric image id from "{img_id}".')
        img_id = int(digits.group())

    size = annotation_root.find('size')
    if size is None:
        raise ValueError(f'Annotation for "{filename}" has no <size>.')
    dims = {}
    for tag in ('width', 'height'):
        text = size.findtext(tag)
        if text is None:
            raise ValueError(
                f'Annotation for "{filename}" has no <size>/<{tag}>.')
        dims[tag] = int(text)

    return {
        'file_name': filename,
        'height': dims['height'],
        'width': dims['width'],
        'id': img_id,
    }

def get_coco_annotation_from_obj(obj, label2id):
    """Convert one VOC ``<object>`` element into a COCO annotation dict.

    The corner box ``(xmin, ymin, xmax, ymax)`` read from ``<bndbox>`` is
    turned into COCO's ``[x, y, width, height]``.  ``xmin`` and ``ymin`` are
    shifted by -1 before the width/height are computed (i.e. the VOC corner
    is treated as a 1-based pixel index).

    Args:
        obj: the ``<object>`` XML element; ``name`` and ``bndbox`` are read.
        label2id: mapping from label name to COCO category id.

    Returns:
        dict with the keys ``area``, ``iscrowd``, ``bbox``, ``category_id``,
        ``ignore`` and ``segmentation`` (always an empty list).  The caller
        is expected to add ``image_id`` and ``id``.

    Raises:
        AssertionError: if the label is unknown or the box has a
            non-positive width or height.
        ValueError: if ``<bndbox>`` or one of its coordinates is missing or
            is not a number.
    """
    label = obj.findtext('name')
    # Raised explicitly instead of via `assert` so the check is not stripped
    # under `python -O`; the exception type is kept for existing callers.
    if label not in label2id:
        raise AssertionError(f"Error: {label} is not in label2id !")
    category_id = label2id[label]

    bndbox = obj.find('bndbox')
    if bndbox is None:
        raise ValueError(f'Object "{label}" has no <bndbox>.')

    def read_coord(tag):
        text = bndbox.findtext(tag)
        if text is None:
            raise ValueError(f'Object "{label}" has no <bndbox>/<{tag}>.')
        try:
            return int(text)
        except ValueError:
            # Some annotation tools write coordinates as floats ("12.0").
            return int(float(text))

    xmin = read_coord('xmin') - 1
    ymin = read_coord('ymin') - 1
    xmax = read_coord('xmax')
    ymax = read_coord('ymax')
    if not (xmax > xmin and ymax > ymin):
        raise AssertionError(
            f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}")

    o_width = xmax - xmin
    o_height = ymax - ymin
    return {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
        'segmentation': [],  # This script is not for segmentation
    }

def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str,
                             extract_num_from_imgid: bool = True):
    """Convert a list of VOC XML files into a single COCO JSON file.

    Every XML file contributes one ``images`` entry and one ``annotations``
    entry per ``<object>`` element.  Annotation ids start at 1 and keep
    increasing across all files.  One ``categories`` entry (with
    supercategory ``'none'``) is written for each item of ``label2id``.

    Args:
        annotation_paths: paths of the VOC XML files to convert.
        label2id: mapping from label name to category id.
        output_jsonpath: path of the JSON file to write.
        extract_num_from_imgid: forwarded to ``get_image_info``.
    """
    images = []
    annotations = []
    # START_BOUNDING_BOX_ID, TODO input as args ?
    next_ann_id = 1

    print('Start converting !')
    for xml_path in tqdm(annotation_paths):
        # Read annotation xml
        root = ET.parse(xml_path).getroot()

        image_entry = get_image_info(
            annotation_root=root,
            extract_num_from_imgid=extract_num_from_imgid)
        image_id = image_entry['id']
        images.append(image_entry)

        for voc_object in root.findall('object'):
            entry = get_coco_annotation_from_obj(obj=voc_object,
                                                 label2id=label2id)
            entry.update({'image_id': image_id, 'id': next_ann_id})
            annotations.append(entry)
            next_ann_id += 1

    categories = [
        {'supercategory': 'none', 'id': label_id, 'name': label}
        for label, label_id in label2id.items()
    ]

    # Key order matches the layout of the written JSON document.
    coco_dict = {
        "images": images,
        "type": "instances",
        "annotations": annotations,
        "categories": categories,
    }

    with open(output_jsonpath, 'w') as out_file:
        out_file.write(json.dumps(coco_dict))

def main():
    """Command-line entry point: parse the arguments and run the conversion.

    ``--labels`` is mandatory.  The annotation files are selected either with
    ``--ann_paths_list`` or with ``--ann_dir`` plus ``--ann_ids``; when
    neither form is complete the parser reports a usage error and exits
    instead of failing later with a traceback.
    """
    parser = argparse.ArgumentParser(
        description='This script support converting voc format xmls to coco format json')
    parser.add_argument('--ann_dir', type=str, default=None,
                        help='path to annotation files directory. It is not need when use --ann_paths_list')
    parser.add_argument('--ann_ids', type=str, default=None,
                        help='path to annotation files ids list. It is not need when use --ann_paths_list')
    parser.add_argument('--ann_paths_list', type=str, default=None,
                        help='path of annotation paths list. It is not need when use --ann_dir and --ann_ids')
    # The label list is always opened, so omitting it can never work.
    parser.add_argument('--labels', type=str, default=None, required=True,
                        help='path to label list.')
    parser.add_argument('--output', type=str, default='output.json', help='path to output json file')
    parser.add_argument('--ext', type=str, default='', help='additional extension of annotation file')
    args = parser.parse_args()

    if args.ann_paths_list is None and (args.ann_dir is None or args.ann_ids is None):
        parser.error('either --ann_paths_list or both --ann_dir and --ann_ids must be given')

    label2id = get_label2id(labels_path=args.labels)
    ann_paths = get_annpaths(
        ann_dir_path=args.ann_dir,
        ann_ids_path=args.ann_ids,
        ext=args.ext,
        annpaths_list_path=args.ann_paths_list
    )
    convert_xmls_to_cocojson(
        annotation_paths=ann_paths,
        label2id=label2id,
        output_jsonpath=args.output,
        extract_num_from_imgid=True
    )


if __name__ == '__main__':
    main()

要在自己的示例上重用此脚本，文件结构必须与示例存储库的文件结构匹配！

最近确实被数据集的标注格式转换的问题搞得很头大，等我完全处理好了以后会针对keypoints（人体关键点）标注的问题做详细介绍。

最近在用远程服务器做实验，环境都是配置好了的，几乎不用担心环境的问题，直接就可以跑实验，有同样需求的朋友可以参考：智星云官网： http://www.ai-galaxy.cn/，淘宝店：https://shop36573300.taobao.com/公众号: 智星AI，

越来越觉得，本科没毕业之前觉得毕业好难，本科毕业工作之后又觉得工作好难，一边工作一边考研，又觉得考研更难，现在眼看还有一年研究生就要毕业了，我才觉得现在才更难。人生也许就是不断的升级打怪的过程，不要放弃，硬着头皮总是能挺过去的。九层之台，起于累土。PEACE。

参考文献：

http://cocodataset.org/#home

https://gluon-cv.mxnet.io/build/examples_datasets/pascal_voc.html

https://blog.roboflow.ai/how-to-convert-annotations-from-voc-xml-to-coco-json/

http://www.ai-galaxy.cn/

https://shop36573300.taobao.com/

孙琪翔

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫