这里是前言!!
从VOC数据集转换到COCO数据集是一个常见的任务,尤其是在计算机视觉领域,特别是在对象检测和实例分割等任务中。VOC数据集和COCO数据集是两种广泛使用的标注格式,它们各自具有不同的结构和特点。下面是关于如何将VOC数据集转换为COCO数据集的基本介绍:
一、VOC数据集与COCO数据集介绍
VOC数据集格式
VOC数据集通常包含图像文件和XML标注文件。XML文件通常遵循PASCAL VOC格式,其中包含有关图像中对象的信息,如对象类别、边界框坐标等。
<annotation>
<filename>000001.jpg</filename>
<size>
<width>640</width>
<height>480</height>
<depth>3</depth>
</size>
<object>
<name>car</name>
<bndbox>
<xmin>100</xmin>
<ymin>100</ymin>
<xmax>200</xmax>
<ymax>200</ymax>
</bndbox>
</object>
</annotation>
COCO数据集格式
COCO数据集使用JSON文件来存储标注信息。它支持多种类型的标注,包括对象检测、实例分割、关键点检测等。每条标注(annotation)都有一个全局唯一的ID,并记录所属图像的ID、类别ID、边界框坐标(格式为 [x, y, width, height])等信息。
{
"images": [
{
"id": 1,
"width": 640,
"height": 480,
"file_name": "000001.jpg"
}
],
"annotations": [
{
"id": 1,
"image_id": 1,
"category_id": 3,
"bbox": [100, 100, 100, 100],
"area": 10000,
"iscrowd": 0
}
],
"categories": [
{"id": 1, "name": "person"},
{"id": 2, "name": "bicycle"},
{"id": 3, "name": "car"},
...
]
}
二、使用步骤
1.划分数据集
代码如下(示例):
import os
import random

# Fraction of all samples held out of training (validation + test together).
trainval_percent = 0.1
# Fraction of the held-out samples written to val.txt; the rest go to test.txt.
train_percent = 0.9
xmlfilepath = r"D:\beijing_jiebang\VOCdevkit\VOC_resize\ann"
txtsavepath = r"D:\beijing_jiebang\VOCdevkit\VOC_resize\imgeset\main"

# Only .xml annotation files take part in the split; any other file in the
# folder would otherwise end up in the lists with a mangled name.
total_xml = [f for f in os.listdir(xmlfilepath) if f.lower().endswith('.xml')]
print(total_xml)

num = len(total_xml)
indices = range(num)
# Number of held-out samples (validation + test).
tv = int(num * trainval_percent)
# Number of held-out samples that are used for validation.
tr = int(tv * train_percent)
# random.sample returns the requested number of distinct indices.
trainval = random.sample(indices, tv)
# NOTE: despite its name, `train` holds the indices written to val.txt.
train = random.sample(trainval, tr)
print(train)

# Sets give constant-time membership tests inside the loop below.
heldout_ids = set(trainval)
val_ids = set(train)

# The context manager closes all four list files even if a write fails.
with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as fval, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as ftest:
    print(ftrainval)
    for i in indices:
        # Drop the '.xml' extension; the lists store bare sample names.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in heldout_ids:
            ftrainval.write(name)
            if i in val_ids:
                fval.write(name)
            else:
                ftest.write(name)
        else:
            ftrain.write(name)
2.设置自己的数据类别
代码如下(示例):
import sys
import os
import json
import xml.etree.ElementTree as ET
# Id given to the first bounding-box annotation; convert() adds one per box.
# NOTE(review): some COCO tooling may treat id 0 specially - confirm whether
# your evaluation code expects annotation ids to start at 1.
START_BOUNDING_BOX_ID = 0

# Mapping of category name -> category id used by convert().
# It has to be defined, otherwise convert() fails with a NameError. Leave it
# empty to let convert() assign ids in order of first appearance, or replace
# it with your own classes following one of the templates below.
PRE_DEFINE_CATEGORIES = {}

# Template 1: a custom data set (edit here: replace with your own categories).
# PRE_DEFINE_CATEGORIES = {'SF6ylb': 1, 'aqmzc': 2, 'badge': 3, 'bjzc': 4, 'bjzc0': 5, 'bmwh': 6,
# 'ddjt': 7, 'drq': 8, 'drqgd': 9, 'ecjxh': 10, 'ecjxh0': 11, 'fenzha': 12, 'fire': 13, 'gbps': 14,
# 'gbqs': 15, 'glove': 16,'gzzc':17, 'hezha':18, 'hzyw':19, 'jdyxx':20, 'jdyxxsd':21, 'jsxs':22,
# 'jyh': 23, 'operatingbar':24,'powerchecker':25, 'pzq':26, 'pzqcd':27, 'smoke':28, 'wcaqm':29,
# 'wcgz': 30, 'wrongglove':31, 'xldlb':32,'xmbhyc':33, 'xmbhzc':34, 'xy':35, 'ylb':36, 'yljdq':37,
# 'ylsff': 38, 'ywb':39, 'ywc':40, 'ywj':41, 'yx':42, 'yxdgsg':43,'yxdgsg0':44, 'zzyb':45}

# Template 2: the 20 PASCAL VOC classes.
# PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
# "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
# "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
# "motorbike": 14, "person": 15, "pottedplant": 16,
# "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}
def get(root, name):
    """Return every element under ``root`` that matches ``name``.

    The result is whatever ``root.findall(name)`` produces, i.e. a list that
    is empty when nothing matches.
    """
    return root.findall(name)
def get_and_check(root, name, length):
    """Find the ``name`` children of ``root`` and validate how many there are.

    Args:
        root: Element to search with ``findall``.
        name: Tag (or path) to look for.
        length: Expected number of matches. A value that is not positive
            disables the count check.

    Returns:
        The single matching element when ``length`` is 1, otherwise the list
        of all matching elements.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is positive
            and the number of matches differs from it.
    """
    matches = root.findall(name)
    count = len(matches)
    if count == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, count))
    return matches[0] if length == 1 else matches
def get_filename_as_int(filename):
    """Return the image id for ``filename``: the name without its extension.

    Despite the historical function name, the id is returned as a string and
    is not converted to an integer, so non-numeric file names are accepted.

    Args:
        filename: Image file name (or path) as a path-like value.

    Returns:
        ``filename`` with its last extension removed.

    Raises:
        NotImplementedError: If ``filename`` is not a path-like value
            (for example ``None``).
    """
    try:
        return os.path.splitext(filename)[0]
    except TypeError as err:
        # Keep the exception type callers already see, but say what is
        # actually wrong: no integer is required here.
        raise NotImplementedError(
            'Filename %s is not a valid file name.' % (filename,)) from err
def convert(xml_list, xml_dir, json_file):
    """Convert the VOC XML annotations named in a list file to one COCO JSON.

    Args:
        xml_list: Path of a text file with one annotation name per line
            (the XML file name without the ``.xml`` suffix).
        xml_dir: Directory that holds the ``<name>.xml`` annotation files.
        json_file: Path of the COCO-style JSON file to write. Its parent
            directory is created when missing.

    Raises:
        NotImplementedError: If an annotation has more than one ``path``
            element, or a required element is missing (from get_and_check).
        AssertionError: If a bounding box has non-positive width or height.
    """
    json_dict = {"images": [], "type": "instances", "annotations": [],
                 "categories": []}
    # The module-level mapping is used directly (not copied): categories
    # discovered here are added to it and are visible to later calls.
    categories = PRE_DEFINE_CATEGORIES
    bnd_id = START_BOUNDING_BOX_ID
    with open(xml_list, 'r') as list_fp:
        for line in list_fp:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) names no annotation.
                continue
            line = line + ".xml"
            print("Processing %s" % (line))
            xml_f = os.path.join(xml_dir, line)
            # The annotation is decoded as GBK text before parsing; the
            # handle is closed as soon as the tree has been built.
            with open(xml_f, encoding='gbk') as xml_file:
                root = ET.parse(xml_file).getroot()
            path = get(root, 'path')
            if len(path) == 1:
                filename = os.path.basename(path[0].text)
            elif len(path) == 0:
                filename = get_and_check(root, 'filename', 1).text
            else:
                raise NotImplementedError('%d paths found in %s' % (len(path), line))
            # The image id is derived from the image file name.
            image_id = get_filename_as_int(filename)
            size = get_and_check(root, 'size', 1)
            width = int(get_and_check(size, 'width', 1).text)
            height = int(get_and_check(size, 'height', 1).text)
            image = {'file_name': filename, 'height': height, 'width': width,
                     'id': image_id}
            json_dict['images'].append(image)
            # Currently segmentation is not supported; only boxes are written.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                if category not in categories:
                    # Start from the historical choice len(categories) but
                    # skip ids already taken, which happens when the
                    # predefined ids are 1-based.
                    used_ids = set(categories.values())
                    new_id = len(categories)
                    while new_id in used_ids:
                        new_id += 1
                    categories[category] = new_id
                category_id = categories[category]
                bndbox = get_and_check(obj, 'bndbox', 1)
                # Only the top-left corner is shifted by one pixel.
                xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
                ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)
                assert xmax > xmin, 'xmax must be greater than xmin in %s' % line
                assert ymax > ymin, 'ymax must be greater than ymin in %s' % line
                o_width = abs(xmax - xmin)
                o_height = abs(ymax - ymin)
                ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id':
                       image_id, 'bbox': [xmin, ymin, o_width, o_height],
                       'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                       'segmentation': []}
                json_dict['annotations'].append(ann)
                bnd_id = bnd_id + 1
    for cate, cid in categories.items():
        cat = {'supercategory': 'none', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)
    # open(..., 'w') creates the file itself but not its directory.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(json_file, 'w') as json_fp:
        json_fp.write(json.dumps(json_dict))
if __name__ == '__main__':
    # Arguments handed to convert():
    #   xml_list - text file listing the annotation names to convert
    #   xml_dir  - directory that holds the actual XML files
    #   json_dir - path of the JSON file to write
    # Alternative settings for the other splits are kept below as comments.
    # xml_list = r'D:\\VOCdevkit\VOC_resize\imgeset\main/test.txt'
    # xml_list = r'D:\VOCdevkit\VOC_resize\imgeset\main/train.txt'
    # json_dir = r'D:\\VOCdevkit\data\annotations\instances_train2017.json'
    # json_dir = r'D:\\VOCdevkit\data\annotations\instances_test2017.json'
    xml_dir = r'D:\\VOCdevkit\VOC_resize\ann'
    xml_list = r'D:\\VOCdevkit\VOC_resize\imgeset\main/val.txt'
    # NOTE (from the original author): create this JSON file yourself
    # beforehand, otherwise the program reports insufficient permissions.
    json_dir = r'D:\\VOCdevkit\data\annotations\instances_val2017.json'
    convert(xml_list=xml_list, xml_dir=xml_dir, json_file=json_dir)
该处所保存的json文件本身不必提前创建:以 'w' 模式打开时会自动创建(或覆盖)该文件,但需确保其所在目录存在且可写。如果运行时仍提示权限不足,可按原做法先手动创建一个空白的json文件。
3.将划分好的数据,根据txt文件进行复制
# -*- coding: utf-8 -*-
import shutil
# shutil makes copying files straightforward
def objFileName(local_file_name_list):
    """Read a list file and return the names it contains.

    Args:
        local_file_name_list: Path of a text file with one name per line.

    Returns:
        A list with one entry per non-empty line, with the newline removed.
    """
    # The context manager closes the list file once it has been read.
    with open(local_file_name_list, 'r') as list_file:
        names = (line.replace('\n', '') for line in list_file)
        # Blank lines carry no name and are skipped.
        return [name for name in names if name]
def copy_img(local_file_name_list, local_img_name, path, ext='.jpg'):
    """Copy every image named in a list file into a destination directory.

    Args:
        local_file_name_list: Text file listing the image names (without
            extension), one per line; read with objFileName().
        local_img_name: Directory that holds the source images.
        path: Directory the images are copied to; created when missing.
        ext: File extension appended to each listed name. Defaults to
            ``'.jpg'``, the extension that used to be hard-coded.
    """
    import os  # local import: this script section only imports shutil

    os.makedirs(path, exist_ok=True)
    for stem in objFileName(local_file_name_list):
        new_obj_name = stem + ext
        # The file keeps its name; only the directory changes.
        shutil.copy(os.path.join(local_img_name, new_obj_name),
                    os.path.join(path, new_obj_name))
if __name__ == '__main__':
    # Settings for copy_img():
    #   local_file_name_list - text file listing every file name to copy
    #   local_img_name       - directory the images are copied from
    #   path                 - directory the images are copied to
    path = r"D:\\VOCdevkit\data\val2017"
    local_img_name = r'D:\beijing_jiebang\VOCdevkit\VOC_resize\img'
    local_file_name_list = r"D:\\VOCdevkit\VOC_resize\imgeset\main/val.txt"
    copy_img(local_file_name_list=local_file_name_list,
             local_img_name=local_img_name,
             path=path)
总结
大概就是这样了,非常的简单,只需要设置好目录即可,以上代码非原创,仅仅是对其进行总结,方便使用。