Pascal VOC转COCO格式亲测有效

灯花不堪剪

已于 2022-11-17 14:43:17 修改

阅读量867

点赞数 2

文章标签：计算机视觉 深度学习 python

于 2022-11-17 14:41:23 首次发布

本文链接：https://blog.csdn.net/weixin_45071089/article/details/127902962

版权

Pascal VOC转COCO格式亲测有效

Pascal VOC数据集下载
检查VOC数据集并转换为COCO格式
检验COCO格式是否正确

Pascal VOC数据集下载

1、这三个网址就是数据集的下载网址。
http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
2、复制其中一条链接，打开迅雷，点击左上角的加号新建下载任务，把链接粘贴进去即可开始下载。
3、迅雷不限速，下载速度非常快，结果展示

原文链接：https://blog.csdn.net/xuechenxing/article/details/90736328

检查VOC数据集并转换为COCO格式

CheckVOC

# https://blog.csdn.net/weixin_40756000/article/details/124462871

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import cv2
import matplotlib.pyplot as plt
from math import sqrt as sqrt
# Print the working directory; the dataset paths used by this script are
# relative to it.
print(os.getcwd())

# (year, split) pairs to inspect. Each pair selects the id list
# VOC<year>/ImageSets/Main/<split>.txt.
sets = [('2007', split_name) for split_name in ('train', 'val')]

# Class names to inspect; objects with any other name are skipped.
classes = [
    'face',
    'face_mask',
]

if __name__ == '__main__':
    # Walk every image of every configured split, draw its ground-truth boxes
    # for visual inspection, and collect per-box statistics for a scatter plot.
    #
    # The list names are historical: after rescaling each box to a 416x416
    # image, `width` holds sqrt(box area) and `height` holds the aspect ratio
    # (box width / box height).
    width = []
    height = []

    for year, image_set in sets:
        # Image ids (file names without extension) listed for this split.
        with open('VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as id_file:
            image_ids = id_file.read().strip().split()

        for image_id in image_ids:
            img_path = 'VOC%s/JPEGImages/%s.jpg' % (year, image_id)
            # XML annotation that belongs to this image.
            with open('VOC%s/Annotations/%s.xml' % (year, image_id)) as label_file:
                root = ET.parse(label_file).getroot()

            try:
                size = root.find('size')
                img_w = int(size.find('width').text)
                img_h = int(size.find('height').text)
            except (AttributeError, TypeError, ValueError):
                # Missing <size>/<width>/<height> or a non-numeric value:
                # report the offending id and move on.
                print(image_id)
                continue

            # cv2.imread signals failure by returning None rather than raising,
            # so it has to be checked explicitly. A zero image size would make
            # the normalisation below divide by zero.
            img = cv2.imread(img_path)
            if img is None or img_w <= 0 or img_h <= 0:
                print(image_id)
                continue

            for obj in root.iter('object'):
                cls = obj.find('name').text
                # Treat a missing <difficult> tag as "not difficult".
                difficult_node = obj.find('difficult')
                difficult = int(difficult_node.text) if difficult_node is not None else 0
                # NOTE(review): the original filter is `difficult == 2`; VOC
                # files normally use 0/1, so this may never match - confirm
                # whether `== 1` was intended.
                if cls not in classes or difficult == 2:
                    continue

                xmlbox = obj.find('bndbox')
                xmin = int(xmlbox.find('xmin').text)
                ymin = int(xmlbox.find('ymin').text)
                xmax = int(xmlbox.find('xmax').text)
                ymax = int(xmlbox.find('ymax').text)
                obj_w = xmax - xmin
                obj_h = ymax - ymin

                # Draw the box on the image so it can be checked by eye.
                img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)

                if obj_w <= 0 or obj_h <= 0:
                    # Degenerate box: it would divide by zero (or take the
                    # square root of a negative area) below. Report and skip.
                    print(image_id)
                    continue

                # Box size after rescaling the image to 416x416.
                w_change = (obj_w / img_w) * 416
                h_change = (obj_h / img_h) * 416
                width.append(sqrt(w_change * h_change))
                height.append(w_change / h_change)

            # Show the annotated image; waitKey() blocks until a key is pressed.
            img = cv2.resize(img, (608, 608))
            cv2.imshow('result', img)
            cv2.waitKey()

    # Scatter plot: sqrt(area) on the x axis, aspect ratio on the y axis.
    plt.plot(width, height, 'ro')
    plt.show()

VOC2COCO

# https://blog.csdn.net/weixin_40756000/article/details/124462871
# 有改动！注意：img_id不能是str类型，必须转为int，否则在用pycocotools时会出现不能读取的错误


# coding:utf-8

# pip install lxml

import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET

# Id given to the first annotation written by convert(); each later annotation
# in the same call gets the next integer.
START_BOUNDING_BOX_ID = 1


def get(root, name):
    """Return the list of elements produced by ``root.findall(name)``.

    The list is empty when nothing matches.
    """
    matches = root.findall(name)
    return matches


def get_and_check(root, name, length):
    """Find ``name`` under ``root`` and check how many elements matched.

    Args:
        root: Element to search with ``findall``.
        name: Tag (or path) to look for.
        length: Expected number of matches. Values <= 0 disable the count
            check.

    Returns:
        The single matching element when ``length`` is 1, otherwise the list
        of all matching elements.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is positive
            and the number of matches differs from it.
    """
    found = root.findall(name)
    count = len(found)
    if count == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, count))
    if length == 1:
        return found[0]
    return found


def convert(xml_list, json_file, predefined=None, only_predefined=None):
    """Convert Pascal VOC XML annotation files into one COCO-style JSON file.

    Args:
        xml_list: Iterable of paths to VOC ``.xml`` annotation files. The part
            of each file name before the first dot must parse as an integer,
            because it becomes the image id.
        json_file: Path of the JSON file to write.
        predefined: Optional mapping of category name -> category id. When
            omitted, the module-level ``pre_define_categories`` is used, so
            existing two-argument calls behave as before. The mapping is
            copied and never modified.
        only_predefined: Optional flag. When true, objects whose category is
            not in ``predefined`` are dropped; when false they are assigned a
            new id. When omitted, the module-level
            ``only_care_pre_define_categories`` is used.

    Raises:
        NotImplementedError: A required XML element is missing or duplicated
            (raised by ``get_and_check``).
        ValueError: A file name does not yield an integer image id, or a size
            or coordinate is not numeric.
        AssertionError: A bounding box has non-positive width or height.
    """
    if predefined is None:
        predefined = pre_define_categories
    if only_predefined is None:
        only_predefined = only_care_pre_define_categories

    # NOTE(review): the COCO format describes "info" as an object and names
    # the license list "licenses"; the original keys are kept here so existing
    # consumers of this file are unaffected - confirm before changing.
    json_dict = {"info": ['none'], "license": ['none'], "images": [], "annotations": [], "categories": []}
    categories = dict(predefined)
    bnd_id = START_BOUNDING_BOX_ID
    # Occurrence count of every category name seen, kept or not.
    all_categories = {}

    for xml_f in xml_list:
        root = ET.parse(xml_f).getroot()

        filename = os.path.splitext(os.path.basename(xml_f))[0] + ".jpg"

        # The image id has to be an int, not a str: the file's own notes
        # record that pycocotools cannot read string ids.
        id_text = filename.split('.')[0]
        try:
            image_id = int(id_text)
        except ValueError as err:
            raise ValueError(
                "image id must be an integer, but file name {!r} gives {!r}".format(filename, id_text)
            ) from err

        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        json_dict['images'].append(
            {'file_name': filename, 'height': height, 'width': width, 'id': image_id})

        # Segmentation is not supported; only bounding boxes are converted.
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_predefined:
                    continue
                # Take the next id above every id in use, so the new id cannot
                # collide even when the predefined ids are not 1..n.
                new_id = max(categories.values(), default=0) + 1
                print(
                    "[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
                        category, predefined, new_id))
                categories[category] = new_id
            category_id = categories[category]

            bndbox = get_and_check(obj, 'bndbox', 1)
            # int(float(...)) accepts coordinates written as "12.0".
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert (xmax > xmin), "xmax <= xmin, {}".format(xml_f)
            assert (ymax > ymin), "ymax <= ymin, {}".format(xml_f)
            o_width = xmax - xmin
            o_height = ymax - ymin
            # The bbox is stored as [x, y, width, height].
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id': image_id,
                   'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id += 1

    for cate, cid in categories.items():
        json_dict['categories'].append({'supercategory': 'none', 'id': cid, 'name': cate})

    # Serialise first so a serialisation error cannot leave a truncated file,
    # then write through a context manager so the handle is always closed.
    json_str = json.dumps(json_dict)
    with open(json_file, 'w') as json_fp:
        json_fp.write(json_str)

    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
                                                                                  all_categories.keys(),
                                                                                  len(predefined),
                                                                                  predefined.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())


if __name__ == '__main__':
    # Folder containing the VOC XML annotation files.
    xml_dir = './Annotations'
    # Output JSON files for the training, validation and test subsets.
    save_json_train = './train.json'
    save_json_val = './val.json'
    save_json_test = './test.json'
    # Category names; extend this list to handle more classes,
    # e.g. ['dog', 'person', 'cat'].
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    # Category name -> 1-based category id, in the order listed above.
    pre_define_categories = dict(zip(classes, range(1, len(classes) + 1)))

    # When True, objects whose category is not listed above are skipped.
    only_care_pre_define_categories = True

    # Share of the files used for training and validation; the rest is test.
    train_ratio = 0.8
    val_ratio = 0.1
    print('xml_dir is {}'.format(xml_dir))
    # Sort first, then shuffle with a fixed seed, so the split is reproducible.
    xml_list = np.sort(glob.glob(xml_dir + "/*.xml"))
    np.random.seed(100)
    np.random.shuffle(xml_list)

    total = len(xml_list)
    train_num = int(total * train_ratio)
    val_num = int(total * val_ratio)
    split_end = train_num + val_num
    print('训练样本数目是 {}'.format(train_num))
    print('验证样本数目是 {}'.format(val_num))
    print('测试样本数目是 {}'.format(total - train_num - val_num))

    # The shuffled list is cut as: validation | training | test.
    xml_list_val = xml_list[:val_num]
    xml_list_train = xml_list[val_num:split_end]
    xml_list_test = xml_list[split_end:]

    # Convert each subset to its own COCO JSON file: train, then val, then test.
    for subset, target in ((xml_list_train, save_json_train),
                           (xml_list_val, save_json_val),
                           (xml_list_test, save_json_test)):
        convert(subset, target)

记录一下这里遇到的问题：使用 pycocotools 检验 COCO 格式是否正确时会报错。
原因是 imgIds 只能是 int 类型（或 int 数组）；原来的代码直接把图片名（str 类型）作为 image_id，检验时就会出错，因此上面的代码用 int() 把它转换成了整数。
请添加图片描述

检验COCO格式是否正确

MS COCO数据集介绍以及pycocotools简单使用

import os
from pycocotools.coco import COCO
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

# Annotation file to check and the folder that holds its images.
json_path = "VOC2007/train.json"
img_path = "VOC2007/train"

# Load the COCO annotations.
coco = COCO(annotation_file=json_path)

# All image ids, in ascending order.
ids = sorted(coco.imgs.keys())
print("number of images: {}".format(len(ids)))

# Category id -> category name, used to label the boxes.
coco_classes = {cat["id"]: cat["name"] for cat in coco.cats.values()}

# Draw the annotations of the first three images as a visual sanity check.
for img_id in ids[:3]:
    # Ids of all annotations attached to this image, then the annotations.
    ann_ids = coco.getAnnIds(imgIds=img_id)
    targets = coco.loadAnns(ann_ids)

    # File name recorded for this image id.
    path = coco.loadImgs(img_id)[0]['file_name']

    # convert() returns a separate RGB image, so the source file can be closed
    # as soon as the conversion is done.
    with Image.open(os.path.join(img_path, path)) as src:
        img = src.convert('RGB')
    draw = ImageDraw.Draw(img)

    for target in targets:
        # The bbox is unpacked as x, y, width, height; rectangle() is given
        # the two opposite corners.
        x, y, w, h = target["bbox"]
        x1, y1, x2, y2 = x, y, int(x + w), int(y + h)
        draw.rectangle((x1, y1, x2, y2))
        draw.text((x1, y1), coco_classes[target["category_id"]])

    # Show the annotated image.
    plt.imshow(img)
    plt.show()