数据集处理——voc2coco

梦無限大
已于 2023-02-27 18:20:43 修改
阅读量625
点赞数
文章标签： python 开发语言
于 2023-02-27 17:12:03 首次发布
本文链接：https://blog.csdn.net/Lcz971209/article/details/129245675
版权
本代码用于将voc类型数据集转换为coco类型数据集
# -*- coding=utf-8 -*-
import json
import os
import cv2
import datetime
import xml.etree.ElementTree as ET
import shutil


# 从xml文件中提取bounding box信息, 格式为[[x_min, y_min, x_max, y_max, name]]
def parse_xml(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    objs = root.findall('object')
    coords = list()
    for ix, obj in enumerate(objs):
        name = obj.find('name').text
        box = obj.find('bndbox')
        x_min = int(box[0].text)
        y_min = int(box[1].text)
        x_max = int(box[2].text)
        y_max = int(box[3].text)
        coords.append([x_min, y_min, x_max, y_max, name])
    return coords

def parse_json(sjon_path):
    f = open(sjon_path, encoding='utf-8')  # 设置以utf-8解码模式读取文件，encoding参数必须设置，否则默认以gbk模式读取文件，当文件中包含中文时，会报错
    setting = json.load(f)
    z = len(setting)  # z为json第一层内容数量
    category = [''] * z
    xmin = [''] * z
    ymin = [''] * z
    xmax = [''] * z
    ymax = [''] * z
    i = 0
    coords = list()
    for i in range(z):
        category[i] = setting[i]['category']  # 注意多重结构的读取语法

        xmin[i] = setting[i]['x']
        ymin[i] = setting[i]['y']
        xmax[i] = setting[i]['w']
        ymax[i] = setting[i]['h']

        w = xmax[i] / 2
        h = ymax[i] / 2

        xmin[i] = xmin[i] - w
        ymin[i] = ymin[i] - h
        xmax[i] = xmin[i] + xmax[i]
        ymax[i] = ymin[i] + ymax[i]
        coords.append([xmin[i], ymin[i], xmax[i], ymax[i], category[i]])
        i = i + 1
    return coords


def convert(root_path, source_xml_root_path,  target_xml_root_path, phase='train', split=80000):
    '''
    root_path:
        根路径，里面包含JPEGImages(图片文件夹)，classes.txt(类别标签),以及annotations文件夹(如果没有则会自动创建，用于保存最后的json)
    source_xml_root_path:
        VOC xml文件存放的根目录
    target_xml_root_path:
        coco xml存放的根目录
    phase:
        状态：'train'或者'test'
    split:
        train和test图片的分界点数目
    '''
    now = datetime.datetime.now()

    dataset = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None, )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )
    # dataset = {'categories': [], 'images': [], 'annotations': []}

    # 打开类别标签
    with open(os.path.join(root_path, 'classes.txt')) as f:
        classes = f.read().strip().split()

    # 建立类别标签和数字id的对应关系
    for i, cls in enumerate(classes, 1):
        dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'beverage'})  # mark

    # 读取images文件夹的图片名称
    pics = [f for f in os.listdir(os.path.join(root_path, 'JPEGImages'))]

    # 判断是建立训练集还是验证集
    phase = 'train'
    if phase == 'train':
        pics = [line for i, line in enumerate(pics) if i <= split]
    elif phase == 'val':
        pics = [line for i, line in enumerate(pics) if i > split]

    print('---------------- start convert ---------------')
    bnd_id = 1  # 初始为1
    source_json_root_path = './VOC2007/json'
    for i, pic in enumerate(pics):
        # print('pic  '+str(i+1)+'/'+str(len(pics)))
        xml_path = os.path.join(source_xml_root_path, pic[:-4] + '.xml')
        # json_path = os.path.join(source_json_root_path, pic[:-4] + '.json')
        pic_path = os.path.join(root_path, 'JPEGImages/' + pic)
        # 用opencv读取图片，得到图像的宽和高
        im = cv2.imread(pic_path)
        height, width, _ = im.shape
        # 添加图像的信息到dataset中
        dataset['images'].append({'file_name': pic,
                                  'id': i,
                                  'width': width,
                                  'height': height})
        try:
            coords = parse_xml(xml_path)
            # print(coords)
        except:
        #     coords = parse_json(json_path)
            # print(coords)
            # print(pic[:-4] + '.xml not exists~')
            continue
        for coord in coords:
            xn = int(coord[0])
            yn = int(coord[1])
            xa = int(coord[2])
            ya = int(coord[3])
            if xn == xa:
                xn = xn - 0.01
            if yn == ya:
                yn = yn - 0.01

            # x_min
            x1 = min(xn,xa)
            # y_min
            y1 = min(yn,ya)
            # x_max
            x2 = max(xn,xa)
            # y_max
            y2 = max(yn,ya)
            assert x1 < x2
            assert y1 < y2
            # name
            name = coord[4]
            cls_id = classes.index(name) + 1  # 从1开始
            width = max(0, x2 - x1)
            height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': width * height,
                'bbox': [x1, y1, width, height],
                'category_id': int(cls_id),
                'id': bnd_id,
                'image_id': i,
                'iscrowd': 0,
                # mask, 矩形是从左上角点按顺时针的四个顶点
                'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
            })
            bnd_id += 1

    # 保存结果的文件夹
    folder = os.path.join(target_xml_root_path, 'annotations')
    if os.path.exists(folder):
        shutil.rmtree(folder)
    os.makedirs(folder)
    json_name = os.path.join(target_xml_root_path, 'annotations/instances_{}2014.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)


if __name__ == '__main__':
    convert(root_path="./VOC2007",
            source_xml_root_path='./VOC2007/Annotations',
            target_xml_root_path='./data_coco')