将VISDRONE标签数据集转换成COCO格式+在MMdetection上训练

最新推荐文章于 2024-08-01 21:07:10 发布

小趴菜——

最新推荐文章于 2024-08-01 21:07:10 发布

阅读量694

点赞数

文章标签： python pytorch 深度学习

本文链接：https://blog.csdn.net/zj_xd/article/details/131033073

版权

参考:

将visdrone数据集转化为coco格式并在mmdetection上训练,附上转好的json文件_S5242的博客-CSDN博客

COCO数据集标注详解 - 知乎 (zhihu.com)

然后参考《在标准数据集上训练预定义的模型（待更新）— MMDetection 3.0.0 文档》使用指南“训练 & 测试”部分中将 balloon dataset 转化为 COCO 格式的代码，就可以将自己的数据集转换成COCO格式。

这一段参考了：(174条消息) 制作自己的COCO格式数据集，附代码！_如何制作自己的coco数据集_L闰土的博客-CSDN博客

COCO标注格式

用于实例分割的 COCO 数据集格式如下所示，其中的键（key）都是必要的，参考这里来获取更多细节。

{
    "images": [image],
    "annotations": [annotation],
    "categories": [category]
}

其中images是一个字典的列表，储存图像的文件名、高、宽和id。id是图像的编号，是唯一的，在annotations中也会用到。有多少张图片，该列表就有多少个字典。

image = {
    "id": int,
    "width": int,
    "height": int,
    "file_name": str,
}

annotations是检测框的标注，一个bounding box的格式如下：

annotation = {
    "id": int,                           
    "image_id": int,                      #图片id
    "category_id": int,                   #类别id
    "segmentation": RLE or [polygon],     #目标的分割区域
    "area": float,                        #标注区域面积
    "bbox": [x,y,width,height], # (x, y) 为 bbox 左上角的坐标    #目标框
    "iscrowd": 0 or 1,                    #一个目标还是多个目标，与segmentation有关
}
categories 表示所有的类别，有多少类就定义多少，类别的id从1开始，0为背景。格式如下：
categories = [{
    "id": int,
    "name": str,
    "supercategory": str,
}]

其中segmentation是分割的多边形，我对这个键的含义不是很懂，而且我用到的标注只有bbox，所以直接设置成了[[]]，注意一定是两个列表嵌套；area是分割的面积；bbox是检测框的[x, y, w, h]坐标；category_id是类别id，与categories中的id对应；image_id是图像的id；id是bbox的id，每个检测框的id是唯一的。有几个bbox，annotations里就有几个字典。

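我的数据集格式为（每行对应一个目标，各字段依次为 bbox_left, bbox_top, bbox_width, bbox_height, score, object_category, truncation, occlusion）：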

684,8,273,116,0,0,0,0
406,119,265,70,0,0,0,0
255,22,119,128,0,0,0,0
1,3,209,78,0,0,0,0

我最终的代码为：

(1)先把txt转换为xml

import os
from PIL import Image

# Point root_dir at your own copy of the dataset; every other path below is
# derived from it.
root_dir = "/data/VISDRONE_OBJECT_DETECTION/VisDrone2019-DET-test-dev/"
annotations_dir = "{}annotations/".format(root_dir)
image_dir = "{}images/".format(root_dir)
# The generated XML files are stored in an Annotations_XML folder under
# root_dir; the folder is created on first use.
xml_dir = "{}Annotations_XML/".format(root_dir)
if os.path.exists(xml_dir) is False:
    os.makedirs(xml_dir)

# Category names, indexed by the category number found in each annotation
# line. Replace them with your own classes to convert another dataset.
class_name = [
    'ignored regions',
    'pedestrian',
    'people',
    'bicycle',
    'car',
    'van',
    'truck',
    'tricycle',
    'awning-tricycle',
    'bus',
    'motor',
    'others',
]

# Write one VOC-style XML file per annotation txt file found in annotations_dir.
for filename in os.listdir(annotations_dir):
    # Ignore stray files (hidden files, editor backups, ...) that are not
    # annotation files.
    if not filename.endswith('.txt'):
        continue
    # splitext only strips the extension, so stems that contain dots survive.
    image_name = os.path.splitext(filename)[0]
    xml_name = xml_dir + image_name + '.xml'

    # Only the image size is needed; the context manager closes the image file
    # right away instead of keeping one handle open per processed image.
    # Change ".jpg" to ".png" here if your images are PNG files.
    with Image.open(image_dir + image_name + ".jpg") as img:
        img_width, img_height = img.size

    with open(annotations_dir + filename, 'r') as fin, open(xml_name, 'w') as fout:
        fout.writelines([
            '<annotation>\n',
            '\t<folder>VOC2007</folder>\n',
            '\t<filename>' + image_name + '.jpg</filename>\n',
            '\t<source>\n',
            '\t\t<database>VisDrone2019-DET</database>\n',
            '\t\t<annotation>VisDrone2019-DET</annotation>\n',
            '\t\t<image>flickr</image>\n',
            '\t\t<flickrid>Unspecified</flickrid>\n',
            '\t</source>\n',
            '\t<owner>\n',
            '\t\t<flickrid>LJ</flickrid>\n',
            '\t\t<name>LJ</name>\n',
            '\t</owner>\n',
            '\t<size>\n',
            '\t\t<width>' + str(img_width) + '</width>\n',
            '\t\t<height>' + str(img_height) + '</height>\n',
            # The depth is hard-coded; it is not read from the image.
            '\t\t<depth>3</depth>\n',
            '\t</size>\n',
            '\t<segmented>0</segmented>\n',
        ])

        for line in fin:
            # A blank (e.g. trailing) line carries no object.
            if not line.strip():
                continue
            # Fields used below: 0-3 = left, top, width, height,
            # 5 = category index, 6 -> <truncated>, 7 -> <difficult>.
            fields = [field.strip() for field in line.split(',')]
            left, top = int(fields[0]), int(fields[1])
            box_width, box_height = int(fields[2]), int(fields[3])
            fout.writelines([
                '\t<object>\n',
                '\t\t<name>' + class_name[int(fields[5])] + '</name>\n',
                '\t\t<pose>Unspecified</pose>\n',
                '\t\t<truncated>' + fields[6] + '</truncated>\n',
                '\t\t<difficult>' + str(int(fields[7])) + '</difficult>\n',
                '\t\t<bndbox>\n',
                '\t\t\t<xmin>' + fields[0] + '</xmin>\n',
                '\t\t\t<ymin>' + fields[1] + '</ymin>\n',
                # xmax/ymax are the inclusive far corner: left + width - 1.
                # NOTE(review): xmin/ymin are copied unchanged, while the
                # xml -> json converter in this article subtracts 1 from them;
                # confirm which pixel-indexing convention you want end to end.
                '\t\t\t<xmax>' + str(left + box_width - 1) + '</xmax>\n',
                '\t\t\t<ymax>' + str(top + box_height - 1) + '</ymax>\n',
                '\t\t</bndbox>\n',
                '\t</object>\n',
            ])

        fout.write('</annotation>')

(2)然后再把xml转换成json文件:

#!/usr/bin/python
# xml是voc的格式
# json是coco的格式
import sys, os, json, glob
import xml.etree.ElementTree as ET

# Id given to the first bounding box written by convert(); later boxes count
# up from this value.
INITIAL_BBOXIds = 1

# Category name -> category id used in the generated JSON.
PREDEF_CLASSE = {
    'ignored regions': 0,
    'pedestrian': 1,
    'people': 2,
    'bicycle': 3,
    'car': 4,
    'van': 5,
    'truck': 6,
    'tricycle': 7,
    'awning-tricycle': 8,
    'bus': 9,
    'motor': 10,
    'others': 11,
}



# function
def get(root, name):
    """Return every element under ``root`` that matches ``name``.

    The lookup is delegated to ``root.findall``; the result is an empty list
    when nothing matches.
    """
    matches = root.findall(name)
    return matches


def get_and_check(root, name, length):
    """Find the ``name`` elements under ``root`` and validate how many exist.

    Args:
        root: Element to search with ``findall``.
        name: Tag (or path) to look for.
        length: Expected number of matches; a value of 0 or less disables
            the count check.

    Returns:
        The single matching element when ``length`` is 1, otherwise the list
        of all matching elements.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is positive
            and the number of matches differs from it.
    """
    found = root.findall(name)
    count = len(found)
    if count == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, count))
    return found[0] if length == 1 else found


def convert(xml_paths, out_json):
    """Convert VOC-style XML annotation files into a single COCO JSON file.

    Args:
        xml_paths: Sequence of XML file paths, one per image. The 1-based
            position of a file in this sequence becomes its image id.
        out_json: Path of the JSON file to write.

    Raises:
        NotImplementedError: Raised by ``get_and_check`` when a required tag
            is missing or occurs an unexpected number of times.
    """
    json_dict = {'images': [], 'type': 'instances',
                 'categories': [], 'annotations': []}
    # Work on a copy so that categories discovered during this run do not
    # leak into the module-level mapping and affect later calls.
    categories = dict(PREDEF_CLASSE)
    bbox_id = INITIAL_BBOXIds
    total = len(xml_paths)
    for image_id, xml_f in enumerate(xml_paths, start=1):

        # Progress output, rewritten in place on a single console line.
        sys.stdout.write('\r>> Converting image %d/%d' % (image_id, total))
        sys.stdout.flush()

        root = ET.parse(xml_f).getroot()
        filename = get_and_check(root, 'filename', 1).text
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        json_dict['images'].append({'file_name': filename, 'height': height,
                                    'width': width, 'id': image_id})
        # Segmentation is not supported; the <segmented> tag is ignored.

        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            if category not in categories:
                # Unknown class: give it the next free id. ``default`` keeps
                # this working when the predefined mapping is empty.
                categories[category] = max(categories.values(), default=0) + 1
            category_id = categories[category]
            bbox = get_and_check(obj, 'bndbox', 1)
            # xmin/ymin are shifted down by one while xmax/ymax are kept, so
            # xmax - xmin is the inclusive pixel extent of a 1-based box.
            # NOTE(review): confirm your XML uses 1-based corners; with
            # 0-based corners the box origin ends up one pixel too small.
            xmin = int(get_and_check(bbox, 'xmin', 1).text) - 1
            ymin = int(get_and_check(bbox, 'ymin', 1).text) - 1
            xmax = int(get_and_check(bbox, 'xmax', 1).text)
            ymax = int(get_and_check(bbox, 'ymax', 1).text)
            # Skip degenerate boxes with no positive width or height.
            if xmax <= xmin or ymax <= ymin:
                continue
            # Both differences are positive here, so no abs() is needed.
            o_width = xmax - xmin
            o_height = ymax - ymin
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id': image_id,
                   'bbox': [xmin, ymin, o_width, o_height], 'category_id': category_id,
                   'id': bbox_id, 'ignore': 0, 'segmentation': []}
            json_dict['annotations'].append(ann)
            bbox_id += 1

    for cate, cid in categories.items():
        json_dict['categories'].append(
            {'supercategory': 'none', 'id': cid, 'name': cate})

    # The context manager guarantees the file is flushed and closed.
    # indent=4 gives a readable file at the cost of slower, larger output.
    with open(out_json, 'w') as json_file:
        json.dump(json_dict, json_file, indent=4)


if __name__ == '__main__':
    # Directory holding the XML files to read; change it to your own location.
    xml_path = '/data/VISDRONE_OBJECT_DETECTION/VisDrone2019-DET-test-dev/Annotations_XML/'
    # glob returns paths in arbitrary order and convert() numbers images by
    # their position in the list, so sort to get reproducible image ids.
    xml_file = sorted(glob.glob(os.path.join(xml_path, '*.xml')))
    # Second argument is where the generated JSON is saved; change it as needed.
    convert(xml_file, '/data/VISDRONE_OBJECT_DETECTION/VisDrone2019-DET-test-dev/test_dev.json')

可以用下面这个代码debug看json文件内容是否符合要求：

import json
# Load the generated COCO JSON so its content can be inspected in a debugger.
# The path is a raw string: backslashes in a normal string literal form
# invalid escape sequences such as "\I" and "\o", which newer Python versions
# warn about. The context manager closes the file even if reading or parsing
# fails.
with open(r'E:\ICCV2023\object detection\VisDrone2019-DET-val\coco_file\coco_val.json', 'r') as f:
    contect = f.read()
a = json.loads(contect)