VisDrone数据集 | 用CenterNet跑通VisDrone数据集（python、ubuntu)

最新推荐文章于 2024-06-26 09:35:27 发布

Liaojiajia-2020

最新推荐文章于 2024-06-26 09:35:27 发布

阅读量7.5k

点赞数 22

分类专栏： # 航拍 & 人脸数据集

本文链接：https://blog.csdn.net/mary_0830/article/details/103361664

版权

航拍 & 人脸数据集专栏收录该内容

10 篇文章 13 订阅

订阅专栏

使用CenterNet跑通VisDrone数据集

一、转格式：Visdrone数据集转成coco格式
二、直接VisdroneDET2019数据集转coco格式：
- 可视化转换json格式是否正确
三、修改配置信息

一、转格式：Visdrone数据集转成coco格式

基本步骤为：txt–>xml–>json

VisDrone 的数据格式：
（x,y,w,h,score,object_categry,truncation,occlusion)
转换成：（图片名称，目标类型，bbox坐标）

第一步：把VisDrone的txt标注格式转换成Voc的xml格式

txt2xml.py

import os
from PIL import Image

# 把下面的路径改成你自己的路径即可
root_dir = "./VisDrone2019-DET-train/"
annotations_dir = root_dir+"annotations/"
image_dir = root_dir + "images/"
xml_dir = root_dir+"Annotations_XML/"
# 下面的类别也换成你自己数据类别，也可适用于其他的数据集转换
class_name = ['ignored regions','pedestrian','people','bicycle','car','van','truck','tricycle','awning-tricycle','bus','motor','others']

for filename in os.listdir(annotations_dir):
    fin = open(annotations_dir+filename, 'r')
    image_name = filename.split('.')[0]
    img = Image.open(image_dir+image_name+".jpg") # 若图像数据是“png”转换成“.png”即可
    xml_name = xml_dir+image_name+'.xml'
    with open(xml_name, 'w') as fout:
        fout.write('<annotation>'+'\n')
        
        fout.write('\t'+'<folder>VOC2007</folder>'+'\n')
        fout.write('\t'+'<filename>'+image_name+'.jpg'+'</filename>'+'\n')
        
        fout.write('\t'+'<source>'+'\n')
        fout.write('\t\t'+'<database>'+'VisDrone2018 Database'+'</database>'+'\n')
        fout.write('\t\t'+'<annotation>'+'VisDrone2018'+'</annotation>'+'\n')
        fout.write('\t\t'+'<image>'+'flickr'+'</image>'+'\n')
        fout.write('\t\t'+'<flickrid>'+'Unspecified'+'</flickrid>'+'\n')
        fout.write('\t'+'</source>'+'\n')
        
        fout.write('\t'+'<owner>'+'\n')
        fout.write('\t\t'+'<flickrid>'+'Haipeng Zhang'+'</flickrid>'+'\n')
        fout.write('\t\t'+'<name>'+'Haipeng Zhang'+'</name>'+'\n')
        fout.write('\t'+'</owner>'+'\n')
        
        fout.write('\t'+'<size>'+'\n')
        fout.write('\t\t'+'<width>'+str(img.size[0])+'</width>'+'\n')
        fout.write('\t\t'+'<height>'+str(img.size[1])+'</height>'+'\n')
        fout.write('\t\t'+'<depth>'+'3'+'</depth>'+'\n')
        fout.write('\t'+'</size>'+'\n')
        
        fout.write('\t'+'<segmented>'+'0'+'</segmented>'+'\n')

        for line in fin.readlines():
            line = line.split(',')
            fout.write('\t'+'<object>'+'\n')
            fout.write('\t\t'+'<name>'+class_name[int(line[5])]+'</name>'+'\n')
            fout.write('\t\t'+'<pose>'+'Unspecified'+'</pose>'+'\n')
            fout.write('\t\t'+'<truncated>'+line[6]+'</truncated>'+'\n')
            fout.write('\t\t'+'<difficult>'+str(int(line[7]))+'</difficult>'+'\n')
            fout.write('\t\t'+'<bndbox>'+'\n')
            fout.write('\t\t\t'+'<xmin>'+line[0]+'</xmin>'+'\n')
            fout.write('\t\t\t'+'<ymin>'+line[1]+'</ymin>'+'\n')
            # pay attention to this point!(0-based)
            fout.write('\t\t\t'+'<xmax>'+str(int(line[0])+int(line[2])-1)+'</xmax>'+'\n')
            fout.write('\t\t\t'+'<ymax>'+str(int(line[1])+int(line[3])-1)+'</ymax>'+'\n')
            fout.write('\t\t'+'</bndbox>'+'\n')
            fout.write('\t'+'</object>'+'\n')
             
        fin.close()
        fout.write('</annotation>')

第二步，检验一下所转换的xml格式画回原图中是否准确

xml_drawbox.py
代码如下：

import os
import os.path
import numpy as np
import xml.etree.ElementTree as xmlET
from PIL import Image, ImageDraw

classes = ('__background__', # always index 0
           'ignored regions','pedestrian','people','bicycle','car','van','truck','tricycle','awning-tricycle','bus','motor','others')

# 把下面的路径改为自己的路径即可
file_path_img = './VisDrone2019-DET-train/images'
file_path_xml = './VisDrone2019-DET-train/Annotations_XML'
save_file_path = './VisDrone2019-DET-train/output'

pathDir = os.listdir(file_path_xml)
for idx in range(len(pathDir)):  
    filename = pathDir[idx]
    tree = xmlET.parse(os.path.join(file_path_xml, filename))
    objs = tree.findall('object')        
    num_objs = len(objs)
    boxes = np.zeros((num_objs, 5), dtype=np.uint16)

    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        # Make pixel indexes 0-based
        x1 = float(bbox.find('xmin').text) 
        y1 = float(bbox.find('ymin').text) 
        x2 = float(bbox.find('xmax').text) 
        y2 = float(bbox.find('ymax').text) 

        cla = obj.find('name').text
        label = classes.index(cla)

        boxes[ix, 0:4] = [x1, y1, x2, y2]
        boxes[ix, 4] = label

    image_name = os.path.splitext(filename)[0]
    img = Image.open(os.path.join(file_path_img, image_name + '.jpg'))

    draw = ImageDraw.Draw(img)
    for ix in range(len(boxes)):
        xmin = int(boxes[ix, 0])
        ymin = int(boxes[ix, 1])
        xmax = int(boxes[ix, 2])
        ymax = int(boxes[ix, 3])
        draw.rectangle([xmin, ymin, xmax, ymax], outline=(255, 0, 0))
        draw.text([xmin, ymin], classes[boxes[ix, 4]], (255, 0, 0))

    img.save(os.path.join(save_file_path, image_name + '.png'))

使用上面的代码可以使用xml标注格式画回原图中！

第三步，将xml格式转换成json格式

代码如下，下面有两种代码提供使用转换。

方法一、

# -*- coding:utf-8 -*-
# !/usr/bin/env python
 
import argparse
import json
import matplotlib.pyplot as plt
import skimage.io as io
import cv2
from labelme import utils
import numpy as np
import glob
import PIL.Image
import os,sys
  
class PascalVOC2coco(object):
    def __init__(self, xml=[], save_json_path='./new.json'):
        '''
        :param xml: 所有Pascal VOC的xml文件路径组成的列表
        :param save_json_path: json保存位置
        '''
        self.xml = xml
        self.save_json_path = save_json_path
        self.images = []
        self.categories = []
        self.annotations = []
        # self.data_coco = {}
        self.label = []
        self.annID = 1
        self.height = 0
        self.width = 0
        self.ob = []
 
        self.save_json()
 
    def data_transfer(self):
        for num, json_file in enumerate(self.xml):
 
            # 进度输出
            sys.stdout.write('\r>> Converting image %d/%d' % (
                num + 1, len(self.xml)))
            sys.stdout.flush()
 
            self.json_file = json_file
            #print("self.json", self.json_file)
            self.num = num
            #print(self.num)
            path = os.path.dirname(self.json_file)
            #print(path)
            path = os.path.dirname(path)
            #print(path)
            # path=os.path.split(self.json_file)[0]
            # path=os.path.split(path)[0]
            obj_path = glob.glob(os.path.join(path, 'SegmentationObject', '*.png'))
            #print(obj_path)
            with open(json_file, 'r') as fp:
                #print(fp)
                flag = 0
                for p in fp:
                    #print(p)
                    # if 'folder' in p:
                    #     folder =p.split('>')[1].split('<')[0]
                    f_name = 1
                    if 'filename' in p:
                        self.filen_ame = p.split('>')[1].split('<')[0]
                        #print(self.filen_ame)
                        f_name = 0
 
                        self.path = os.path.join(path, 'SegmentationObject', self.filen_ame.split('.')[0] + '.png')
                        #if self.path not in obj_path:
                        #    break
 
 
                    if 'width' in p:
                        self.width = int(p.split('>')[1].split('<')[0])
                        #print(self.width)
                    if 'height' in p:
                        self.height = int(p.split('>')[1].split('<')[0])
 
                        self.images.append(self.image())
                        #print(self.image())
 
                    if flag == 1:
                        self.supercategory = self.ob[0]
                        if self.supercategory not in self.label:
                            self.categories.append(self.categorie())
                            self.label.append(self.supercategory)
 
                        # 边界框
                        x1 = int(self.ob[-4]);
                        y1 = int(self.ob[-3]);
                        x2 = int(self.ob[-2]);
                        y2 = int(self.ob[-1])
                        self.rectangle = [x1, y1, x2, y2]
                        self.bbox = [x1, y1, x2 - x1, y2 - y1]  # COCO 对应格式[x,y,w,h]
 
                        self.annotations.append(self.annotation())
                        self.annID += 1
                        self.ob = []
                        flag = 0
                    elif f_name == 1:
                        if 'name' in p:
                            self.ob.append(p.split('>')[1].split('<')[0])
 
                        if 'xmin' in p:
                            self.ob.append(p.split('>')[1].split('<')[0])
 
                        if 'ymin' in p:
                            self.ob.append(p.split('>')[1].split('<')[0])
 
                        if 'xmax' in p:
                            self.ob.append(p.split('>')[1].split('<')[0])
 
                        if 'ymax' in p:
                            self.ob.append(p.split('>')[1].split('<')[0])
                            flag = 1
 
                    '''
                    if '<object>' in p:
                        # 类别
                        print(next(fp))
                        d = [next(fp).split('>')[1].split('<')[0] for _ in range(7)]
                        self.supercategory = d[0]
                        if self.supercategory not in self.label:
                            self.categories.append(self.categorie())
                            self.label.append(self.supercategory)
                        # 边界框
                        x1 = int(d[-4]);
                        y1 = int(d[-3]);
                        x2 = int(d[-2]);
                        y2 = int(d[-1])
                        self.rectangle = [x1, y1, x2, y2]
                        self.bbox = [x1, y1, x2 - x1, y2 - y1]  # COCO 对应格式[x,y,w,h]
                        self.annotations.append(self.annotation())
                        self.annID += 1
                     '''
 
        sys.stdout.write('\n')
        sys.stdout.flush()
 
    def image(self):
        image = {}
        image['height'] = self.height
        image['width'] = self.width
        image['id'] = self.num + 1
        image['file_name'] = self.filen_ame
        return image
 
    def categorie(self):
        categorie = {}
        categorie['supercategory'] = self.supercategory
        categorie['id'] = len(self.label) + 1  # 0 默认为背景
        categorie['name'] = self.supercategory
        return categorie
 
    def annotation(self):
        annotation = {}
        # annotation['segmentation'] = [self.getsegmentation()]
        annotation['segmentation'] = [list(map(float, self.getsegmentation()))]
        annotation['iscrowd'] = 0
        annotation['image_id'] = self.num + 1
        # annotation['bbox'] = list(map(float, self.bbox))
        annotation['bbox'] = self.bbox
        annotation['category_id'] = self.getcatid(self.supercategory)
        annotation['id'] = self.annID
        return annotation
 
    def getcatid(self, label):
        for categorie in self.categories:
            if label == categorie['name']:
                return categorie['id']
        return -1
 
    def getsegmentation(self):
 
        try:
            mask_1 = cv2.imread(self.path, 0)
            mask = np.zeros_like(mask_1, np.uint8)
            rectangle = self.rectangle
            mask[rectangle[1]:rectangle[3], rectangle[0]:rectangle[2]] = mask_1[rectangle[1]:rectangle[3],
                                                                         rectangle[0]:rectangle[2]]
 
            # 计算矩形中点像素值
            mean_x = (rectangle[0] + rectangle[2]) // 2
            mean_y = (rectangle[1] + rectangle[3]) // 2
 
            end = min((mask.shape[1], int(rectangle[2]) + 1))
            start = max((0, int(rectangle[0]) - 1))
 
            flag = True
            for i in range(mean_x, end):
                x_ = i;
                y_ = mean_y
                pixels = mask_1[y_, x_]
                if pixels != 0 and pixels != 220:  # 0 对应背景 220对应边界线
                    mask = (mask == pixels).astype(np.uint8)
                    flag = False
                    break
            if flag:
                for i in range(mean_x, start, -1):
                    x_ = i;
                    y_ = mean_y
                    pixels = mask_1[y_, x_]
                    if pixels != 0 and pixels != 220:
                        mask = (mask == pixels).astype(np.uint8)
                        break
            self.mask = mask
 
            return self.mask2polygons()
 
        except:
            return [0]
 
    def mask2polygons(self):
        contours = cv2.findContours(self.mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)  # 找到轮廓线
        bbox=[]
        for cont in contours[1]:
            [bbox.append(i) for i in list(cont.flatten())]
            # map(bbox.append,list(cont.flatten()))
        return bbox # list(contours[1][0].flatten())
 
    # '''
    def getbbox(self, points):
        # img = np.zeros([self.height,self.width],np.uint8)
        # cv2.polylines(img, [np.asarray(points)], True, 1, lineType=cv2.LINE_AA)  # 画边界线
        # cv2.fillPoly(img, [np.asarray(points)], 1)  # 画多边形 内部像素值为1
        polygons = points
        mask = self.polygons_to_mask([self.height, self.width], polygons)
        return self.mask2box(mask)
 
    def mask2box(self, mask):
        '''从mask反算出其边框
        mask：[h,w]  0、1组成的图片
        1对应对象，只需计算1对应的行列号（左上角行列号，右下角行列号，就可以算出其边框）
        '''
        # np.where(mask==1)
        index = np.argwhere(mask == 1)
        rows = index[:, 0]
        clos = index[:, 1]
        # 解析左上角行列号
        left_top_r = np.min(rows)  # y
        left_top_c = np.min(clos)  # x
 
        # 解析右下角行列号
        right_bottom_r = np.max(rows)
        right_bottom_c = np.max(clos)
 
        # return [(left_top_r,left_top_c),(right_bottom_r,right_bottom_c)]
        # return [(left_top_c, left_top_r), (right_bottom_c, right_bottom_r)]
        # return [left_top_c, left_top_r, right_bottom_c, right_bottom_r]  # [x1,y1,x2,y2]
        return [left_top_c, left_top_r, right_bottom_c - left_top_c,
                right_bottom_r - left_top_r]  # [x1,y1,w,h] 对应COCO的bbox格式
 
    def polygons_to_mask(self, img_shape, polygons):
        mask = np.zeros(img_shape, dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask
 
    # '''
    def data2coco(self):
        data_coco = {}
        data_coco['images'] = self.images
        data_coco['categories'] = self.categories
        data_coco['annotations'] = self.annotations
        return data_coco
 
    def save_json(self):
        self.data_transfer()
        self.data_coco = self.data2coco()
        # 保存json文件
        json.dump(self.data_coco, open(self.save_json_path, 'w'), indent=4)  # indent=4 更加美观显示
 
 
xml_file = glob.glob('./VisDrone2019-DET-train/Annotations_XML/0000002_00005_d_0000014.xml')
# xml_file=['./Annotations/000032.xml']

PascalVOC2coco(xml_file, './VisDrone2019-DET-train/Annotations/train.json')

在转换过程中遇到的错误
1、遇到的报错：xml.etree.ElementTree.ParseError: syntax error: line 1, column 0
原因是：在处理大规模数据集的时候可能会出现这个错误，将错误的xml删去就可以了！

2、遇到的报错：TypeError: expected str, bytes or os.PathLike object, not list
（类型错误：应为str、bytes或os.PathLike对象，而不是list）

方法二、 在运行前需要把路径和参数设置好。
（注意，这里默认划分的是训练、验证和测试集，如果你不需要测试集，只需要简单的改下代码即可）

# coding:utf-8
# 运行前请先做以下工作：
# pip install lxml
# 将所有的图片及xml文件存放到xml_dir指定的文件夹下，并将此文件夹放置到当前目录下
#

import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET

START_BOUNDING_BOX_ID = 1
save_path = "."


def get(root, name):
    return root.findall(name)


def get_and_check(root, name, length):
    vars = get(root, name)
    if len(vars) == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length and len(vars) != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, len(vars)))
    if length == 1:
        vars = vars[0]
    return vars


def convert(xml_list, json_file):
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    all_categories = {}
    for index, line in enumerate(xml_list):
        # print("Processing %s"%(line))
        xml_f = line
        tree = ET.parse(xml_f)
        root = tree.getroot()

        filename = os.path.basename(xml_f)[:-4] + ".jpg"
        image_id = 20190000001 + index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
        json_dict['images'].append(image)
        #  Currently we do not support segmentation
        segmented = get_and_check(root, 'segmented', 1).text
        assert segmented == '0'
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            if category in all_categories:
                all_categories[category] += 1
            else:
                all_categories[category] = 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                new_id = len(categories) + 1
                print(
                    "[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
                        category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert (xmax > xmin), "xmax <= xmin, {}".format(line)
            assert (ymax > ymin), "ymax <= ymin, {}".format(line)
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id':
                image_id, 'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {'supercategory': 'food', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)
    json_fp = open(json_file, 'w')
    json_str = json.dumps(json_dict)
    json_fp.write(json_str)
    json_fp.close()
    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
                                                                                  all_categories.keys(),
                                                                                  len(pre_define_categories),
                                                                                  pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())


if __name__ == '__main__':
    # 定义你自己的类别
    classes = ['aaa', 'bbb', 'ccc', 'ddd', 'eee', 'fff']
    pre_define_categories = {}
    for i, cls in enumerate(classes):
        pre_define_categories[cls] = i + 1
    # 这里也可以自定义类别id，把上面的注释掉换成下面这行即可
    # pre_define_categories = {'a1': 1, 'a3': 2, 'a6': 3, 'a9': 4, "a10": 5}
    only_care_pre_define_categories = True  # or False

    # 保存的json文件
    save_json_train = 'train_food.json'
    save_json_val = 'val_food.json'
    save_json_test = 'test_food.json'

    # 初始文件所在的路径
    xml_dir = "./image_and_xml"
    xml_list = glob.glob(xml_dir + "/*.xml")
    xml_list = np.sort(xml_list)

    # 打乱数据集
    np.random.seed(100)
    np.random.shuffle(xml_list)

    # 按比例划分打乱后的数据集
    train_ratio = 0.8
    val_ratio = 0.1
    train_num = int(len(xml_list) * train_ratio)
    val_num = int(len(xml_list) * val_ratio)
    xml_list_train = xml_list[:train_num]
    xml_list_val = xml_list[train_num: train_num+val_num]
    xml_list_test = xml_list[train_num+val_num:]

    # 将xml文件转为coco文件，在指定目录下生成三个json文件（train/test/food）
    convert(xml_list_train, save_json_train)
    convert(xml_list_val, save_json_val)
    convert(xml_list_test, save_json_test)

    # # 将图片按照划分后的结果进行存放
    # if os.path.exists(save_path + "/annotations"):
    #     shutil.rmtree(save_path + "/annotations")
    # os.makedirs(save_path + "/annotations")
    # if os.path.exists(save_path + "/images_divide/train"):
    #     shutil.rmtree(save_path + "/images_divide/train")
    # os.makedirs(save_path + "/images_divide/train")
    # if os.path.exists(save_path + "/images_divide/val"):
    #     shutil.rmtree(save_path + "/images_divide/val")
    # os.makedirs(save_path + "/images_divide/val")
    # if os.path.exists(save_path + "/images_divide/test"):
    #     shutil.rmtree(save_path + "/images_divide/test")
    # os.makedirs(save_path + "/images_divide/test")

    # # 按需执行，生成3个txt文件，存放相应的文件名称
    # f1 = open("./train.txt", "w")
    # for xml in xml_list_train:
    #     img = xml[:-4] + ".jpg"
    #     f1.write(os.path.basename(xml)[:-4] + "\n")
    #     shutil.copyfile(img, save_path + "/images_divide/train/" + os.path.basename(img))
    #
    # f2 = open("val.txt", "w")
    # for xml in xml_list_val:
    #     img = xml[:-4] + ".jpg"
    #     f2.write(os.path.basename(xml)[:-4] + "\n")
    #     shutil.copyfile(img, save_path + "/images_divide/val/" + os.path.basename(img))
    #
    # f3 = open("test.txt", "w")
    # for xml in xml_list_val:
    #     img = xml[:-4] + ".jpg"
    #     f2.write(os.path.basename(xml)[:-4] + "\n")
    #     shutil.copyfile(img, save_path + "/images_divide/test/" + os.path.basename(img))
    #
    # f1.close()
    # f2.close()
    # f3.close()

    print("-" * 50)
    print("train number:", len(xml_list_train))
    print("val number:", len(xml_list_val))
    print("test number:", len(xml_list_val))

详细步骤参考

方法三（更新于2020.10.8）
xml2json2.py

import xml.etree.ElementTree as ET
import os
import json
 
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []
 
category_set = dict()
set_image = set()
 
category_item_id = 0
image_id = 0
annotation_id = 0
 
def addCatItem(name):
    global category_item_id
    category_item = dict()
    # category_item['supercategory'] = 'none'
    category_item_id += 1
    category_item['id'] = category_item_id
    category_item['name'] = name
    coco['categories'].append(category_item)
    category_set[name] = category_item_id
    return category_item_id
 
def addImgItem(file_name, size):
    global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    if size['width'] is None:
        raise Exception('Could not find width tag in xml file.')
    if size['height'] is None:
        raise Exception('Could not find height tag in xml file.')
    image_id += 1
    image_item = dict()
    image_item['id'] = int(file_name[:-4])
    image_item['file_name'] = file_name
    image_item['width'] = size['width']
    image_item['height'] = size['height']
    coco['images'].append(image_item)
    set_image.add(file_name)
    return image_id
 
def addAnnoItem(object_name, file_name, category_id, bbox):
    global annotation_id
    annotation_item = dict()
    annotation_item['segmentation'] = []
    seg = []
    #bbox[] is x,y,w,h
    #left_top
    seg.append(bbox[0])
    seg.append(bbox[1])
    #left_bottom
    seg.append(bbox[0])
    seg.append(bbox[1] + bbox[3])
    #right_bottom
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1] + bbox[3])
    #right_top
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1])
 
    annotation_item['segmentation'].append(seg)
 
    annotation_item['area'] = bbox[2] * bbox[3]
    annotation_item['iscrowd'] = 0
    annotation_item['ignore'] = 0
    # import pdb
    # pdb.set_trace()
    annotation_item['image_id'] = int(file_name[:-4])
    annotation_item['bbox'] = bbox
    
    annotation_item['category_id'] = category_id
    annotation_id += 1
    annotation_item['id'] = annotation_id
    coco['annotations'].append(annotation_item)
 
def parseXmlFiles(xml_path): 
    for f in os.listdir(xml_path):
        if not f.endswith('.xml'):
            continue
        
        bndbox = dict()
        size = dict()
        current_image_id = None
        current_category_id = None
        file_name = None
        size['width'] = None
        size['height'] = None
        size['depth'] = None
 
        xml_file = os.path.join(xml_path, f)
        # print(xml_file)
 
        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
 
        #elem is <folder>, <filename>, <size>, <object>
        for elem in root:
            current_parent = elem.tag
            current_sub = None
            object_name = None
            
            if elem.tag == 'folder':
                continue
            
            if elem.tag == 'filename':
                file_name = elem.text
                if file_name in category_set:
                    raise Exception('file_name duplicated')
                
            #add img item only after parse <size> tag
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                import pdb; pdb.set_trace()
                if file_name not in set_image:
                    # current_image_id = addImgItem(file_name, size)
                    current_image_id = file_name[:-4]
                    print('add image with {} and {}'.format(file_name, size))
                else:
                    raise Exception('duplicated image: {}'.format(file_name)) 
            #subelem is <width>, <height>, <depth>, <name>, <bndbox>
            for subelem in elem:
                bndbox ['xmin'] = None
                bndbox ['xmax'] = None
                bndbox ['ymin'] = None
                bndbox ['ymax'] = None
                
                current_sub = subelem.tag
                if current_parent == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]
 
                elif current_parent == 'size':
                    if size[subelem.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = int(subelem.text)
 
                #option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
                for option in subelem:
                    if current_sub == 'bndbox':
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)
 
                #only after parse the <object> tag
                if bndbox['xmin'] is not None:
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_image_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_category_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    bbox = []
                    #x
                    bbox.append(bndbox['xmin'])
                    #y
                    bbox.append(bndbox['ymin'])
                    #w
                    bbox.append(bndbox['xmax'] - bndbox['xmin'])
                    #h
                    bbox.append(bndbox['ymax'] - bndbox['ymin'])
                    print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, file_name, current_category_id, bbox)
 
if __name__ == '__main__':
    xml_path = './annotations_xml'
    json_file = './annotations/instances_val_.json'
    parseXmlFiles(xml_path)
    json.dump(coco, open(json_file, 'w'))

第四步，检查json格式转换的是否正确（即，将转换的数据画回原图中）

（更新于2020.10.8）
json_drawbox.py

from pycocotools.coco import COCO
import cv2
import pandas as pd
import json
 
 
def select(json_path, outpath, image_path):
    json_file = open(json_path)
    infos = json.load(json_file)
    images = infos["images"]
    annos = infos["annotations"]
    assert len(images) == len(images)
    for i in range(len(images)):
        im_id = images[i]["id"]
        im_path = image_path + images[i]["file_name"]
        img = cv2.imread(im_path)
        for j in range(len(annos)):
            # import pdb
            # pdb.set_trace()
            if annos[j]["image_id"] == im_id:
                # import pdb
                # pdb.set_trace()
                x, y, w, h = annos[j]["bbox"]
                x, y, w, h = int(x), int(y), int(w), int(h)
                x2, y2 = x + w, y + h
                # object_name = annos[j][""]
                img = cv2.rectangle(img, (x, y), (x2, y2), (255, 0, 0), thickness=2)
                img_name = outpath + images[i]["file_name"]
                cv2.imwrite(img_name, img)
                # continue
        # print(i)

if __name__ == "__main__":
    # imageidFile = ''
    json_path = ''
    image_path = ''
    outpath = ''
    select(json_path, outpath, image_path)

二、直接VisdroneDET2019数据集转coco格式：

txt2json_visdrone.py

import os
import cv2
from tqdm import tqdm
import json
 
 
def convert_to_cocodetection(dir, output_dir):
    train_dir = os.path.join(dir, "VisDrone2019-DET-train")
    val_dir = os.path.join(dir, "VisDrone2019-DET-val")
    train_annotations = os.path.join(train_dir, "annotations")
    val_annotations = os.path.join(val_dir, "annotations")
    train_images = os.path.join(train_dir, "images")
    val_images = os.path.join(val_dir, "images")
    id_num = 0
 
    categories = [{"id": 1, "name": "pedestrian"},
                  {"id": 2, "name": "people"},
                  {"id": 3, "name": "bicycle"},
                  {"id": 4, "name": "car"},
                  {"id": 5, "name": "van"},
                  {"id": 6, "name": "truck"},
                  {"id": 7, "name": "tricycle"},
                  {"id": 8, "name": "awning-tricycle"},
                  {"id": 9, "name": "bus"},
                  {"id": 10, "name": "motor"}
                  ]
    for mode in ["train", "val"]:
        images = []
        annotations = []
        print(f"start loading {mode} data...")
        if mode == "train":
            set = os.listdir(train_annotations)
            annotations_path = train_annotations
            images_path = train_images
        else:
            set = os.listdir(val_annotations)
            annotations_path = val_annotations
            images_path = val_images
        for i in tqdm(set):
            f = open(annotations_path + "/" + i, "r")
            name = i.replace(".txt", "")
            image = {}
            height, width = cv2.imread(images_path + "/" + name + ".jpg").shape[:2]
            file_name = name + ".jpg"
            image["file_name"] = file_name
            image["height"] = height
            image["width"] = width
            image["id"] = name
            images.append(image)
            for line in f.readlines():
                annotation = {}
                line = line.replace("\n", "")
                if line.endswith(","):  # filter data
                    line = line.rstrip(",")
                line_list = [int(i) for i in line.split(",")]
                # import pdb; pdb.set_trace()
                bbox_xywh = [line_list[0], line_list[1], line_list[2], line_list[3]]
                annotation["image_id"] = name
                # annotation["score"] = line_list[4]
                annotation["bbox"] = bbox_xywh
                annotation["category_id"] = int(line_list[5])
                annotation["id"] = id_num
                annotation["iscrowd"] = 0
                annotation["segmentation"] = []
                annotation["area"] = bbox_xywh[2] * bbox_xywh[3]
                id_num += 1
                annotations.append(annotation)
        dataset_dict = {}
        dataset_dict["images"] = images
        dataset_dict["annotations"] = annotations
        dataset_dict["categories"] = categories
        json_str = json.dumps(dataset_dict)
        with open(f'{output_dir}/VisDrone2019-DET_{mode}_coco.json', 'w') as json_file:
            json_file.write(json_str)
    print("json file write done...")
 
 
def get_test_namelist(dir, out_dir):
    full_path = out_dir + "/" + "test.txt"
    file = open(full_path, 'w')
    for name in tqdm(os.listdir(dir)):
        name = name.replace(".txt", "")
        file.write(name + "\n")
    file.close()
    return None
 
 
def centerxywh_to_xyxy(boxes):
    """
    args:
        boxes:list of center_x,center_y,width,height,
    return:
        boxes:list of x,y,x,y,cooresponding to top left and bottom right
    """
    x_top_left = boxes[0] - boxes[2] / 2
    y_top_left = boxes[1] - boxes[3] / 2
    x_bottom_right = boxes[0] + boxes[2] / 2
    y_bottom_right = boxes[1] + boxes[3] / 2
    return [x_top_left, y_top_left, x_bottom_right, y_bottom_right]
 
 
def centerxywh_to_topleftxywh(boxes):
    """
    args:
        boxes:list of center_x,center_y,width,height,
    return:
        boxes:list of x,y,x,y,cooresponding to top left and bottom right
    """
    x_top_left = boxes[0] - boxes[2] / 2
    y_top_left = boxes[1] - boxes[3] / 2
    width = boxes[2]
    height = boxes[3]
    return [x_top_left, y_top_left, width, height]
 
 
def clamp(coord, width, height):
    if coord[0] < 0:
        coord[0] = 0
    if coord[1] < 0:
        coord[1] = 0
    if coord[2] > width:
        coord[2] = width
    if coord[3] > height:
        coord[3] = height
    return coord
 
 
if __name__ == '__main__':
    convert_to_cocodetection(r"",r"")

转发链接：https://blog.csdn.net/qq_20793791/article/details/110881657
(自用链接）

可视化转换json格式是否正确

json_drawbox.py

# coding:utf-8
# 下面代码包含json格式的ground-truth和predict画框

# --------------------------------------------------------------ground-truth 
# from pycocotools.coco import COCO
# import cv2
# import json
# categories = [{"id": 1, "name": "pedestrian"},
#             {"id": 2, "name": "people"},
#             {"id": 3, "name": "bicycle"},
#             {"id": 4, "name": "car"},
#             {"id": 5, "name": "van"},
#             {"id": 6, "name": "truck"},
#             {"id": 7, "name": "tricycle"},
#             {"id": 8, "name": "awning-tricycle"},
#             {"id": 9, "name": "bus"},
#             {"id": 10, "name": "motor"} ]   

# def select(json_path, outpath, image_path):
#     json_file = open(json_path)
#     infos = json.load(json_file)
#     images = infos["images"]
#     annos = infos["annotations"]
#     assert len(images) == len(images)
#     for i in range(len(images)):
#         im_id = images[i]["id"]
#         im_path = image_path + images[i]["file_name"]
#         img = cv2.imread(im_path)
#         for j in range(len(annos)):
#             if annos[j]["image_id"] == im_id:
#                 x, y, w, h = annos[j]["bbox"]
#                 x, y, w, h = int(x), int(y), int(w), int(h)
#                 x2, y2 = x + w, y + h
#                 category_id = categories[annos[j]["category_id"] - 1]['name']
#                 # import pdb; pdb.set_trace()
#                 cv2.putText(img, category_id, (x,y-5), cv2.FONT_HERSHEY_DUPLEX , 0.6, (0,0,0))
#                 # object_name = annos[j][""]
#                 img = cv2.rectangle(img, (x, y), (x2, y2), (255, 0, 0), thickness=2)
#                 img_name = outpath + images[i]["file_name"]
#                 cv2.imwrite(img_name, img)
#                 # continue
#         # print(i)

# if __name__ == "__main__":
#     # imageidFile = ''
#     json_path = ''
#     image_path = ''
#     outpath = ''
#     select(json_path, outpath, image_path)


# --------------------------------------------------------------------------predict
from pycocotools.coco import COCO
import cv2
import json
import os
categories = [{"id": 1, "name": "pedestrian"},
            {"id": 2, "name": "people"},
            {"id": 3, "name": "bicycle"},
            {"id": 4, "name": "car"},
            {"id": 5, "name": "van"},
            {"id": 6, "name": "truck"},
            {"id": 7, "name": "tricycle"},
            {"id": 8, "name": "awning-tricycle"},
            {"id": 9, "name": "bus"},
            {"id": 10, "name": "motor"} ]   

def select(json_path, outpath, image_path):
    json_file = open(json_path)
    infos = json.load(json_file)
    for i in range(len(infos)):
        out_path = outpath + infos[i]["image_id"] + ".jpg"
        im_path = image_path + infos[i]["image_id"] + ".jpg"
        # import pdb; pdb.set_trace()
        if infos[i]["score"] > 0.5:
            if os.path.exists(out_path):
                img = cv2.imread(out_path)
            else:
                img = cv2.imread(im_path)
            x, y, w, h = infos[i]["bbox"]
            x, y, w, h = int(x), int(y), int(w), int(h)
            x2, y2 = x + w, y + h
            category_id = categories[infos[i]["category_id"] - 1]['name']
            cv2.putText(img, category_id, (x,y-5), cv2.FONT_HERSHEY_DUPLEX , 0.6, (0,0,0))
            img = cv2.rectangle(img, (x, y), (x2, y2), (255, 0, 0), thickness=2)
            cv2.imwrite(out_path, img)


if __name__ == "__main__":
    # imageidFile = ''
    json_path = ''
    image_path = ''
    outpath = ''
    select(json_path, outpath, image_path)

三、修改配置信息

计算所有的图片（包括训练、验证和测试）的均值和标准差，直接将图片存放到同一个文件夹，把路径改下即可。

import cv2, os, argparse
import numpy as np
from tqdm import tqdm

def main():
    dirs = r'./images'  # 修改你自己的图片路径
    img_file_names = os.listdir(dirs)
    m_list, s_list = [], []
    for img_filename in tqdm(img_file_names):
        img = cv2.imread(dirs + '/' + img_filename)
        img = img / 255.0
        m, s = cv2.meanStdDev(img)
        m_list.append(m.reshape((3,)))
        s_list.append(s.reshape((3,)))
    m_array = np.array(m_list)
    s_array = np.array(s_list)
    m = m_array.mean(axis=0, keepdims=True)
    s = s_array.mean(axis=0, keepdims=True)
    print("mean = ", m[0][::-1])
    print("std = ", s[0][::-1])


if __name__ == '__main__':
    main()

实验结果如下面所示：

mean = [0.37867413 0.38535597 0.37134552]
std = [0.19151453 0.1829674 0.19446535]

Liaojiajia-2020

关注

22
点赞
踩
86

收藏

觉得还不错? 一键收藏
33
评论
VisDrone数据集 | 用CenterNet跑通VisDrone数据集（python、ubuntu)

使用CenterNet跑通VisDrone数据集一、Visdrone数据集转成coco格式(txt-->xml-->json)第一步：把VisDrone的txt标注格式转换成Voc的xml格式第二步，检验一下所转换的xml格式画回原图中是否准确第三步，将xml格式转换成json格式一、Visdrone数据集转成coco格式(txt–>xml–>json)首先第一步需要将...
复制链接

扫一扫

专栏目录