ExDark 数据集转 COCO 格式

ssf-yasuo

已于 2022-01-28 19:05:47 修改

阅读量1.2k

点赞数 1

分类专栏：实用代码 文章标签：python 深度学习 目标检测

于 2022-01-28 15:39:44 首次发布

本文链接：https://blog.csdn.net/weixin_44326452/article/details/122732303

版权

实用代码专栏收录该内容

25 篇文章 2 订阅

订阅专栏

"""
author: Wu
2022/1/28
将exdark数据集转为coco格式的数据集
"""
import json
import os
import cv2
import shutil

# Input folders (images / annotations, each organised in sub-folders) and the
# output locations for the copied images and the generated annotation json.
img_path = '/data1/wjh/exdark/imgdir'
ann_path = '/data1/wjh/exdark/anndir'
output_img_path = '/data1/wjh/exdark/cocoformat/img'
output_ann_path = '/data1/wjh/exdark/cocoformat/ann/annotations.json'

##############
# copy img
# Collect the path of every file found one level below img_path.
img_fliepath_list = []
fold_list = os.listdir(img_path)
for fold in fold_list:
    fold_path = os.path.join(img_path, fold)
    # Skip stray files sitting next to the sub-folders; os.listdir() on them
    # would raise NotADirectoryError.
    if not os.path.isdir(fold_path):
        continue
    img_filename_list = os.listdir(fold_path)
    img_fliepath_list += [os.path.join(fold_path, img_filename) for img_filename in img_filename_list]

# shutil.copy() does not create missing directories, so make sure the flat
# output folder exists before copying into it.
os.makedirs(output_img_path, exist_ok=True)
for img_filepath in img_fliepath_list:
    # Normalise names: lower-case everything (JPG -> jpg, PNG -> png) and
    # rename the '.jpeg' extension to '.jpg'.
    newname = os.path.basename(img_filepath).lower()
    # splitext() only looks at the last extension, so names without a dot or
    # with several dots are handled correctly.
    stem, ext = os.path.splitext(newname)
    if ext == '.jpeg':
        newname = stem + '.jpg'
    shutil.copy(img_filepath, os.path.join(output_img_path, newname))
###############

#######################
# find corresponding class and id
# One row per class: (name used in the annotation txt files,
#                     category name written to the json, category id).
# Both lookup tables below are derived from this single table so the two
# stay in sync; row order defines the insertion order of both dicts.
_CLASS_TABLE = (
    ('Bicycle', 'bicycle', 2),
    ('Boat', 'boat', 9),
    ('Bottle', 'bottle', 44),
    ('Bus', 'bus', 6),
    ('Car', 'car', 3),
    ('Cat', 'cat', 17),
    ('Chair', 'chair', 62),
    ('Cup', 'cup', 47),
    ('Dog', 'dog', 18),
    ('Motorbike', 'motorcycle', 4),
    ('People', 'person', 1),
    ('Table', 'dining table', 67),
)

# annotation-file class name -> category id
label_dict = {source_name: class_id for source_name, _, class_id in _CLASS_TABLE}

# output category name -> category id
categories_dict = {target_name: class_id for _, target_name, class_id in _CLASS_TABLE}
##########################

# init dataset
# Top-level structure of the annotation json; 'images' and 'annotations'
# are filled in while the annotation files are parsed.
dataset = {
    'images': [],
    'type': 'instances',
    'annotations': [],
    'categories': [],
    'info': None,
    'licenses': None,
}

# Running id for the next annotation. Ids start at 1 rather than 0 because
# pycocotools' COCOeval uses 0 as the "unmatched" marker in its match
# matrices, so a detection matched to an annotation with id 0 would be
# scored as a false positive.
annotation_id = 1

# add dataset['categories']
for category_name, category_id in categories_dict.items():
    # The category name doubles as its own supercategory.
    category_item = {
        'supercategory': category_name,
        'id': category_id,
        'name': category_name,
    }
    dataset['categories'].append(category_item)

############################
# Collect the annotation txt files found one level below ann_path.
# Both directory listings are sorted because os.listdir() returns entries in
# arbitrary order, and the position in this list later becomes the image id;
# sorting keeps the ids reproducible between runs.
ann_fliepath_list = []
fold_list = sorted(os.listdir(ann_path))
for fold in fold_list:
    fold_path = os.path.join(ann_path, fold)
    # Ignore stray files next to the sub-folders.
    if not os.path.isdir(fold_path):
        continue
    # Keep only '.txt' files: the conversion loop strips a 4-character
    # suffix from each name, so anything else would produce a wrong image name.
    ann_filename_list = [
        ann_filename
        for ann_filename in sorted(os.listdir(fold_path))
        if ann_filename.lower().endswith('.txt')
    ]
    ann_fliepath_list += [os.path.join(fold_path, ann_filename) for ann_filename in ann_filename_list]
################################


# Iterate over every annotation txt, add one 'images' entry per file and one
# 'annotations' entry per box, then write everything to a single json file.
for index, ann_filepath in enumerate(ann_fliepath_list):
    print(f'processing {index} th txt')

    ########################
    # The annotation file is named '<image file name>.txt'. Strip the '.txt'
    # suffix and normalise the image name like the image copy step does:
    # lower-case, and '.jpeg' renamed to '.jpg'.
    img_name = os.path.basename(ann_filepath)[0:-4].lower()
    # splitext() inspects only the last extension, so names without a dot or
    # with several dots no longer break the check.
    stem, ext = os.path.splitext(img_name)
    if ext == '.jpeg':
        img_name = stem + '.jpg'
    ############################

    ###################################
    # add dataset['images']
    img_file = os.path.join(output_img_path, img_name)
    img = cv2.imread(img_file)
    # cv2.imread() returns None instead of raising when the file is missing
    # or unreadable; fail with a message that names the offending file.
    if img is None:
        raise FileNotFoundError(
            f'cannot read image {img_file} referenced by {ann_filepath}'
        )
    image = dict()
    image['id'] = index
    image['file_name'] = img_name
    image['width'] = img.shape[1]
    image['height'] = img.shape[0]
    dataset['images'].append(image)
    ########################################

    # read annotations
    with open(ann_filepath, 'r') as f:
        anns = f.readlines()[1:]  # the first line is not a box row; skip it
    # one box per row
    for ann in anns:
        words = ann.split()  # classname l t w h
        if not words:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        ###############################
        # add annotation
        x, y, w, h = (int(value) for value in words[1:5])
        label = label_dict[words[0]]
        annotation_item = dict()
        # The box corners serve as a rectangular polygon:
        # top-left, bottom-left, bottom-right, top-right.
        annotation_item['segmentation'] = [[x, y, x, y+h, x+w, y+h, x+w, y]]
        annotation_item['image_id'] = image['id']
        annotation_item['iscrowd'] = 0
        annotation_item['bbox'] = [x, y, w, h]
        annotation_item['area'] = w * h
        annotation_item['id'] = annotation_id
        annotation_id = annotation_id + 1
        annotation_item['category_id'] = label
        dataset['annotations'].append(annotation_item)
        ############################################

# Create the target folder if needed and close the file deterministically so
# the json is fully flushed to disk.
output_ann_dir = os.path.dirname(output_ann_path)
if output_ann_dir:
    os.makedirs(output_ann_dir, exist_ok=True)
with open(output_ann_path, 'w') as f:
    json.dump(dataset, f)