exdark数据集转coco格式

"""
author: Wu
2022/1/28
将exdark数据集转为coco格式的数据集
"""
import json
import os
import cv2
import shutil

# Locations of the ExDark source data and of the COCO-style output.
_exdark_root = '/data1/wjh/exdark'
_coco_root = f'{_exdark_root}/cocoformat'

# Source folders; each is expected to contain one level of sub-folders.
img_path = f'{_exdark_root}/imgdir'
ann_path = f'{_exdark_root}/anndir'
# Flat folder that receives the renamed image copies.
output_img_path = f'{_coco_root}/img'
# Single JSON file that receives the converted annotations.
output_ann_path = f'{_coco_root}/ann/annotations.json'

##############
# copy img
# Flatten the sub-folders of img_path into output_img_path, normalising the
# file names on the way (lower-case, ".jpeg" -> ".jpg").

# shutil.copy does not create the destination folder, so make sure it exists.
os.makedirs(output_img_path, exist_ok=True)

img_fliepath_list = []
fold_list = os.listdir(img_path)
for fold in fold_list:
    fold_path = os.path.join(img_path, fold)
    # Skip stray files sitting next to the sub-folders; os.listdir would fail on them.
    if not os.path.isdir(fold_path):
        continue
    img_filename_list = os.listdir(fold_path)
    img_fliepath_list += [os.path.join(fold_path, img_filename) for img_filename in img_filename_list]
for img_filepath in img_fliepath_list:
    # rename all jpeg JPEG to jpg, PNG to png
    newname = os.path.basename(img_filepath).lower()
    name_parts = newname.split('.')
    # Names without any '.' are copied unchanged instead of raising IndexError.
    # The rule itself is unchanged so it stays in sync with the annotation loop.
    if len(name_parts) > 1 and name_parts[1] == 'jpeg':
        newname = name_parts[0] + '.jpg'
    shutil.copy(img_filepath, os.path.join(output_img_path, newname))
###############

#######################
# find corresponding class and id

# Class name as written in the annotation txt files -> category id.
label_dict = {
    'Bicycle': 2,
    'Boat': 9,
    'Bottle': 44,
    'Bus': 6,
    'Car': 3,
    'Cat': 17,
    'Chair': 62,
    'Cup': 47,
    'Dog': 18,
    'Motorbike': 4,
    'People': 1,
    'Table': 67,
}

# Category name written to the output JSON -> the same ids as above.
# Insertion order determines the order of dataset['categories'].
categories_dict = {
    'bicycle': 2,
    'boat': 9,
    'bottle': 44,
    'bus': 6,
    'car': 3,
    'cat': 17,
    'chair': 62,
    'cup': 47,
    'dog': 18,
    'motorcycle': 4,
    'person': 1,
    'dining table': 67,
}
##########################

# init dataset
# Skeleton of the output dictionary; 'images' and 'annotations' are filled
# in later, 'categories' is derived from categories_dict right here.
dataset = {
    'images': [],
    'type': 'instances',
    'annotations': [],
    'categories': [],
    'info': None,
    'licenses': None,
}

# Running counter used to give every annotation a unique id.
annotation_id = 0

# add dataset['categories']
# Each category uses its own name as supercategory.
dataset['categories'].extend(
    {'supercategory': category_name, 'id': category_id, 'name': category_name}
    for category_name, category_id in categories_dict.items()
)

############################
# get ann path recursively (one level of sub-folders below ann_path)
ann_fliepath_list = []
# Sorted so that the image ids derived from the list position are
# reproducible; os.listdir returns entries in arbitrary order.
fold_list = sorted(os.listdir(ann_path))
for fold in fold_list:
    fold_path = os.path.join(ann_path, fold)
    # Skip stray files sitting next to the sub-folders; os.listdir would fail on them.
    if not os.path.isdir(fold_path):
        continue
    ann_filename_list = sorted(os.listdir(fold_path))
    # Only "<image name>.txt" files are annotations; anything else would break
    # the image-name derivation that strips the 4-character suffix.
    ann_fliepath_list += [
        os.path.join(fold_path, ann_filename)
        for ann_filename in ann_filename_list
        if ann_filename.lower().endswith('.txt')
    ]
################################


# Iterate over every annotation txt and build one COCO-style annotation
# file, written to output_ann_path once the loop has finished.
for index, ann_filepath in enumerate(ann_fliepath_list):
    print(f'processing {index} th txt')

    ########################
    # rename all jpeg JPEG to jpg, PNG to png
    # The annotation file is named "<image file name>.txt": drop the
    # 4-character suffix, then apply the same renaming rule as the image-copy
    # step so the name matches the copied image.
    img_name = os.path.basename(ann_filepath)[0:-4]
    img_name = img_name.lower()
    name_parts = img_name.split('.')
    # Names without any '.' are left unchanged instead of raising IndexError.
    if len(name_parts) > 1 and name_parts[1] == 'jpeg':
        img_name = name_parts[0] + '.jpg'
    ############################

    ###################################
    # add dataset['images']
    img_file = os.path.join(output_img_path, img_name)
    img = cv2.imread(img_file)
    # cv2.imread signals a missing or undecodable file by returning None;
    # fail with a message that names the file instead of an AttributeError.
    if img is None:
        raise RuntimeError(
            f'cannot read image {img_file} referenced by annotation {ann_filepath}')
    image = dict()
    image['id'] = index
    image['file_name'] = img_name
    image['width'] = img.shape[1]
    image['height'] = img.shape[0]
    dataset['images'].append(image)
    ########################################

    # read annotations
    with open(ann_filepath, 'r') as f:
        anns = f.readlines()[1:]  # cast off first line
    # iter through every row
    for ann in anns:
        words = ann.split()  # classname l t w h
        # Skip blank or truncated rows (e.g. a trailing empty line).
        if len(words) < 5:
            continue
        ###############################
        # add annotation
        x = int(words[1])
        y = int(words[2])
        w = int(words[3])
        h = int(words[4])
        label = label_dict[words[0]]
        annotation_item = dict()
        # The box is also stored as a 4-corner polygon so that consumers
        # expecting a segmentation field get a valid one.
        annotation_item['segmentation'] = [[x, y, x, y+h, x+w, y+h, x+w, y]]
        annotation_item['image_id'] = image['id']
        annotation_item['iscrowd'] = 0
        annotation_item['bbox'] = [x, y, w, h]
        annotation_item['area'] = w * h
        annotation_item['id'] = annotation_id
        annotation_id = annotation_id + 1
        annotation_item['category_id'] = label
        dataset['annotations'].append(annotation_item)
        ############################################

# Create the output folder if needed and close the file deterministically.
output_ann_dir = os.path.dirname(output_ann_path)
if output_ann_dir:
    os.makedirs(output_ann_dir, exist_ok=True)
with open(output_ann_path, 'w') as f:
    json.dump(dataset, f)


  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 14
    评论
要将VOC格式数据集转换为COCO格式,可以按照以下步骤进行操作: 1. 首先,将VOC数据集中的Annotations文件夹里的文件按照训练集和验证集分别放在两个文件夹中。这样可以方便后续的处理。 2. 接下来,将这两个文件夹中的XML文件转换成COCO数据集的JSON格式。你可以使用相应的工具或者脚本来完成这个转换过程。这个转换过程会将XML文件中的标注信息提取出来,并按照COCO数据集格式进行组织和保存。 3. 转换完成后,你将得到两个JSON文件,分别对应训练集和验证集。这些JSON文件包含了每个图像的信息、标注框的位置和类别等相关信息。 通过以上步骤,你就成功地将VOC数据集转换为了COCO格式,方便后续使用EfficientDet等网络进行训练和应用。[1][2][3] 引用: [1] [将VOC格式数据集转换为COCO格式,xml格式转换成json格式](https://download.csdn.net/download/qq_28257631/85131545) [2][3] [voc数据集转coco数据集](https://blog.csdn.net/jinjieingbiubiu/article/details/125385906)

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 14
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值