"""
author: Wu
2022/1/28
Convert the ExDark dataset into a COCO-format dataset.
"""
import json
import os
import cv2
import shutil
# Input roots: each one is expected to contain sub-folders holding the files.
img_path = '/data1/wjh/exdark/imgdir'
ann_path = '/data1/wjh/exdark/anndir'
# Output locations: one flat image folder and one COCO json file.
output_img_path = '/data1/wjh/exdark/cocoformat/img'
output_ann_path = '/data1/wjh/exdark/cocoformat/ann/annotations.json'
##############
# copy img
# Copy every image found one level below img_path into output_img_path,
# lower-casing the file name on the way.
# shutil.copy does not create missing parent folders, so make sure the
# destination exists before the first copy.
os.makedirs(output_img_path, exist_ok=True)
img_fliepath_list = []
fold_list = os.listdir(img_path)
for fold in fold_list:
    fold_path = os.path.join(img_path, fold)
    # Stray files next to the sub-folders would make os.listdir raise.
    if not os.path.isdir(fold_path):
        continue
    img_filename_list = os.listdir(fold_path)
    img_fliepath_list += [os.path.join(fold_path, img_filename) for img_filename in img_filename_list]
for img_filepath in img_fliepath_list:
    # Lower-casing maps JPG/JPEG/PNG to jpg/jpeg/png; jpeg is then shortened
    # to jpg.
    newname = os.path.basename(img_filepath).lower()
    name_parts = newname.split('.')
    # Only the token right after the first dot is inspected. The annotation
    # pass later in this file applies the same rule, so both sides agree on
    # the final file name. The length check avoids an IndexError on names
    # that contain no dot at all.
    if len(name_parts) > 1 and name_parts[1] == 'jpeg':
        newname = name_parts[0] + '.jpg'
    shutil.copy(img_filepath, os.path.join(output_img_path, newname))
###############
#######################
# Class lookup tables, both derived from one list so the ids cannot drift.
# Each row: (class name used in the annotation txt files,
#            category name written to the json, category id).
_class_table = (
    ('Bicycle', 'bicycle', 2),
    ('Boat', 'boat', 9),
    ('Bottle', 'bottle', 44),
    ('Bus', 'bus', 6),
    ('Car', 'car', 3),
    ('Cat', 'cat', 17),
    ('Chair', 'chair', 62),
    ('Cup', 'cup', 47),
    ('Dog', 'dog', 18),
    ('Motorbike', 'motorcycle', 4),
    ('People', 'person', 1),
    ('Table', 'dining table', 67),
)
# annotation class name -> category id
label_dict = {txt_name: cat_id for txt_name, _, cat_id in _class_table}
# json category name -> category id (insertion order follows the table)
categories_dict = {json_name: cat_id for _, json_name, cat_id in _class_table}
##########################
# Build the top-level container that is serialised to json at the end.
# Key order is kept as images, type, annotations, categories, info, licenses.
dataset = {
    'images': [],
    'type': 'instances',
    'annotations': [],
    # One entry per item of categories_dict; the supercategory simply
    # repeats the category name.
    'categories': [
        {'supercategory': cat_name, 'id': cat_id, 'name': cat_name}
        for cat_name, cat_id in categories_dict.items()
    ],
    'info': None,
    'licenses': None,
}
# Running counter used to give every annotation a unique id.
annotation_id = 0
############################
# Collect the path of every annotation file found one level below ann_path.
ann_fliepath_list = []
# os.listdir returns entries in arbitrary order. Image ids are later taken
# from positions in this list, so sort both levels to make the ids
# reproducible between runs and machines.
fold_list = sorted(os.listdir(ann_path))
for fold in fold_list:
    fold_path = os.path.join(ann_path, fold)
    # Stray files next to the sub-folders would make os.listdir raise.
    if not os.path.isdir(fold_path):
        continue
    ann_filename_list = sorted(os.listdir(fold_path))
    ann_fliepath_list += [os.path.join(fold_path, ann_filename) for ann_filename in ann_filename_list]
################################
# Walk every annotation txt, add one image entry plus its boxes to `dataset`,
# then write the whole thing to output_ann_path as a single json file.
for index, ann_filepath in enumerate(ann_fliepath_list):
    print(f'processing {index} th txt')
    ########################
    # Derive the image file name: drop the last four characters of the
    # annotation file name (presumably the '.txt' suffix), lower-case it and
    # shorten jpeg to jpg, mirroring the rename done when images were copied.
    img_name = os.path.basename(ann_filepath)[0:-4]
    img_name = img_name.lower()
    name_parts = img_name.split('.')
    # The length check avoids an IndexError on names without any dot.
    if len(name_parts) > 1 and name_parts[1] == 'jpeg':
        img_name = name_parts[0] + '.jpg'
    ###################################
    # add dataset['images']
    img_file = os.path.join(output_img_path, img_name)
    img = cv2.imread(img_file)
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail with a message that names both files.
    if img is None:
        raise FileNotFoundError(
            f'cannot read image {img_file} for annotation {ann_filepath}')
    image = dict()
    image['id'] = index
    image['file_name'] = img_name
    image['width'] = img.shape[1]
    image['height'] = img.shape[0]
    dataset['images'].append(image)
    ########################################
    # read annotations
    with open(ann_filepath, 'r') as f:
        anns = f.readlines()[1:]  # the first line is not parsed as a box
    # iter through every row
    for ann in anns:
        words = ann.split()  # classname l t w h
        if not words:
            continue  # blank line (e.g. trailing newline at end of file)
        ###############################
        # add annotation
        x = int(words[1])
        y = int(words[2])
        w = int(words[3])
        h = int(words[4])
        label = label_dict[words[0]]
        annotation_item = dict()
        # Segmentation is the box outline, listed corner by corner.
        annotation_item['segmentation'] = [[x, y, x, y+h, x+w, y+h, x+w, y]]
        annotation_item['image_id'] = image['id']
        annotation_item['iscrowd'] = 0
        annotation_item['bbox'] = [x, y, w, h]
        annotation_item['area'] = w * h
        annotation_item['id'] = annotation_id
        annotation_id = annotation_id + 1
        annotation_item['category_id'] = label
        dataset['annotations'].append(annotation_item)
############################################
# Make sure the target folder exists, and close the file deterministically so
# the json is fully flushed to disk.
output_ann_dir = os.path.dirname(output_ann_path)
if output_ann_dir:
    os.makedirs(output_ann_dir, exist_ok=True)
with open(output_ann_path, 'w') as f:
    json.dump(dataset, f)
# Blog post title: ExDark dataset to COCO format conversion
# First published 2022-01-28 15:39:44