开头
最近实习期间在做 CenterNet 相关的工作。数据集往往由多方标注或分多日标注,需要把各自的标注文件整合在一起,而网上这方面的代码少之又少,因此贡献一份合并 COCO 格式 json 标注的代码。该代码修改自柏哥的 labelme 转 coco 的代码。
import os
import os.path as osp
import json
import shutil
from tqdm import tqdm
# Working directories, all resolved relative to root_dir:
#   jsons_dir     - directory holding the JSON files to be merged
#   imgs_dir      - directory holding the image files
#   dst_anno_dir  - output directory for the merged annotation file
root_dir = './'
jsons_dir, imgs_dir, dst_anno_dir = (
    osp.join(root_dir, sub_dir) for sub_dir in ('ssd', 'imgs', 'coco_annos')
)
# Create the output directory up front; an already existing one is fine.
os.makedirs(dst_anno_dir, exist_ok=True)
# COCO "categories" field (a list of dicts) together with the lookup from
# label name to category id.  Ids are 1-based and follow the order of `labels`.
categories = []
labels = ['up', 'down']  # user-defined class names; edit to match the dataset
supercategory = 'xxx'    # written into every category entry
label_dict = {}          # label name -> category_id

# One pass builds both structures so they can never disagree on an id.
for category_id, label in enumerate(labels, start=1):
    categories.append({
        'id': category_id,
        'name': label,
        'supercategory': supercategory,
    })
    label_dict[label] = category_id
# Running id counters: every merged image / annotation receives a fresh id, so
# ids coming from different source files cannot collide.
image_id = 1
anno_id = 1
images = []       # COCO "images" field: per-image file information
annotations = []  # COCO "annotations" field: the labelled objects

# Inventory of the .jpg files present in imgs_dir, as sorted full paths.
img_files = sorted(
    osp.join(imgs_dir, name)
    for name in os.listdir(imgs_dir)
    if osp.splitext(name)[-1] == '.jpg'
)
# Report how many images are available.  (The previous message compared
# len(img_files) with itself, which carried no information.)
print(f"{len(img_files)} .jpg images found in {imgs_dir}")
labels_info = dict()
# Merge every COCO-style JSON file found in jsons_dir into the shared `images`
# and `annotations` lists, renumbering image and annotation ids as it goes.
# Entries whose image file is missing from imgs_dir, or that lack a required
# field, are skipped as a whole and their original image id is printed.
#
# NOTE(review): category_id is copied unchanged, which assumes every source
# file uses the same id -> name mapping as `categories` -- confirm.
for js in sorted(os.listdir(jsons_dir)):
    # Sorted so the ids assigned below do not depend on directory order;
    # entries that are not .json files are ignored instead of crashing the load.
    if not js.lower().endswith('.json'):
        continue
    print(js)
    with open(osp.join(jsons_dir, js), 'r', encoding='utf-8') as fp:
        json_info = json.load(fp)

    # Index this file's annotations by their original image id once, instead
    # of rescanning the full annotation list for every image.
    annts_by_image = {}
    for annt in json_info.get('annotations', []):
        annts_by_image.setdefault(annt.get('image_id'), []).append(annt)

    for imgt in tqdm(json_info['images']):
        image = None
        try:
            file_name = imgt['file_name']
            # Explicit existence check: an `assert` would be stripped under -O.
            if osp.exists(osp.join(imgs_dir, file_name)):
                image = {
                    'file_name': file_name,
                    'height': imgt['height'],
                    'width': imgt['width'],
                    'id': image_id,
                }
                new_annotations = [
                    {
                        'id': anno_id + offset,
                        'image_id': image_id,
                        'iscrowd': 0,
                        'category_id': annt['category_id'],
                        'bbox': annt['bbox'],
                        'area': annt['area'],
                        'segmentation': annt['segmentation'],
                    }
                    for offset, annt in enumerate(
                        annts_by_image.get(imgt['id'], [])
                    )
                ]
        except (KeyError, TypeError):
            # Malformed image or annotation record: drop the whole image.
            image = None

        if image is None:
            print(imgt.get('id'))
            continue

        # Commit only after the image and all of its annotations were built,
        # so a failure never leaves a half-merged image behind.
        images.append(image)
        annotations.extend(new_annotations)
        image_id += 1
        anno_id += len(new_annotations)
# Assemble the merged COCO dataset and write it to <dst_anno_dir>/annos.json.
attrDict = dict(
    categories=categories,
    images=images,
    annotations=annotations,
)
# ensure_ascii=False writes non-ASCII text as-is; the file is opened as UTF-8.
json_string = json.dumps(attrDict, ensure_ascii=False)
with open(osp.join(dst_anno_dir, 'annos.json'), 'w', encoding='utf-8') as f:
    f.write(json_string)

# Summary: number of categories, merged images and merged annotations.
print(len(categories), len(images), len(annotations))
# NOTE(review): labels_info is not populated anywhere in the visible script,
# so this loop prints nothing unless it is filled elsewhere -- confirm.
for key, value in labels_info.items():
    print(f"{key}: {len(value)}")