前言
之前的文章中放了这么一张统计图,虽然给出了每个类别对应的实例数,一看就是极其不均衡,不利于分类。但图中并没有给出各类别实例所在图片的分布,不利于后续针对不均衡的数据扩增。
实现
cocoJsonStat.py
import json
from unicodedata import category
import tqdm
import os
# Input: COCO-style instance annotation file to analyse.
json_file = "COD10K_CAM_coco/annotations/instances_train2017.json"
# Output: per-category statistics, written next to the script.
save_json_path = os.path.join("./", "stat_CAM_coco_train.json")


def _build_category_table(categories):
    """Return an empty {supercategory: {name: stats}} table for *categories*."""
    table = {}
    for cat in categories:
        table.setdefault(cat["supercategory"], {})[cat["name"]] = {
            "class_id": cat["id"],
            "cnt": 0,
            "imgs": [],
        }
    return table


def collect_stats(data):
    """Count instances per category and record which image each one is in.

    Args:
        data: Parsed annotation dict with "images", "categories" and
            "annotations" lists. Images need "id" and "file_name";
            categories need "id", "name" and "supercategory"; annotations
            need "image_id" and "category_id" ("iscrowd" is optional).

    Returns:
        A dict ``{supercategory: {category_name: {"class_id": id,
        "cnt": instance_count, "imgs": [file_name, ...]}}}``. A file name
        appears in "imgs" once per instance, so an image holding several
        instances of the same category is listed several times.

    Raises:
        KeyError: If an annotation refers to an unknown image or category id.
    """
    # Key the lookups by the raw id. Formatting ids with '%g' keeps only six
    # significant digits, so ids >= 1_000_000 would collide.
    images = {img["id"]: img for img in data["images"]}
    categories = {cat["id"]: cat for cat in data["categories"]}
    cnt_dict = _build_category_table(data["categories"])

    for ann in data["annotations"]:
        # Crowd annotations are reported and left out of the statistics.
        if ann.get("iscrowd"):
            print("啥")
            continue
        cate = categories[ann["category_id"]]
        img = images[ann["image_id"]]
        cur_obj = cnt_dict[cate["supercategory"]][cate["name"]]
        cur_obj["cnt"] += 1
        cur_obj["imgs"].append(img["file_name"])
    return cnt_dict


def main():
    """Load ``json_file``, print the statistics and save them as JSON."""
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)
    cnt_dict = collect_stats(data)
    print(cnt_dict)
    # Use a context manager so the output is flushed and closed reliably.
    with open(save_json_path, "w", encoding="utf-8") as f:
        json.dump(cnt_dict, f, indent=4)


if __name__ == "__main__":
    main()
点到为止,之后的数据处理操作,大伙就各抒己见吧