COCO数据集格式(只分析目标检测)
现在使用COCO数据集基本上都是通过pycocotools来读取的,所以如果想自己构建COCO格式的数据集,最好先阅读pycocotools源码,了解它是通过什么方式加载并索引image和annotation的。
pycocotools源码解读
class COCO:
def __init__(self, annotation_file=None):
"""
Constructor of Microsoft COCO helper class for reading and visualizing annotations.
:param annotation_file (str): location of annotation file
:param image_folder (str): location to the folder that hosts images.
:return:
"""
# load dataset
self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)# 创建一个赋值默认为list的字典
if not annotation_file == None:
print('loading annotations into memory...')
tic = time.time()
dataset = json.load(open(annotation_file, 'r'))
assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
print('Done (t={:0.2f}s)'.format(time.time()- tic))
self.dataset = dataset# 加载的json
self.createIndex()
def createIndex(self):
# create index
print('creating index...')
anns, cats, imgs = {
}, {
}, {
}
imgToAnns,catToImgs = defaultdict(list),defaultdict(list)#
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)# 根据imgid找annotation(一个image id对应不止一个annotation)<