pycocotools提供了COCOeval,可以让我们方便地计算coco数据集的mAP。然而,如果我们只想计算少量图像的mAP,该如何做呢?
coco2017数据集由训练集、验证集、测试集组成。训练集中一共拥有118287张图像,验证集中有5000张图像,测试集中有40670张图像。训练集的annotations文件的大小为469.8MB,验证集的annotations文件的大小为20MB,测试集的annotations文件没有提供。如果我们手动分割训练集或者验证集,需要额外的处理,很不方便。因此,本文提出的方法可以在不调整训练集和验证集的条件下,计算一张(或任意少量)图像的mAP。
传统的计算过程如下,简单说来就是输入目标检测网络推断得到的检测结果result_files文件和groundtruth文件,计算得到最终的mAP. groundtruth文件中有多少张图像的结果,result_files也要有多少张图像的检测结果。如果图像个数不匹配,会导致计算结果出现问题。(如果result_files文件中的图像个数比groundtruth文件中的个数少,那么会导致mAP偏低)
def coco_eval(result_files, result_types, coco, max_dets=(100, 300, 1000)):
    """Evaluate detection results on every image of a COCO annotation set.

    For each requested result type the detections are loaded with
    ``coco.loadRes``, a ``COCOeval`` is configured over all image ids
    returned by ``coco.getImgIds()``, and ``evaluate`` / ``accumulate`` /
    ``summarize`` are run in that order.

    Args:
        result_files (str | dict): Path of a ``.json`` result file, or a
            dict mapping each result type to such a path. When
            ``result_types == ['proposal_fast']`` the value is forwarded
            unchanged to ``fast_eval_recall``.
        result_types (list[str]): Each entry must be one of 'proposal',
            'proposal_fast', 'bbox', 'segm', 'keypoints'.
        coco (str | COCO): A ``COCO`` object, or a string that is passed
            to ``COCO(...)`` to build one.
        max_dets (tuple[int]): Assigned to ``params.maxDets`` for the
            'proposal' type; also the thresholds used for the
            'proposal_fast' recall printout.

    Raises:
        AssertionError: On an unknown result type, a ``coco`` argument
            that is not a ``COCO`` instance after conversion, or a result
            file name that does not end in '.json'.
        TypeError: If ``result_files`` is neither a str nor a dict (not
            checked on the 'proposal_fast' path).
    """
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
        ]

    fast_recall_only = result_types == ['proposal_fast']

    # The former ``assert TypeError(...)`` could never fail because an
    # exception instance is truthy. Raise for real, and do it before the
    # annotation file is loaded so a bad argument fails fast.
    if not fast_recall_only and not isinstance(result_files, (str, dict)):
        raise TypeError('result_files must be a str or dict')

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    if fast_recall_only:
        ar = fast_eval_recall(result_files, coco, np.array(max_dets))
        for i, num in enumerate(max_dets):
            print('AR@{}\t= {:.4f}'.format(num, ar[i]))
        return

    for res_type in result_types:
        if isinstance(result_files, str):
            result_file = result_files
        else:
            result_file = result_files[res_type]
        assert result_file.endswith('.json')

        coco_dets = coco.loadRes(result_file)
        img_ids = coco.getImgIds()
        # Proposals are scored with the bbox IoU type.
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        cocoEval = COCOeval(coco, coco_dets, iou_type)
        cocoEval.params.imgIds = img_ids
        if res_type == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(max_dets)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
经过仔细阅读pycocotools源代码并进行实验测试,我得到了如下的方法。
比如,需要对coco数据集中image_id为391895的图像的目标检测结果计算mAP时,只需要改动上述代码中`cocoEval.params.imgIds = img_ids`这一行(原代码第30行)的`cocoEval.params.imgIds`参数即可,改动后的完整代码如下:
def coco_eval(result_files, result_types, coco, max_dets=(100, 300, 1000)):
    """Evaluate detection results on the single image with id 391895.

    Same flow as the full-dataset evaluation (``coco.loadRes`` followed by
    ``COCOeval.evaluate`` / ``accumulate`` / ``summarize``), except that
    ``params.imgIds`` is fixed to ``[391895]`` instead of all image ids.

    Args:
        result_files (str | dict): Path of a ``.json`` result file, or a
            dict mapping each result type to such a path. When
            ``result_types == ['proposal_fast']`` the value is forwarded
            unchanged to ``fast_eval_recall``.
        result_types (list[str]): Each entry must be one of 'proposal',
            'proposal_fast', 'bbox', 'segm', 'keypoints'.
        coco (str | COCO): A ``COCO`` object, or a string that is passed
            to ``COCO(...)`` to build one.
        max_dets (tuple[int]): Assigned to ``params.maxDets`` for the
            'proposal' type; also the thresholds used for the
            'proposal_fast' recall printout.

    Raises:
        AssertionError: On an unknown result type, a ``coco`` argument
            that is not a ``COCO`` instance after conversion, or a result
            file name that does not end in '.json'.
        TypeError: If ``result_files`` is neither a str nor a dict (not
            checked on the 'proposal_fast' path).
    """
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
        ]

    fast_recall_only = result_types == ['proposal_fast']

    # The former ``assert TypeError(...)`` could never fail because an
    # exception instance is truthy. Raise for real, and do it before the
    # annotation file is loaded so a bad argument fails fast.
    if not fast_recall_only and not isinstance(result_files, (str, dict)):
        raise TypeError('result_files must be a str or dict')

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    if fast_recall_only:
        ar = fast_eval_recall(result_files, coco, np.array(max_dets))
        for i, num in enumerate(max_dets):
            print('AR@{}\t= {:.4f}'.format(num, ar[i]))
        return

    for res_type in result_types:
        if isinstance(result_files, str):
            result_file = result_files
        else:
            result_file = result_files[res_type]
        assert result_file.endswith('.json')

        coco_dets = coco.loadRes(result_file)
        # Proposals are scored with the bbox IoU type.
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        cocoEval = COCOeval(coco, coco_dets, iou_type)
        # Restrict the evaluation to one image instead of the whole set.
        cocoEval.params.imgIds = [391895]
        if res_type == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(max_dets)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
同理,如果有N张待计算图像,改动`cocoEval.params.imgIds = [1, 2, 3, 4, ..., N]`即可。(列表中的数字是这些图像在coco数据集中的image_id)
此外,以上是从groundtruth方面进行的改动,还有一个改动需要完成,那就是`result_files`中`image_id`的对应问题。这个问题通过下面代码第221行的`epoch_bias`变量来解决。
综上所述,完整的代码如下所示
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import argparse
import os
from mmdet.datasets import build_dataloader, build_dataset
from mmdet.models import build_detector
from mmcv.runner import get_dist_info, load_checkpoint
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
import torch
def coco_eval(result_files, result_types, coco, image_id_list, max_dets=(100, 300, 1000)):
    """Evaluate detection results on a chosen subset of COCO images.

    Same flow as a full-dataset evaluation (``coco.loadRes`` followed by
    ``COCOeval.evaluate`` / ``accumulate`` / ``summarize``), except that
    ``params.imgIds`` is set to ``image_id_list`` rather than to
    ``coco.getImgIds()``.

    Args:
        result_files (str | dict): Path of a ``.json`` result file, or a
            dict mapping each result type to such a path. When
            ``result_types == ['proposal_fast']`` the value is forwarded
            unchanged to ``fast_eval_recall``.
        result_types (list[str]): Each entry must be one of 'proposal',
            'proposal_fast', 'bbox', 'segm', 'keypoints'.
        coco (str | COCO): A ``COCO`` object, or a string that is passed
            to ``COCO(...)`` to build one.
        image_id_list (list[int]): COCO image ids to evaluate, e.g.
            ``[391895, 522418, 184613]``. Not used on the 'proposal_fast'
            path.
        max_dets (tuple[int]): Assigned to ``params.maxDets`` for the
            'proposal' type; also the thresholds used for the
            'proposal_fast' recall printout.

    Raises:
        AssertionError: On an unknown result type, a ``coco`` argument
            that is not a ``COCO`` instance after conversion, or a result
            file name that does not end in '.json'.
        TypeError: If ``result_files`` is neither a str nor a dict (not
            checked on the 'proposal_fast' path).
    """
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
        ]

    fast_recall_only = result_types == ['proposal_fast']

    # The former ``assert TypeError(...)`` could never fail because an
    # exception instance is truthy. Raise for real, and do it before the
    # annotation file is loaded so a bad argument fails fast.
    if not fast_recall_only and not isinstance(result_files, (str, dict)):
        raise TypeError('result_files must be a str or dict')

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    if fast_recall_only:
        ar = fast_eval_recall(result_files, coco, np.array(max_dets))
        for i, num in enumerate(max_dets):
            print('AR@{}\t= {:.4f}'.format(num, ar[i]))
        return

    for res_type in result_types:
        if isinstance(result_files, str):
            result_file = result_files
        else:
            result_file = result_files[res_type]
        assert result_file.endswith('.json')

        coco_dets = coco.loadRes(result_file)
        # Proposals are scored with the bbox IoU type.
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        cocoEval = COCOeval(coco, coco_dets, iou_type)
        # Restrict the evaluation to the caller-selected images.
        cocoEval.params.imgIds = image_id_list
        if res_type == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.params.maxDets = list(max_dets)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
def fast_eval_recall(results,
                     coco,
                     max_dets,
                     iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """Compute the recall returned by ``eval_recalls``, averaged over axis 1.

    Args:
        results (str | list): Either a list, or the name of a ``.pkl``
            file that is loaded with ``mmcv.load``.
        coco: Object providing ``getImgIds``, ``getAnnIds`` and
            ``loadAnns`` (presumably a pycocotools ``COCO`` - confirm
            against callers).
        max_dets: Forwarded unchanged to ``eval_recalls``.
        iou_thrs: Forwarded unchanged to ``eval_recalls``.

    Returns:
        ``recalls.mean(axis=1)`` of the value returned by ``eval_recalls``.

    Raises:
        TypeError: If ``results`` is neither a string nor a list.
        AssertionError: If ``results`` is a string not ending in '.pkl'.
    """
    if mmcv.is_str(results):
        assert results.endswith('.pkl')
        results = mmcv.load(results)
    elif not isinstance(results, list):
        raise TypeError(
            'results must be a list of numpy arrays or a filename, not {}'.
            format(type(results)))

    # Build one ground-truth box array per image, in getImgIds() order.
    gt_bboxes = []
    for image_id in coco.getImgIds():
        annotations = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
        if not annotations:
            gt_bboxes.append(np.zeros((0, 4)))
            continue

        # Annotations flagged 'ignore' or 'iscrowd' are left out.
        kept_boxes = [
            ann['bbox'] for ann in annotations
            if not (ann.get('ignore', False) or ann['iscrowd'])
        ]
        # Convert [x, y, w, h] to [x1, y1, x2, y2]; the corner is w - 1 / h - 1 away.
        corners = np.array(
            [[x, y, x + w - 1, y + h - 1] for x, y, w, h in kept_boxes],
            dtype=np.float32)
        if corners.shape[0] == 0:
            corners = np.zeros((0, 4))
        gt_bboxes.append(corners)

    # NOTE(review): eval_recalls is not imported in the visible part of this
    # file - confirm it is in scope where this function is used.
    recalls = eval_recalls(
        gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
    return recalls.mean(axis=1)
def xyxy2xywh(bbox):
    """Convert a corner-format box to ``[x, y, width, height]``.

    Args:
        bbox: Object with a ``tolist()`` method whose first four values are
            read as ``x1, y1, x2, y2``; any further values are ignored.

    Returns:
        list: ``[x1, y1, x2 - x1 + 1, y2 - y1 + 1]``.
    """
    coords = bbox.tolist()
    left, top = coords[0], coords[1]
    # Width and height include both end coordinates, hence the +1.
    width = coords[2] - left + 1
    height = coords[3] - top + 1
    return [left, top, width, height]
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append