voc_eval函数的输入
# Signature of voc_eval (excerpt: header only, the body is shown further below).
#   detpath       - template of the detection-results file; the body expands it
#                   with detpath.format(classname)
#   annopath      - template of the annotation file path; the body expands it
#                   with annopath.format(imagename)
#   imagesetfile  - text file that is read line by line to get the image names
#   classname     - name of the single class being evaluated
#   cachedir      - directory in which the pickled annotation cache is kept
#   ovthresh      - a detection must overlap a ground-truth box by more than
#                   this value to be treated as a match
#   use_07_metric - passed through unchanged to voc_ap
def voc_eval(detpath,
annopath,
imagesetfile,
classname,
cachedir,
ovthresh=0.5,
use_07_metric=False):
根据参数的顺序,依次为:
(1) 检测结果的路径detpath
在跑test_net.py的时候,会在detectron中的以下路径生成你的测试数据检测结果的临时文件:
/detectron/datasets/data/VOC2007/VOCdevkit2007/results/VOC2007/Main/comp4_307e5553-2bc0-4b06-a7a1-c42cc037b5b5_det_val_ship.txt
该临时文件的内容如下:第一列是测试图片的名称(无后缀),第二列是分数,后四列是box的坐标
(2) 测试集gt的xml文件路径:annopath
也就是我们准备好的val_annotations
(3) 测试集的序列路径:imagesetfile
这个imagesetfile就是在训练前就准备好的val.txt,要读取这个文件
(4) classname:类别的名称,只是某一个类别
从调用voc_eval函数的外部函数可以看出,是依次对每一类调用voc_eval函数进行评估
# Caller side: voc_eval is invoked once per class; the '__background__'
# entry is skipped because it is not evaluated.
for cls in json_dataset.classes:
    if cls == '__background__':
        continue
    # Per-class detection-results file produced for this dataset/salt.
    filename = _get_voc_results_file_template(
        json_dataset, salt).format(cls)
    rec, prec, ap = voc_eval(
        filename, anno_path, image_set_path, cls, cachedir, ovthresh=0.5,
        use_07_metric=use_07_metric)
voc_eval代码解析
def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False,
             score_thresh=0.7):
    """Evaluate the detections of one class against the ground truth.

    Args:
        detpath: Template of the detection-results file; it is expanded with
            ``detpath.format(classname)``. Each non-empty line holds an image
            name, a score and the box values, separated by single spaces.
        annopath: Template of the annotation file path; it is expanded with
            ``annopath.format(imagename)`` and handed to ``parse_rec``.
        imagesetfile: Text file with one image name per line.
        classname: Name of the class to evaluate.
        cachedir: Directory holding the pickled annotation cache. It is
            created when missing.
        ovthresh: A detection matches a ground-truth box only when their
            overlap (intersection over union) is greater than this value.
        use_07_metric: Passed through unchanged to ``voc_ap``.
        score_thresh: Only detections scoring strictly above this value are
            counted in ``tpnum``, ``fpnum`` and ``alarm``.

    Returns:
        A tuple ``(rec, prec, ap, objectnum, tpnum, fpnum, alarm)``:
        ``rec`` and ``prec`` are the cumulative recall/precision arrays over
        the score-sorted detections, ``ap`` is the value returned by
        ``voc_ap``, ``objectnum`` is the number of non-difficult ground-truth
        objects, ``tpnum``/``fpnum`` are the true/false positive counts among
        the detections above ``score_thresh`` and ``alarm`` is
        ``1 - precision`` at that cut-off. When no detection exceeds
        ``score_thresh`` the last three values are ``0``, ``0`` and ``0.0``.
    """
    # first load gt
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
    imageset = os.path.splitext(os.path.basename(imagesetfile))[0]
    # Annotation cache file, e.g. "val_annots.pkl" for an image set "val.txt".
    cachefile = os.path.join(cachedir, imageset + '_annots.pkl')

    # read list of images: every line of imagesetfile is one image name
    with open(imagesetfile, 'r') as f:
        imagenames = [line.strip() for line in f]

    if not os.path.isfile(cachefile):
        # No cache yet: parse every annotation file and store the result so
        # that the next run can load it directly.
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                logger.info(
                    'Reading annotation for {:d}/{:d}'.format(
                        i + 1, len(imagenames)))
        # save
        logger.info('Saving cached annotations to {:s}'.format(cachefile))
        # Pickle data is a byte stream, so the file is opened in binary mode.
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)
    else:
        # load
        # NOTE: unpickling can execute arbitrary code; cachedir must only
        # contain files written by this function.
        with open(cachefile, 'rb') as f:
            # recs maps image name -> list of ground-truth objects
            recs = cPickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        # Ground-truth objects of this image that belong to classname.
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # Plain bool instead of np.bool, which newer NumPy no longer has.
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # Difficult objects are not counted as positives.
        npos += int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        # Blank lines carry no detection and are skipped.
        splitlines = [line.strip().split(' ') for line in f if line.strip()]
    # Column 0: image name, column 1: score, remaining columns: box.
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
    if BB.size == 0:
        # Keep BB two-dimensional so the indexing below also works when the
        # detection file is empty.
        BB = BB.reshape(0, 4)

    # sort by confidence (descending); boxes and image names follow the
    # same order
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        # Ground truth of the image this detection belongs to.
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps of bb with every ground-truth box
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # A match with a difficult object counts neither as TP nor FP.
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    # First detection that claims this ground-truth box.
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # The box was already claimed by a higher-scoring
                    # detection: duplicate, hence a false positive.
                    fp[d] = 1.
        else:
            # Overlap not above ovthresh: false positive.
            fp[d] = 1.

    # Number of detections scoring above score_thresh. tp/fp are ordered by
    # descending score, so these are exactly the first `num` entries.
    num = int(np.count_nonzero(confidence > score_thresh))

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    objectnum = int(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

    if num > 0:
        fpnum = int(fp[num - 1])   # false alarms above the score threshold
        tpnum = int(tp[num - 1])   # recalled objects above the score threshold
        alarm = 1 - prec[num - 1]
    else:
        # Nothing scored above the threshold; indexing with num - 1 == -1
        # would wrongly report the totals over all detections.
        fpnum = 0
        tpnum = 0
        alarm = 0.0

    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap, objectnum, tpnum, fpnum, alarm
顺便修改了一下调用voc_eval的外部函数,使其接收新增的返回值并打印召回率和虚警率:
# Modified caller: evaluates every class except '__background__', collects
# the AP and logs the per-class recall / false-alarm statistics.
for cls in json_dataset.classes:
    if cls == '__background__':
        continue
    filename = _get_voc_results_file_template(
        json_dataset, salt).format(cls)
    rec, prec, ap, objectnum, tpnum, fpnum, alarm = voc_eval(
        filename, anno_path, image_set_path, cls, cachedir, ovthresh=0.5,
        use_07_metric=use_07_metric)
    aps.append(ap)
    logger.info('AP for {} = {:.4f}'.format(cls, ap))
    # A class without any counted ground-truth object would otherwise raise
    # ZeroDivisionError here; report a recall rate of 0 for it instead.
    recall_rate = tpnum / float(objectnum) if objectnum else 0.0
    logger.info('{}共含有目标{:d},召回个数为{:d},召回率为{:.4f},虚警个数为{:d},虚警率为{:.4f}'.format(cls, objectnum, tpnum, recall_rate, fpnum, alarm))