【detectron】 voc_eval函数解析

最新推荐文章于 2022-06-07 17:57:26 发布

Mr_health

最新推荐文章于 2022-06-07 17:57:26 发布

阅读量1.8k

点赞数

分类专栏： detectron 文章标签： detectron

本文链接：https://blog.csdn.net/Mr_health/article/details/89519469

版权

detectron 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

voc_eval函数的输入

def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):

根据参数的顺序，依次为：

(1) 检测结果的路径detpath

在跑test_net.py的时候，会在detectron中的以下路径生成测试数据检测结果的临时文件：

/detectron/datasets/data/VOC2007/VOCdevkit2007/results/VOC2007/Main/comp4_307e5553-2bc0-4b06-a7a1-c42cc037b5b5_det_val_ship.txt

该临时文件的内容如下：每一行对应一个检测到的目标，第一列是测试图片的名称（无后缀的），第二列是分数，后四列是box的坐标

(2) 测试集gt的xml文件路径：annopath

也就是我们准备好的val_annotations

(3) 测试集的序列路径：imagesetfile

这个imagesetfile就是在训练前就准备好的val.txt，要读取这个文件

(4) classname：类别的名称，只是某一个类别

从调用voc_eval函数的外部函数可以看出，是依次对每一类调用voc_eval函数进行评估

# Evaluate every foreground class independently: voc_eval() scores a single
# class per call, so the background pseudo-class is skipped.
for cls in json_dataset.classes:
    if cls == '__background__':
        continue
    # Per-class detection result file produced by the test run.
    results_template = _get_voc_results_file_template(json_dataset, salt)
    filename = results_template.format(cls)
    rec, prec, ap = voc_eval(
        filename,
        anno_path,
        image_set_path,
        cls,
        cachedir,
        ovthresh=0.5,
        use_07_metric=use_07_metric)

voc_eval代码解析

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    imageset = os.path.splitext(os.path.basename(imagesetfile))[0]
    cachefile = os.path.join(cachedir, imageset + '_annots.pkl')  #测试的缓存文件，也就是val_annotations.pkl
    # read list of images
    with open(imagesetfile, 'r') as f:  #imagesetfile就是val.txt，里面的每一行是测试图片的名称
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    #如果上述的测试的缓存文件不存在，那么就通过annopath读取，并生成相应的缓存文件，以便下次直接可以读取
    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                logger.info(
                    'Reading annotation for {:d}/{:d}'.format(
                        i + 1, len(imagenames)))
        # save
        logger.info('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'w') as f:
            cPickle.dump(recs, f)
    else:  #如果缓存文件存在那么直接读取
        # load
        with open(cachefile, 'r') as f:
            recs = cPickle.load(f)   #recs是一个字典，储存验证数据的gt，其中key是图像的名称，所对应的内容就是gt

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname] #针对每一张测试样本，取出目标类型为classname的gt，构成R
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)  #读取检测结果
    with open(detfile, 'r') as f:
        lines = f.readlines()  #首先读取每一行

    splitlines = [x.strip().split(' ') for x in lines]  #上述读取完每一行后，在对每一行分割
    image_ids = [x[0] for x in splitlines]       #分割后第一个元素就是图片的名称image_ids，len（image_ids）是目标的个数
    confidence = np.array([float(x[1]) for x in splitlines])  #第二个元素是分数，，len（confidence）是目标的个数
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) #后面四个元素为box

    # sort by confidence
    sorted_ind = np.argsort(-confidence) #对分数排序
    BB = BB[sorted_ind, :]      #相应按照分数的排序对box排序
    image_ids = [image_ids[x] for x in sorted_ind]  #应按照分数的排序对图像名称排序

    # go down dets and mark TPs and FPs
    nd = len(image_ids)  #目标的个数
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):  #也就是按照分数从高到底，依次对于每一个目标
        R = class_recs[image_ids[d]]  #image_ids[d]是该目标对应的图片名称，R就是该图片对应的gt
        bb = BB[d, :].astype(float)  #相应地从BB中取出第d个box
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)
        #计算bb与R中各个gt的重叠
        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1  #如果重叠大于ovthresh，那么tp设置为1（len(tp)=目标的数量）
                else:
                    fp[d] = 1.  
        else:
            fp[d] = 1.  #如果重叠小于ovthres，那么是假阳，在fp相应的位置标为1
    num = len(np.where(confidence>0.7)[0])   #confidence大于0.7的目标个数，由于fp，tp都是按照分数从高到底排序的，前num个目标就是我们需要统计的
    # compute precision recall
    fp = np.cumsum(fp)
    fpnum = int(fp[num-1]) #虚警个数
    tp = np.cumsum(tp)
    tpnum = int(tp[num-1]) #召回个数 
    rec = tp / float(npos)
    objectnum = int(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    alarm = 1-prec[num-1]
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap, objectnum, tpnum, fpnum, alarm

顺便修改了一下调用voc_eval的外部函数，使其接收新增的返回值（objectnum、tpnum、fpnum、alarm），并输出每一类的目标个数、召回个数、召回率、虚警个数和虚警率：

# Evaluate every foreground class and log, besides AP, the object count,
# recall and false-alarm statistics returned by the extended voc_eval().
for cls in json_dataset.classes:
    if cls == '__background__':
        continue
    filename = _get_voc_results_file_template(
        json_dataset, salt).format(cls)
    rec, prec, ap, objectnum, tpnum, fpnum, alarm = voc_eval(
        filename, anno_path, image_set_path, cls, cachedir, ovthresh=0.5,
        use_07_metric=use_07_metric)
    aps += [ap]
    logger.info('AP for {} = {:.4f}'.format(cls, ap))
    # A class without any counted ground-truth object would otherwise raise
    # ZeroDivisionError here; report a recall rate of 0 instead.
    recall_rate = tpnum / float(objectnum) if objectnum > 0 else 0.0
    logger.info('{}共含有目标{:d},召回个数为{:d}，召回率为{:.4f}，虚警个数为{:d}，虚警率为{:.4f}'.format(cls, objectnum, tpnum, recall_rate, fpnum, alarm))