目标检测中准确率评价指标

caffe中实现SSD准确率评价的方法是TestDetection()函数。mAP指标值是每个类别的Average Precision(AP)的平均值。

本文分析DetectionEvaluateLayer中实现评价的过程及其评价指标。需要指出的是,一般的前向过程是不包含DetectionEvaluateLayer层的定义的,只有在训练的评价(TEST)过程中才会使用到该layer。具体使用该层时,需要在prototxt文件中把下述定义写到DetectionOutput的定义之后。

layer {
  name: "detection_eval"
  type: "DetectionEvaluate"
  bottom: "detection_out"
  bottom: "label"
  top: "detection_eval"
  include {
    phase: TEST
  }
  detection_evaluate_param {
    num_classes: 11
    background_label_id: 0
    overlap_threshold: 0.5
    evaluate_difficult_gt: false
  }
}

在计算Average Precision之前需要先计算出所有预测框与gt_bboxes的匹配。

SSD evaluation  layer以detection_output layer的输出([image_id, label, confidence, xmin, ymin, xmax, ymax])作为输入,同时输出 [image_id, label, confidence, true_pos, false_pos]元组。

具体操作为:1)遍历每个类别;2)将该类别的预测框按confidence降序排列;3)对于每个pred_bbox,找出与其有最大iou的gt_bbox;4)如果该gt_bbox之前没有被分配且iou大于给定的阈值(比如0.5),那将该gt_bbox分配该给pred_bbox,设置该pred_bbox为true positive;否则设置该pred_bbox为false positive.

//from code:caffe-ssd/src/caffe/layers/detection_evaluate_layer.cpp
//function: DetectionEvaluateLayer<Dtype>::Forward_cpu

      // For each detected label in this image, match its predicted boxes
      // against the ground-truth boxes and emit one 5-tuple per detection:
      // [image_id, label, score, true_pos, false_pos].
      for (LabelBBox::iterator iit = detections.begin();
           iit != detections.end(); ++iit) {
        int label = iit->first;
        if (label == -1) {
          // -1 appears to be a placeholder/background entry — skip it.
          continue;
        }
        vector<NormalizedBBox>& bboxes = iit->second;
        if (label_bboxes.find(label) == label_bboxes.end()) {
          // No ground truth for current label. All detections become false_pos.
          for (int i = 0; i < bboxes.size(); ++i) {
            top_data[num_det * 5] = image_id;
            top_data[num_det * 5 + 1] = label;
            top_data[num_det * 5 + 2] = bboxes[i].score();
            top_data[num_det * 5 + 3] = 0;
            top_data[num_det * 5 + 4] = 1;
            ++num_det;
          }
        } else {
          vector<NormalizedBBox>& gt_bboxes = label_bboxes.find(label)->second;
          // Scale ground truth if needed.
          if (!use_normalized_bbox_) {
            CHECK_LT(count_, sizes_.size());
            for (int i = 0; i < gt_bboxes.size(); ++i) {
              OutputBBox(gt_bboxes[i], sizes_[count_], has_resize_,
                         resize_param_, &(gt_bboxes[i]));
            }
          }
          // visited[j] marks whether gt_bboxes[j] has already been claimed
          // by a (higher-scoring) prediction.
          vector<bool> visited(gt_bboxes.size(), false);
          // Sort detections in descend order based on scores.
          std::sort(bboxes.begin(), bboxes.end(), SortBBoxDescend);
          for (int i = 0; i < bboxes.size(); ++i) {
            top_data[num_det * 5] = image_id;
            top_data[num_det * 5 + 1] = label;
            top_data[num_det * 5 + 2] = bboxes[i].score();
            if (!use_normalized_bbox_) {
              OutputBBox(bboxes[i], sizes_[count_], has_resize_,
                         resize_param_, &(bboxes[i]));
            }
            // Compare with each ground truth bbox.
            float overlap_max = -1;
            int jmax = -1;
            // Find gt_bboxes[jmax], the ground truth with the largest IoU
            // against the current prediction bboxes[i].
            for (int j = 0; j < gt_bboxes.size(); ++j) {
              float overlap = JaccardOverlap(bboxes[i], gt_bboxes[j],
                                             use_normalized_bbox_);
              if (overlap > overlap_max) {
                overlap_max = overlap;
                jmax = j;
              }
            }
            // Only predictions whose best overlap reaches overlap_threshold_
            // can count as true positives.
            if (overlap_max >= overlap_threshold_) {
              if (evaluate_difficult_gt_ ||
                  (!evaluate_difficult_gt_ && !gt_bboxes[jmax].difficult())) {
                if (!visited[jmax]) {
                  // true positive.
                  top_data[num_det * 5 + 3] = 1;
                  top_data[num_det * 5 + 4] = 0;
                  visited[jmax] = true;
                } else {//its matched gt_bbox was already claimed by an earlier pred_bbox
                  // false positive (multiple detection).
                  top_data[num_det * 5 + 3] = 0;
                  top_data[num_det * 5 + 4] = 1;
                }
              }
              // NOTE(review): when the matched gt is "difficult" and
              // evaluate_difficult_gt_ is false, neither flag is written here;
              // presumably top_data was zero-initialized upstream — verify.
            } else {
              // false positive.
              top_data[num_det * 5 + 3] = 0;//true-positive flag set to 0
              top_data[num_det * 5 + 4] = 1;//false-positive flag set to 1
            }
            ++num_det;
          }
        }
      }
    }

计算mAP的代码位于src/caffe/solver.cpp中的TestDetection的函数,该函数以vector&lt;pair&lt;float, int&gt; &gt; label_true_pos,vector&lt;pair&lt;float, int&gt; &gt; label_false_pos作为参数调用src/caffe/util/bbox_util.cpp文件中的ComputeAP函数。ComputeAP计算出所有的precision和recall值。average precision的计算方式有11point(VOC2007 style),MaxIntegral(VOC2012 or ILSVRC style),Integral(会比11point计算出来的值略大)。

关于Average_precision的公式,可以参照wiki上的解释:Wikipedia entry for the Average precision

下面介绍下这三种AP的计算方式。首先需要画出P-R曲线(纵轴是P,横轴是R),它整体上是一条从左上向右下延伸的曲线;需要注意的是,实际的P-R曲线通常是锯齿状的折线(precision并非随recall单调下降),并不一定是凸线;

如果一共有n个P-R值,Integral的计算方式是将横轴划分为n+1份,然后每份的宽度为recall[i+1]-recall[i],然后ap +=(recall[i+1]-recall[i])*precision[i],i逐渐增加到n。这个其实就是计算P-R曲线与横纵轴围成的面积,即积分;

MaxIntegral与Integral较相似,区别是MaxIntegral从后向前遍历(i逐渐减少到0),并维护一个"从右向左"的precision最大值,即ap += (recall[i+1]-recall[i])*max(precision[i+1..n])。由于每一段所乘的precision都不小于该段原始的precision值,MaxIntegral计算出来的AP不会小于Integral(这也正是VOC2010+/ILSVRC采用的插值方式)。

11point的计算方式可以参照文章:深度学习-目标检测评估指标P-R曲线、AP、mAP。SSD的实现代码如下:

    // NOTE: the (rec, prec) pairs must first be arranged by rec in descending
    // order (as scanned below, i goes from num-1 down to 0), otherwise the
    // result is wrong.
    // VOC2007 style for computing AP.
    // max_precs[j] ends up holding the maximum precision over all points
    // whose recall >= j/10 (11 recall thresholds: 0.0, 0.1, ..., 1.0).
    vector<float> max_precs(11, 0.);
    int start_idx = num - 1;
    for (int j = 10; j >= 0; --j) {
      for (int i = start_idx; i >= 0 ; --i) {
        if ((*rec)[i] < j / 10.) {
          // Recall dropped below the current threshold: remember where to
          // resume for the next (lower) threshold, and carry the running max
          // down, since max over recall >= (j-1)/10 includes recall >= j/10.
          start_idx = i;
          if (j > 0) {
            max_precs[j-1] = max_precs[j];
          }
          break;
        } else {
          if (max_precs[j] < (*prec)[i]) {
            max_precs[j] = (*prec)[i];
          }
        }
      }
    }
    // AP = mean of the 11 interpolated precisions.
    for (int j = 10; j >= 0; --j) {
      *ap += max_precs[j] / 11;
    }

上述代码感觉还有更优的实现。

其中prec,rec的计算方式如下:

// Compute precision/recall curves from per-detection true/false positive
// indicators. Each pair is <score, indicator>; the cumulative sums over
// score-sorted detections give tp/fp counts at every confidence cutoff.
const vector<pair<float, int> > tp;//<score,index>
const vector<pair<float, int> > fp;
// Compute cumsum of tp.
vector<int> tp_cumsum;
CumSum(tp, &tp_cumsum);//CumSum sorts tp (and below fp) by score in descending order before accumulating
CHECK_EQ(tp_cumsum.size(), num);

// Compute cumsum of fp.
vector<int> fp_cumsum;
CumSum(fp, &fp_cumsum);
CHECK_EQ(fp_cumsum.size(), num);
// Compute precision.
for (int i = 0; i < num; ++i) {
    //NOTE: prec is not necessarily in descending order
	prec->push_back(static_cast<float>(tp_cumsum[i]) /
				(tp_cumsum[i] + fp_cumsum[i]));
}
// Compute recall.
for (int i = 0; i < num; ++i) {
	CHECK_LE(tp_cumsum[i], num_pos);
	rec->push_back(static_cast<float>(tp_cumsum[i]) / num_pos);//rec, like tp_cumsum, is non-decreasing in i
}

 

展开阅读全文

没有更多推荐了,返回首页