算法评价指标recall、precision、F1 score介绍及PR曲线代码分析

最新推荐文章于 2025-03-24 10:31:54 发布

cc__cc__

最新推荐文章于 2025-03-24 10:31:54 发布

阅读量4.6k

点赞数 4

分类专栏：深度学习文章标签：算法机器学习深度学习

本文链接：https://blog.csdn.net/cc__cc__/article/details/105450167

版权

深度学习专栏收录该内容

24 篇文章

订阅专栏

一、算法评价指标介绍

| 真实值（↓） / 预测值（→） | Positive | Negative |
| --- | --- | --- |
| Positive | True Positive（TP） | False Negative（FN） |
| Negative | False Positive（FP） | True Negative（TN） |

TP：表示将正类预测为正类的数目
FN：表示将正类预测为负类的数目
FP：表示将负类预测为正类的数目
TN：表示将负类预测为负类的数目
【记忆方法：预测与真实是否一致决定真假(T/F)，预测结果本身决定阴阳(N/P)】

1. recall 计算公式

$$\mathrm{recall} = \frac{TP}{TP + FN}$$

可以看出，recall表示真实的正类有多少被正确预测出来

2. precision 计算公式

$$\mathrm{precision} = \frac{TP}{TP + FP}$$

可以看出，precision表示预测的正类中有多少是真实的

3. F1 score 计算公式

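$$F_1 = \frac{2 \cdot \mathrm{precision} \cdot \mathrm{recall}}{\mathrm{precision} + \mathrm{recall}} = \frac{2\,TP}{2\,TP + FP + FN}$$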

如需综合考虑recall和precision则可以使用F1 score

注意：recall、precision、F1 score都是针对二分类而言的

二、PR曲线介绍

在一些文献中经常会看到下图所示的PR曲线
关于PR曲线的理论解释有很多，这里不再赘述，下面主要结合代码分析一下PR曲线的横纵坐标究竟是如何得到的以及其形式是怎样的。
下面以 https://github.com/Stephenfang51/mmdetection_visualize 中的代码为例：

# mean_ap_visualize.py
def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None):
    """Check if detected bboxes are true positive or false positive.

    All inputs describe a single image; the caller is expected to invoke this
    function once per image.

    Args:
        det_bboxes (ndarray): the detected bboxes. Columns 0-3 are read as
            (x1, y1, x2, y2) and the last column is used as the sort key
            (descending), i.e. the detection score.
        gt_bboxes (ndarray): ground truth bboxes of this image. Columns 0-3
            are read as (x1, y1, x2, y2).
        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
        iou_thr (float): the iou threshold
        area_ranges (list, optional): [(min_area, max_area), ...]. A range
            whose ``min_area`` is None places no restriction on box area.
            Defaults to a single unrestricted range.

    Returns:
        tuple: (tp, fp), two float32 arrays of shape (num_scales, num_dets)
            whose elements are 0 and 1
    """
    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets); each row holds the tp or
    # fp flags of one area range, one flag per detection of this image.
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    # Detection areas are only needed when at least one range is bounded;
    # compute them once here instead of once per detection inside the loop.
    if any(min_area is not None for min_area, _ in area_ranges):
        det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (
            det_bboxes[:, 3] - det_bboxes[:, 1] + 1)
    else:
        det_areas = None

    # If there is no gt bbox in this image, every det bbox within the area
    # range is a false positive. Each range is handled on its own so that an
    # unrestricted (None) range may be mixed with bounded ranges, matching
    # how the main loop below treats ``min_area is None``.
    if num_gts == 0:
        for k, (min_area, max_area) in enumerate(area_ranges):
            if min_area is None:
                fp[k, :] = 1
            else:
                fp[k, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    ious = bbox_overlaps(det_bboxes, gt_bboxes)
    # For every detection: its best IoU and the gt index achieving it.
    ious_max = ious.max(axis=1)
    ious_argmax = ious.argmax(axis=1)
    # Visit detections from highest to lowest score so that the most
    # confident detection claims a gt first.
    sort_inds = np.argsort(-det_bboxes[:, -1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)
        else:
            gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (
                gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            if ious_max[i] >= iou_thr:
                # The IoU threshold decides whether this detection is matched
                # to a gt; the first match of a gt is a tp, later ones are fp.
                matched_gt = ious_argmax[i]
                if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
                    if not gt_covered[matched_gt]:
                        gt_covered[matched_gt] = True
                        tp[k, i] = 1
                    else:
                        fp[k, i] = 1
                # otherwise ignore this detected bbox, tp = 0, fp = 0
            elif min_area is None:
                fp[k, i] = 1
            elif det_areas[i] >= min_area and det_areas[i] < max_area:
                # Unmatched detection counts as fp only inside this range.
                fp[k, i] = 1
    return tp, fp

下面再来看一下得到最终tp和fp值的函数

def map_roc_pr(det_results,
             gt_bboxes,
             gt_labels,
             gt_ignore=None,
             scale_ranges=None,
             iou_thr=0.5,
             dataset=None,
             print_summary=True):
    """Evaluate mAP of a dataset.

    For every class, per-image tp/fp flags are collected, sorted by
    detection score over the whole dataset and accumulated, which gives one
    (recall, precision) pair per detection -- the points of the PR curve.

    NOTE(review): part of the body is elided ("……") in this listing; the
    returned ``mean_ap`` and ``eval_results`` are produced in that elided
    part and cannot be verified from here.

    Args:
        det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...]
        gt_bboxes (list): ground truth bboxes of each image, a list of K*4
            array.
        gt_labels (list): ground truth labels of each image, a list of K array
        gt_ignore (list): gt ignore indicators of each image, a list of K array
        scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
        iou_thr (float): IoU threshold
        dataset (None or str or list): dataset name or dataset classes, there
            are minor differences in metrics for different datasets, e.g.
            "voc07", "imagenet_det", etc.
        print_summary (bool): whether to print the mAP summary

    Returns:
        tuple: (mAP, [dict, dict, ...])
    """
    assert len(det_results) == len(gt_bboxes) == len(gt_labels)   # one entry per image of the test set
    if gt_ignore is not None:
        assert len(gt_ignore) == len(gt_labels)
        for i in range(len(gt_ignore)):
            assert len(gt_labels[i]) == len(gt_ignore[i])
    # scale ranges are side lengths; squaring turns them into area ranges
    area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]
                   if scale_ranges is not None else None)
    num_scales = len(scale_ranges) if scale_ranges is not None else 1
    eval_results = []
    num_classes = len(det_results[0])  # positive class num
    # keep labels 1-D: for 2-D label arrays only the first column is used
    gt_labels = [
        label if label.ndim == 1 else label[:, 0] for label in gt_labels
    ]
    
    # NOTE(review): these accumulators are not used in the visible part of
    # the body; presumably they are filled in the elided section.
    f_measure_list = []
    recall_list = []
    precision_list = []
    ap_list = []
    for i in range(num_classes):   # recall and precision are binary metrics, so each class is evaluated separately
        # get gt and det bboxes of this class
        cls_dets, cls_gts, cls_gt_ignore = get_cls_results(
            det_results, gt_bboxes, gt_labels, gt_ignore, i)
        # calculate tp and fp for each image
        tpfp_func = (    # the walkthrough assumes tpfp_default is the function selected here
            tpfp_imagenet if dataset in ['det', 'vid'] else tpfp_default)
        tpfp = [
            tpfp_func(cls_dets[j], cls_gts[j], cls_gt_ignore[j], iou_thr,
                      area_ranges) for j in range(len(cls_dets))    # loop over images: one (tp, fp) result per image
        ]
        # regroup the per-image (tp, fp) pairs into a tuple of tp arrays and
        # a tuple of fp arrays
        tp, fp = tuple(zip(*tpfp))  
        # calculate gt number of each scale, gts ignored or beyond scale
        # are not counted
        num_gts = np.zeros(num_scales, dtype=int)
        for j, bbox in enumerate(cls_gts):
            if area_ranges is None:
                num_gts[0] += np.sum(np.logical_not(cls_gt_ignore[j]))
            else:
                gt_areas = (bbox[:, 2] - bbox[:, 0] + 1) * (
                    bbox[:, 3] - bbox[:, 1] + 1)
                for k, (min_area, max_area) in enumerate(area_ranges):
                    num_gts[k] += np.sum(
                        np.logical_not(cls_gt_ignore[j]) &
                        (gt_areas >= min_area) & (gt_areas < max_area))
        # sort all det bboxes by score, also sort tp and fp
        cls_dets = np.vstack(cls_dets) 
        num_dets = cls_dets.shape[0]
        sort_inds = np.argsort(-cls_dets[:, -1]) 
        tp = np.hstack(tp)[:, sort_inds]
        fp = np.hstack(fp)[:, sort_inds]
        # calculate recall and precision with tp and fp
        tp = np.cumsum(tp, axis=1)   # cumulative sum: column k counts the TPs among the k+1 highest-scoring detections, so each column yields one PR-curve point as the score cut-off is lowered
        fp = np.cumsum(fp, axis=1)
        # eps guards the divisions below against a zero denominator
        eps = np.finfo(np.float32).eps
        recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)   
        precisions = tp / np.maximum((tp + fp), eps)  # recall and precision are fully determined at this point; sample values follow the function
        ……
        ……
        ……
    return mean_ap, eval_results

# Example `recalls` output: one value per detection in descending score order.
# It never decreases, because it is the cumulative TP count divided by the
# fixed number of ground-truth boxes.
array([7.47943156e-04, 1.49588631e-03, 2.24382947e-03, 2.99177263e-03,
       3.73971578e-03, 4.48765894e-03, 5.23560209e-03, 5.98354525e-03,
       6.73148841e-03, 7.47943156e-03, 8.22737472e-03, 8.97531788e-03,
       9.72326103e-03, 1.04712042e-02, 1.12191473e-02, 1.19670905e-02,
       ...,9.74569933e-01, 9.74569933e-01])
# Example `precisions` output: cumulative TP / (TP + FP) at the same cut-offs.
# It stays at 1.0 while every detection so far is a true positive and drops
# as false positives accumulate.
array([1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 0.9962121 , 0.99622643,
       0.9962406 , 0.9962547 , 0.99626863, 0.9962825 , 0.9962963 ,
       ...,0.06361323, 0.06361012, 0.06360701, 0.06360389])