目标检测中面试时的基本代码

最新推荐文章于 2024-08-06 11:54:56 发布

大腿壮

最新推荐文章于 2024-08-06 11:54:56 发布

阅读量439

点赞数 2

分类专栏：深度学习

本文链接：https://blog.csdn.net/qq_41092190/article/details/106872020

版权

深度学习专栏收录该内容

9 篇文章 0 订阅

订阅专栏

计算IOU

def intersection(boxA, boxB):
    """Compute the pairwise intersection area between two sets of boxes.

    Columns 0-1 of each box are used as the top-left corner and the
    remaining columns as the bottom-right corner.

    :param boxA: array of shape [A, 4]
    :param boxB: array of shape [B, 4]
    :return: array of shape [A, B]; entry (i, j) is the overlap area of
        boxA[i] and boxB[j], or 0 when the two boxes do not overlap
    """
    # Add singleton axes so every box of A is broadcast against every box of B.
    expanded_a = boxA[:, np.newaxis, :]
    expanded_b = boxB[np.newaxis, :, :]

    top_left = np.maximum(expanded_a[..., :2], expanded_b[..., :2])
    bottom_right = np.minimum(expanded_a[..., 2:], expanded_b[..., 2:])

    # A negative extent means the boxes are disjoint; clamp it to zero.
    extent = np.clip(bottom_right - top_left, a_min=0, a_max=1e10)
    width, height = extent[..., 0], extent[..., 1]
    return width * height

def jaccard(boxA, boxB):
    """Compute the pairwise IoU (Jaccard index) between two sets of boxes.

    Boxes are indexed as (x1, y1, x2, y2): the area of a box is
    (x2 - x1) * (y2 - y1).

    :param boxA: array of shape [A, 4]
    :param boxB: array of shape [B, 4]
    :return: array of shape [A, B]; entry (i, j) is
        intersection / union of boxA[i] and boxB[j]
    """
    inter = intersection(boxA, boxB)  # [A, B]
    # Shape the areas as [A, 1] and [1, B] so their sum broadcasts to [A, B].
    areaA = np.expand_dims((boxA[..., 2] - boxA[..., 0]) * (boxA[..., 3] - boxA[..., 1]), axis=1)
    areaB = np.expand_dims((boxB[..., 2] - boxB[..., 0]) * (boxB[..., 3] - boxB[..., 1]), axis=0)

    union = areaA + areaB - inter
    # IoU is intersection over union (the original returned the union alone).
    # The lower bound avoids 0 / 0 when two zero-area boxes are compared.
    return inter / np.maximum(union, np.finfo(float).eps)

非极大值抑制

def non_max_supression(prediction, conf_thres=0.5, nms_thres=0.4):
    """Run per-class non-maximum suppression on the predictions of one image.

    :param prediction: tensor of shape [N, 4 + 1 + classes]; columns 0-3 are
        the box coordinates, column 4 is the foreground (objectness)
        probability and the remaining columns are the per-class scores
    :param conf_thres: boxes whose objectness is not above this value are dropped
    :param nms_thres: a box is suppressed when its IoU with a kept box of the
        same class exceeds this value
    :return: tensor of shape [M, 7] holding, per kept box, the coordinates,
        objectness, class confidence and class index; ``None`` when no box
        passes ``conf_thres``
    """
    image_pred = prediction[prediction[:, 4] > conf_thres]
    if image_pred.size(0) == 0:
        return None

    # Rank boxes by objectness * best class score, highest first.
    score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
    image_pred = image_pred[(-score).argsort()]
    class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
    # [N, 4 + 1 + 1 + 1] --> coordinates, objectness, class confidence, class index
    detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)

    keep_boxes = []

    while detections.size(0):
        # jaccard is NumPy based and returns a [1, N] matrix here, so hand it
        # NumPy arrays and take row 0 to get one IoU per remaining box.
        boxes = detections[:, :4].detach().cpu().numpy()
        iou_row = np.asarray(jaccard(boxes[:1], boxes))[0]
        overlap = torch.from_numpy(iou_row > nms_thres).to(detections.device)
        # Only boxes predicted as the same class as the top box can be suppressed.
        match = detections[0, -1] == detections[:, -1]
        invalid = overlap & match
        # The top box is always consumed; otherwise a degenerate box (or
        # nms_thres >= 1) would leave it in place and loop forever.
        invalid[0] = True

        keep_boxes.append(detections[0])
        # Boxes that survive this round of suppression.
        detections = detections[~invalid]

    return torch.stack(keep_boxes)

AP计算

AP的计算过程：按照给定的预测值降序排序，然后以每个预测值作为阈值，计算一组prec和rec。

对于07版本的AP计算过程：
采用11个点进行计算，分别是0,0.1,…1。对于每个划分点，找到大于等于该点的rec对应的prec中的最大值，然后将这11个prec求均值即可。

对于后续版本的AP计算过程：
不再以11个点进行计算，而是对于每个rec点都进行计算，找到大于等于该点的rec对应的prec的最大值（很显然从右往左计算复杂度最低）。每两个相邻rec之间的差值可以看作是矩形的宽，对应的prec是矩形的高，将所有小矩形的面积相加即可。

def calculate_ap(rec, prec, use_07_metric=False):
    """Compute average precision from a recall curve and a precision curve.

    :param rec: NumPy array of recall values
    :param prec: NumPy array of precision values, aligned with ``rec``
    :param use_07_metric: when true, use the 11-point interpolation
        (thresholds 0, 0.1, ..., 1); otherwise sum the area under the
        interpolated precision envelope
    :return: the average precision
    """
    if not use_07_metric:
        # Pad both curves with sentinel end points.
        padded_rec = np.concatenate(([0.], rec, [1.]))
        padded_prec = np.concatenate(([0.], prec, [0.]))

        # Precision envelope: each point takes the maximum precision found
        # at that point or anywhere to its right (reverse running maximum).
        envelope = np.maximum.accumulate(padded_prec[::-1])[::-1]

        # Only positions where recall changes contribute a rectangle:
        # width is the recall step, height is the envelope at the right edge.
        steps = np.where(padded_rec[1:] != padded_rec[:-1])[0]
        widths = padded_rec[steps + 1] - padded_rec[steps]
        return np.sum(widths * envelope[steps + 1])

    total = 0.
    for threshold in np.arange(0, 1.1, 0.1):
        reached = rec >= threshold
        # Best precision among points whose recall reaches the threshold;
        # 0 when no point reaches it.
        best = np.max(prec[reached]) if np.sum(reached) != 0 else 0
        total += best / 11
    return total

precision和recall的计算过程

def get_batch_statistics(outputs, targets, iou_threshold):
    """Mark every predicted box of a batch as true positive or not.

    :param outputs: per-image predictions after non-maximum suppression; an
        entry is ``None`` when the image has no predicted box, otherwise a
        tensor whose columns 0-3 are the box, column 4 the objectness score
        and the last column the predicted class
    :param targets: ground truth of shape [N, 1 + 1 + 4]: image index within
        the batch, label, then box corners (top-left and bottom-right)
    :param iou_threshold: minimum IoU for a prediction to match a ground truth
    :return: one ``[true_positives, pred_scores, pred_labels]`` entry per
        image that has predictions
    """
    batch_metrics = []

    for sample_i, output in enumerate(outputs):
        # No predicted box for this image.
        if output is None:
            continue

        pred_boxes = output[:, :4]
        pred_scores = output[:, 4]
        pred_labels = output[:, -1]

        # One flag per predicted box of this image: 1 = true positive.
        true_positives = np.zeros(pred_boxes.shape[0])

        # Ground truths of the current image, without the image-index column.
        annotations = targets[targets[:, 0] == sample_i][:, 1:]

        if len(annotations):
            target_labels = annotations[:, 0]
            target_boxes = annotations[:, 1:]
            detected_boxes = []

            for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):

                # Each ground truth matches at most one prediction; once all
                # are matched the remaining predictions are false positives.
                if len(detected_boxes) == len(annotations):
                    break

                # The predicted class does not occur among the ground truths.
                if pred_label not in target_labels:
                    continue

                # Match only against ground truths of the predicted class.
                # Otherwise a prediction could claim a ground truth of another
                # class, be counted as a true positive for its own class, and
                # push that class's recall above 1.
                candidates = [idx for idx, label in enumerate(target_labels) if label == pred_label]
                # NOTE(review): bbox_iou is not defined in this file; assumed to
                # return one IoU per candidate box -- confirm against its definition.
                iou, local_index = bbox_iou(pred_box.unsqueeze(0), target_boxes[candidates]).max(0)
                # Map the index within the candidates back to the index within
                # all ground truths of this image.
                box_index = candidates[int(local_index)]

                # True positive: IoU reaches the threshold and the matched
                # ground truth has not been claimed yet.
                if iou >= iou_threshold and box_index not in detected_boxes:
                    true_positives[pred_i] = 1
                    detected_boxes.append(box_index)
        batch_metrics.append([true_positives, pred_scores, pred_labels])
    return batch_metrics

def ap_per_class(tp, conf, pred_cls, target_cls):
    """Compute precision, recall and average precision for every class.

    # Arguments
        tp:    True positives (array). One entry per predicted box left after
               non-maximum suppression over the dataset: 1 if it is a true
               positive, 0 otherwise.
        conf:  Objectness value from 0-1 (array).
        pred_cls: Predicted object classes (array).
        target_cls: True object classes (array).
    # Returns
        (p, r, ap, classes): per-class precision, recall and average
        precision, plus the ground-truth classes cast to int32.
        The average precision as computed in py-faster-rcnn.
    """

    # Sort all predicted boxes by confidence, highest first.
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Classes that occur in the ground truth.
    unique_classes = np.unique(target_cls)

    # Per-class results.
    ap, p, r = [], [], []

    for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
        is_class = pred_cls == c
        n_gt = (target_cls == c).sum()  # number of ground truths of this class
        n_p = is_class.sum()  # number of predictions of this class

        if n_p == 0 and n_gt == 0:
            # Not expected in practice: the classes come from the ground truth.
            continue
        elif n_p == 0 or n_gt == 0:
            # No prediction (or no ground truth) for this class, hence no true
            # positive: precision, recall and AP are all 0.
            ap.append(0)
            r.append(0)
            p.append(0)
        else:
            # Predictions are sorted by confidence, so the cumulative sums give
            # the FP / TP counts obtained when each confidence value in turn is
            # used as the decision threshold.
            fpc = (1 - tp[is_class]).cumsum()
            tpc = (tp[is_class]).cumsum()

            # Recall at every threshold; the last value uses all predictions.
            recall_curve = tpc / (n_gt + 1e-16)
            r.append(recall_curve[-1])

            # Precision at every threshold.
            precision_curve = tpc / (tpc + fpc)
            p.append(precision_curve[-1])

            # Area under the precision/recall curve, using the AP routine
            # defined in this file.
            ap.append(calculate_ap(recall_curve, precision_curve))

    # Convert the per-class results to arrays (no F1 score is computed here).
    p, r, ap = np.array(p), np.array(r), np.array(ap)

    return p, r, ap, unique_classes.astype("int32")

AUC面积计算

ROC曲线图，横坐标是假正例率：预测为真实际为假/实际为假；纵坐标是真正例率：预测为真实际为真/实际为真；AUC即ROC曲线下的面积。
AUC的另一种解释：对于任意一对正负例样本，模型将正样本预测为正例的可能性大于将负例预测为正例的可能性的概率。


def AUC(label, pre):
　　#计算正样本和负样本的索引，以便索引出之后的概率值
    pos = [i for i in range(len(label)) if label[i] == 1]
    neg = [i for i in range(len(label)) if label[i] == 0]
 
    auc = 0
    for i in pos:
        for j in neg:
            if pre[i] > pre[j]:
                auc += 1
            elif pre[i] == pre[j]:
                auc += 0.5