NMS处理候选框（普通的NMS、numpy 、torch官方和 GPU）

阿爽的梦想

已于 2024-05-19 19:40:46 修改

阅读量840

点赞数 2

文章标签： python 深度学习 人工智能 numpy

于 2023-04-22 10:07:05 首次发布

本文链接：https://blog.csdn.net/qq_44635614/article/details/129927876

版权

第一种：普通NMS，需要安装numpy库和tqdm库

第二种：numpy处理NMS，安装numpy库

第三种：torch处理NMS，安装torch库

第四种：GPU加速处理的NMS，安装cupy库

第一种：普通NMS，需要安装numpy库和tqdm库

import numpy as np
from tqdm import tqdm

# 普通处理
def nms_1(boxes, threshold=0.5):
    """Greedy non-maximum suppression over a sequence of scored boxes.

    Each round picks the row with the largest value at index 4 (the score),
    stores it, and discards every remaining row whose IoU with it is not
    strictly below ``threshold``.

    The overlap is computed by a module-level callable named ``iou`` that is
    not defined in this block; it receives each row without its last element.

    Args:
        boxes: sequence (list or 2-D array) of rows with the score at index 4.
        threshold: rows with ``iou(...) < threshold`` survive a round.

    Returns:
        list of the selected rows, in the order they were picked
        (highest score first).
    """
    kept = []
    while len(boxes) > 0:
        # Position of the best-scoring row among the current candidates.
        best_pos = np.argmax([row[4] for row in boxes])
        winner = boxes[best_pos]
        kept.append(winner)
        # Drop the winner, then keep only rows that overlap it weakly.
        remaining = np.delete(boxes, best_pos, axis=0)
        boxes = [
            remaining[k]
            for k in tqdm(range(len(remaining)))
            if iou(winner[:-1], remaining[k][:-1]) < threshold
        ]
    return kept


# 软处理
def nms_2(boxes, threshold, λ, score_threshold=0.001):  # soft_nms_2
    """Soft-NMS: decay the scores of overlapping boxes instead of deleting them.

    Each round drops rows whose score is not above ``score_threshold``, picks
    the best-scoring remaining row, and multiplies the score of every other
    row whose IoU with it exceeds ``threshold`` by ``exp(-IoU / λ)``.

    The overlap is computed by a module-level callable named ``iou`` that is
    not defined in this block; it receives each row without its last element.

    Args:
        boxes: 2-D array-like of rows ``[c0, c1, c2, c3, score]``; the score
            is read from the last column.
        threshold: IoU above which a row's score is decayed.
        λ: decay scale; a smaller value decays scores more strongly.
        score_threshold: rows whose (possibly decayed) score is not strictly
            greater than this are discarded.
            NOTE(review): the default 0.001 is an assumption taken from the
            reference Soft-NMS implementation; confirm the intended cut-off.

    Returns:
        list of ``[c0, c1, c2, c3, score]`` lists of Python floats, in the
        order the rows were selected.
    """
    # Float copy: score decay must not truncate on integer input and must not
    # modify the caller's data in place.
    boxes = np.array(boxes, dtype=float)
    result = []
    if boxes.size == 0:
        return result

    while len(boxes) > 0:
        # Prune rows whose score has fallen to or below the cut-off.
        boxes = boxes[boxes[:, -1] > score_threshold]
        if len(boxes) == 0:
            break
        max_ = np.argmax(boxes[:, -1])
        max_cor = boxes[max_]
        boxes = np.delete(boxes, max_, axis=0)
        result.append(max_cor)
        for i in tqdm(range(len(boxes))):
            overlap = iou(max_cor[:-1], boxes[i][:-1])  # computed once per pair
            if overlap > threshold:
                boxes[i, -1] *= np.exp(-overlap / λ)
    return [[float(value) for value in row[:5]] for row in result]


# 快速nms
def nms_3(boxes, threshold=0.5):  # fast_nms_3
    """Fast NMS: suppress every box that overlaps any higher-scoring box.

    Boxes are first ranked by descending score (last element of each row).
    An upper-triangular IoU matrix is then filled so that column ``j`` holds
    the overlaps of box ``j`` with all boxes ranked above it. A box is kept
    when the largest value in its column is strictly below ``threshold``.
    Unlike greedy NMS, a box can be suppressed by a box that is itself
    suppressed.

    The overlap is computed by a module-level callable named ``iou`` that is
    not defined in this block; it receives each row without its last element.

    Args:
        boxes: sequence of rows whose last element is the score.
        threshold: a box survives when its maximum IoU with higher-scoring
            boxes is ``< threshold``.

    Returns:
        list of surviving rows, ordered by descending score.
    """
    # The triangular-matrix trick is only valid on score-sorted input; the
    # sort is stable, so equal scores keep their input order.
    ranked = sorted(boxes, key=lambda box: box[-1], reverse=True)
    count = len(ranked)

    overlaps = np.zeros((count, count))
    for i in range(count - 1):
        for j in range(i + 1, count):
            overlaps[i, j] = iou(ranked[i][:-1], ranked[j][:-1])

    # Column maximum = strongest overlap with any higher-ranked box.
    column_peak = [max(overlaps[:, j]) for j in range(count)]
    return [box for box, peak in zip(ranked, column_peak) if peak < threshold]

两种IOU的计算方式（上面的NMS函数内部调用的是名为 iou 的函数，使用前需要把它指向下面的 iou_1 或 iou_2，例如 iou = iou_1）：

def iou_1(predicted_bound, ground_truth_bound):
    """Return the intersection-over-union of two axis-aligned rectangles.

    Args:
        predicted_bound: ``(xmin, ymin, xmax, ymax)`` of the predicted box.
        ground_truth_bound: ``(xmin, ymin, xmax, ymax)`` of the reference box.

    Returns:
        Intersection area divided by union area, or ``0.0`` when the union
        area is not positive (e.g. both boxes are degenerate).
    """
    pxmin, pymin, pxmax, pymax = predicted_bound
    gxmin, gymin, gxmax, gymax = ground_truth_bound

    parea = (pxmax - pxmin) * (pymax - pymin)
    garea = (gxmax - gxmin) * (gymax - gymin)

    # Overlap rectangle; a negative extent means no overlap on that axis.
    inter_w = max(0, min(pxmax, gxmax) - max(pxmin, gxmin))
    inter_h = max(0, min(pymax, gymax) - max(pymin, gymin))
    area = inter_w * inter_h

    # Union = sum of both areas minus the part counted twice.
    union = parea + garea - area
    if union <= 0:
        # Avoid dividing by zero for zero-area boxes.
        return 0.0
    return area / union


def iou_2(predict_loc, ground_truth_loc):
    """Return the intersection-over-union of two circles or two rectangles.

    The shape is chosen by the length of the inputs:

    * length 3 -> circles given as ``(x, y, r)``;
    * length 4 -> axis-aligned rectangles ``(x_min, y_min, x_max, y_max)``.

    Args:
        predict_loc: predicted circle or rectangle.
        ground_truth_loc: reference circle or rectangle (same length).

    Returns:
        The IoU as a float; ``0`` when the lengths are unsupported or differ,
        and ``0.0`` when the shapes do not overlap or the union is empty.
    """
    # ---- circles -----------------------------------------------------------
    if len(predict_loc) == len(ground_truth_loc) == 3:
        x1, y1, r1 = predict_loc
        x2, y2, r2 = ground_truth_loc

        # Distance between the two centres.
        d = np.sqrt(np.square(x2 - x1) + np.square(y2 - y1))

        # Disjoint or externally tangent circles share no area. This also
        # covers two zero-radius circles.
        if d >= r1 + r2:
            return 0.0

        # One circle lies inside the other (includes concentric/identical):
        # intersection is the small disc, union is the large disc.
        if d <= abs(r1 - r2):
            small, large = min(r1, r2), max(r1, r2)
            return (small / large) ** 2

        # Law of cosines: half-angle at each centre subtended by the chord
        # through the two intersection points.
        k1_cos = (np.square(r1) + np.square(d) - np.square(r2)) / (2 * r1 * d)
        k2_cos = (np.square(r2) + np.square(d) - np.square(r1)) / (2 * r2 * d)
        # Guard against rounding pushing the cosine just outside [-1, 1].
        k1_cos = min(1.0, max(-1.0, k1_cos))
        k2_cos = min(1.0, max(-1.0, k2_cos))

        angle_o1 = np.arccos(k1_cos)  # radians
        angle_o2 = np.arccos(k2_cos)

        # Kite formed by the two centres and the two intersection points:
        # two congruent triangles, each 1/2 * r1 * d * sin(angle_o1).
        k1_sin = np.sqrt(1 - np.square(k1_cos))
        s_qua = r1 * d * k1_sin

        # Circular sectors spanning the full chord angle (2 * half-angle):
        # (2 * angle / (2 * pi)) * pi * r^2 == angle * r^2.
        s_1 = angle_o1 * r1 * r1
        s_2 = angle_o2 * r2 * r2

        # Lens area = both sectors minus the kite they double-cover.
        s_i = s_1 + s_2 - s_qua
        return s_i / (np.pi * r1 * r1 + np.pi * r2 * r2 - s_i)

    # ---- rectangles --------------------------------------------------------
    elif len(predict_loc) == len(ground_truth_loc) == 4:
        p_x_min, p_y_min, p_x_max, p_y_max = predict_loc
        g_x_min, g_y_min, g_x_max, g_y_max = ground_truth_loc

        s_pre = (p_x_max - p_x_min) * (p_y_max - p_y_min)
        s_gro = (g_x_max - g_x_min) * (g_y_max - g_y_min)

        # Overlap rectangle; a negative extent means no overlap on that axis.
        x_min = max(p_x_min, g_x_min)
        y_min = max(p_y_min, g_y_min)
        x_max = min(p_x_max, g_x_max)
        y_max = min(p_y_max, g_y_max)

        s_i = max(0, x_max - x_min) * max(0, y_max - y_min)
        s_u = s_pre + s_gro - s_i  # union = both areas minus the overlap
        if s_u <= 0:
            # Avoid dividing by zero for zero-area boxes.
            return 0.0
        return s_i / s_u

    else:
        return 0

第二种：numpy处理NMS，安装numpy库

import numpy as np

# numpy 处理
def nms_4(bboxes, threshold):
    """Greedy NMS with the per-round IoU computed in one vectorised step.

    Coordinates use the inclusive-pixel convention: width and height are
    computed as ``x2 - x1 + 1`` and ``y2 - y1 + 1``.

    Args:
        bboxes: 2-D array-like of rows ``[x1, y1, x2, y2, score]``.
        threshold: a candidate survives a round only when its IoU with the
            selected box is strictly below this value.

    Returns:
        list of the selected rows (as numpy arrays), highest score first;
        an empty list for empty input.
    """
    bboxes = np.array(bboxes)
    if bboxes.size == 0:
        # Nothing to suppress; column slicing below would fail on a 1-D
        # empty array.
        return []

    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    score = bboxes[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # argsort is ascending, so the best remaining candidate is always last.
    order = np.argsort(score)
    keep = []  # indices into bboxes, in selection order
    while order.size > 0:
        index = order[-1]
        rest = order[:-1]
        keep.append(index)

        # Overlap rectangle between the selected box and every other candidate.
        inner_x1 = np.maximum(x1[index], x1[rest])
        inner_y1 = np.maximum(y1[index], y1[rest])
        inner_x2 = np.minimum(x2[index], x2[rest])
        inner_y2 = np.minimum(y2[index], y2[rest])
        in_w = np.maximum(0.0, inner_x2 - inner_x1 + 1)
        in_h = np.maximum(0.0, inner_y2 - inner_y1 + 1)
        inner = in_w * in_h

        ratio = inner / (areas[index] + areas[rest] - inner)
        # Keep only candidates that overlap the selected box weakly.
        order = rest[ratio < threshold]
    return [bboxes[i] for i in keep]

第三种：torch处理NMS，安装torch库

import torch
from torchvision.ops import nms

# torch1 官方处理
def nms_torch_5(bboxes, threshold):
    """Run NMS through the ``nms`` operator imported from torchvision.

    Args:
        bboxes: array-like of rows; every column except the last is passed
            to ``nms`` as the box, and column 4 is passed as the score.
        threshold: forwarded to ``nms`` as ``iou_threshold``.

    Returns:
        Nested Python list holding the rows selected by ``nms``.
    """
    detections = torch.tensor(bboxes)  # convert the input to a tensor
    # ``nms`` returns the indices of the rows to keep.
    kept_idx = nms(
        boxes=detections[:, :-1],
        scores=detections[:, 4],
        iou_threshold=threshold,
    )
    # tensor -> numpy array -> nested list
    return detections[kept_idx].numpy().tolist()



# torch2
def nms_torch(bboxes, scores, threshold=0.5):
    """Greedy NMS written with plain tensor operations.

    Args:
        bboxes: tensor whose columns 0-3 are ``x1, y1, x2, y2``.
        scores: 1-D tensor with one score per row of ``bboxes``.
        threshold: candidates with IoU ``<= threshold`` against the selected
            box survive a round.

    Returns:
        ``torch.LongTensor`` with the indices of the kept boxes, highest
        score first.
    """
    left, top, right, bottom = (bboxes[:, col] for col in range(4))
    box_area = (right - left) * (bottom - top)  # one area per box
    ranking = scores.sort(0, descending=True)[1]  # indices, best score first

    selected = []
    while ranking.numel() > 0:
        if ranking.numel() == 1:
            # Last candidate left (possibly a 0-dim tensor): keep it and stop.
            selected.append(ranking.item())
            break

        best = ranking[0].item()
        selected.append(best)
        others = ranking[1:]

        # Overlap rectangle: clamp the other boxes' edges to the best box.
        overlap_left = left[others].clamp(min=left[best])
        overlap_top = top[others].clamp(min=top[best])
        overlap_right = right[others].clamp(max=right[best])
        overlap_bottom = bottom[others].clamp(max=bottom[best])
        overlap_w = (overlap_right - overlap_left).clamp(min=0)
        overlap_h = (overlap_bottom - overlap_top).clamp(min=0)
        inter = overlap_w * overlap_h

        ratio = inter / (box_area[best] + box_area[others] - inter)
        # Positions are relative to ``others``, i.e. shifted by one
        # compared with ``ranking``.
        survivors = (ratio <= threshold).nonzero().squeeze()
        if survivors.numel() == 0:
            break
        ranking = ranking[survivors + 1]
    return torch.LongTensor(selected)

第四种：GPU加速处理的NMS，安装cupy库

import cupy as cp

def gpu_nms_7(dets, thresh):
    """Run greedy non-maximum suppression on the GPU with CuPy.

    Args:
        dets: 2-D array-like, one bounding box per row. The first four
            columns are the top-left and bottom-right corner coordinates
            and the fifth column is the confidence score.
        thresh: IoU threshold; boxes whose IoU with a selected box is
            greater than this value are removed.

    Returns:
        Nested list of the kept rows (coordinates and score), listed in
        the same order as they appear in the input.
    """
    dets = cp.array(dets)
    left = dets[:, 0]
    top = dets[:, 1]
    right = dets[:, 2]
    bottom = dets[:, 3]
    conf = dets[:, 4]

    # Inclusive-pixel convention: extents are (max - min + 1).
    box_area = (right - left + 1) * (bottom - top + 1)
    ranking = conf.argsort()[::-1]  # indices, best score first

    # Boolean mask over input rows; True marks a kept box.
    selected = cp.zeros(ranking.shape, dtype=cp.bool_)

    while ranking.size > 0:
        best = ranking[0]
        others = ranking[1:]
        selected[best] = True

        # Overlap rectangle between the best box and every other candidate.
        overlap_left = cp.maximum(left[best], left[others])
        overlap_top = cp.maximum(top[best], top[others])
        overlap_right = cp.minimum(right[best], right[others])
        overlap_bottom = cp.minimum(bottom[best], bottom[others])

        overlap_w = cp.maximum(0.0, overlap_right - overlap_left + 1)
        overlap_h = cp.maximum(0.0, overlap_bottom - overlap_top + 1)
        inter = overlap_w * overlap_h

        ratio = inter / (box_area[best] + box_area[others] - inter)
        # Positions are relative to ``others``, hence the +1 shift.
        survivors = cp.where(ratio <= thresh)[0]
        ranking = ranking[survivors + 1]

    kept_rows = cp.where(selected)[0]
    return dets[kept_rows].tolist()

数据处理速度对比（不考虑普通的NMS：普通的NMS处理少量数据尚可，处理大量数据则要花费好几个小时甚至几天时间）：

在25000个样本中，torch官方的NMS处理速度最快(0.5s），numpy次之(3.15s)，GPU处理速度最慢（12s）！

在420000个样本中，GPU处理速度最快（110s），torch官方的NMS处理速度次之(261s），numpy处理速度最慢（918s）！

然而，对于torch官方实现的NMS在处理多数据的时候，仍旧会出现交并比大于阈值的“重复框”！因此，torch官方的NMS处理较多数据，仍旧有很大的漏洞！

numpy和GPU在处理大量数据时虽然也有误差，但相对于torch官方实现的误差，基本上可以忽略不计！

综合：GPU处理NMS的性价比不错！