图像处理随笔——soft-nms_softnms原理-CSDN博客

本文链接：https://blog.csdn.net/neil3611244/article/details/81709914

上一篇文章我们介绍了NMS算法。结尾处我们提到传统NMS存在的两个问题：

1. 当两个目标框接近时，分数更低的框就会因为与之重叠面积过大而被删掉

2. NMS的阈值需要手动确定，设置小了会漏检，设置大了会误检

针对上述两个问题，我们可以不直接删除所有IOU大于阈值的框，而是降低其置信度，即softnms算法原理。

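NMS公式：

$$
s_i =
\begin{cases}
s_i, & \mathrm{iou}(M, b_i) < N_t \\
0, & \mathrm{iou}(M, b_i) \ge N_t
\end{cases}
$$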

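softnms改进思想是：M为当前得分最高框，bi为待处理框，bi和M的IOU越大，bi的得分si就下降得越厉害（而不是直接置零）。有两种衰减方式，一种是线性加权：

$$
s_i =
\begin{cases}
s_i, & \mathrm{iou}(M, b_i) < N_t \\
s_i \left(1 - \mathrm{iou}(M, b_i)\right), & \mathrm{iou}(M, b_i) \ge N_t
\end{cases}
$$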

一种是高斯加权：

$$
s_i = s_i \, e^{-\frac{\mathrm{iou}(M, b_i)^2}{\sigma}}
$$

代码如下：

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wednesday August 14 22:28:00 2018

@author: CW
"""
import numpy as np
import cv2
import matplotlib.pyplot as plt


# Sample detections, one per row. soft_nms reads columns 0-3 as the box
# corners (x1, y1, x2, y2) and column 4 as the confidence score.
boxes = np.array([[100, 100, 210, 210, 0.71],
                  [250, 250, 420, 420, 0.8],
                  [220, 200, 320, 330, 0.92],
                  [100, 100, 210, 210, 0.72],
                  [230, 240, 325, 330, 0.81],
                  [220, 230, 315, 340, 0.91]])


def iou(xmin, ymin, xmax, ymax, areas, lastInd, beforeInd, threshold):
    """Return the entries of ``beforeInd`` whose IoU with box ``lastInd`` is at most ``threshold``.

    ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``areas`` are indexed by box;
    ``lastInd`` selects the reference box and ``beforeInd`` the candidates it
    is compared against. Widths and heights use the inclusive-pixel ``+ 1``
    convention.
    """
    # Corners of the overlap rectangle between the reference box and every candidate.
    left = np.maximum(xmin[lastInd], xmin[beforeInd])
    top = np.maximum(ymin[lastInd], ymin[beforeInd])
    right = np.minimum(xmax[lastInd], xmax[beforeInd])
    bottom = np.minimum(ymax[lastInd], ymax[beforeInd])

    # Disjoint boxes give a negative extent, which is clamped to zero.
    overlap_w = np.maximum(0.0, right - left + 1)
    overlap_h = np.maximum(0.0, bottom - top + 1)
    overlap = overlap_w * overlap_h

    ratios = overlap / (areas[beforeInd] + areas[lastInd] - overlap)

    # Keep only the candidates that do not overlap the reference box too much.
    survivors = []
    for candidate, ratio in zip(beforeInd, ratios):
        if ratio <= threshold:
            survivors.append(candidate)
    return survivors

def _decay_weight(ov, Nt, sigma, method):
    """Return the factor a score is multiplied by for overlap ``ov`` with the top box."""
    if method == 1:  # linear: decay only above the overlap threshold
        return 1 - ov if ov > Nt else 1
    if method == 2:  # gaussian: smooth decay for every overlapping box
        return np.exp(-(ov * ov) / sigma)
    # any other value: original (hard) NMS
    return 0 if ov > Nt else 1


def soft_nms(boxes, threshold=0.001, sigma=0.5, Nt=0.3, method=1):
    """Run Soft-NMS on ``boxes`` in place and return the indices of the kept rows.

    ``boxes`` is a 2-D numpy array with one detection per row: columns 0-3
    are ``x1, y1, x2, y2`` and column 4 is the score. The array is modified:
    rows are reordered so the kept detections occupy the first ``len(result)``
    rows in descending selection order, their scores are decayed, and rows
    past that point are leftovers that must be ignored. Decayed scores are
    written back into ``boxes``, so it should have a floating-point dtype.

    Whole rows are moved, so any additional columns stay attached to their box.

    Parameters:
        boxes: detections to filter (mutated, see above).
        threshold: a box is discarded once its score drops below this value.
        sigma: spread of the gaussian decay (``method == 2`` only).
        Nt: overlap threshold for the linear and hard-NMS rules.
        method: 1 = linear decay, 2 = gaussian decay, anything else = hard NMS.

    Returns:
        ``list(range(k))`` where ``k`` is the number of surviving boxes; use
        it to index the mutated ``boxes``.
    """
    N = len(boxes)  # number of live rows; shrinks as boxes are discarded
    i = 0
    # Stop at the live count: rows at index >= N hold discarded leftovers.
    while i < N:
        # Find the highest-scoring box among the not-yet-selected rows
        # (strict comparison keeps the first one on ties).
        maxpos = i
        for pos in range(i + 1, N):
            if boxes[pos, 4] > boxes[maxpos, 4]:
                maxpos = pos

        # Bring it to slot i by exchanging the two complete rows.
        if maxpos != i:
            boxes[[i, maxpos]] = boxes[[maxpos, i]]

        tx1, ty1, tx2, ty2 = boxes[i, :4]
        top_area = (tx2 - tx1 + 1) * (ty2 - ty1 + 1)

        # Decay the score of every remaining box that overlaps the selected one.
        pos = i + 1
        while pos < N:
            x1, y1, x2, y2 = boxes[pos, :4]
            iw = min(tx2, x2) - max(tx1, x1) + 1
            ih = min(ty2, y2) - max(ty1, y1) + 1
            if iw > 0 and ih > 0:
                area = (x2 - x1 + 1) * (y2 - y1 + 1)
                ua = float(top_area + area - iw * ih)
                ov = iw * ih / ua  # IoU between the selected box and this box

                boxes[pos, 4] = _decay_weight(ov, Nt, sigma, method) * boxes[pos, 4]

                if boxes[pos, 4] < threshold:
                    # Discard: overwrite this row with the last live row and
                    # shrink the live range. Do not advance, because the row
                    # that just moved into `pos` still has to be examined.
                    boxes[pos] = boxes[N - 1]
                    N -= 1
                    continue
            pos += 1
        i += 1

    return list(range(N))

def bbox(boxes, name='test', thickness=2):
    """Draw every row of ``boxes`` on a blank 500x500 canvas and plot it.

    Each row supplies four corner values (columns 0-3) and a score
    (column 4). The score is rendered next to the rectangle, rounded to two
    decimals. The canvas is shown in a new matplotlib figure titled ``name``;
    nothing is returned.
    """
    gray = (128, 128, 128)
    canvas = np.zeros([500, 500], float)

    for row in boxes:
        c0, c1, c2, c3 = (int(value) for value in row[:4])

        # Both corner points are handed to OpenCV with their components reversed.
        cv2.rectangle(canvas, (c1, c0), (c3, c2), gray, thickness)

        # Score label, placed 5 units before the first corner's first component.
        label = str(float('%.2f' % row[4]))
        cv2.putText(canvas, label, (c1, c0 - 5), cv2.FONT_HERSHEY_DUPLEX, 0.7, gray, 1)

    figure, axis = plt.subplots(1, 1, figsize=(12, 12))
    figure.tight_layout()
    axis.imshow(canvas)
    axis.set_title(name, fontsize=25)



if __name__ == '__main__':
    # Draw the raw detections first: soft_nms reorders and rescales `boxes`
    # in place, so the original layout is gone after the call.
    bbox(boxes, 'before soft-nms')

    # Run Soft-NMS; `remain` indexes the surviving rows of the mutated array.
    remain = soft_nms(boxes)

    # Draw only the detections that survived.
    bbox(boxes[remain], 'after soft-nms')

    # Without this the two figures are never displayed when the file is run
    # as a plain script (outside an interactive matplotlib session).
    plt.show()

soft-nms之前：