上一篇文章我们介绍了NMS算法。结尾处我们提到传统NMS存在的两个问题:
1. 当两个目标框接近时,分数更低的框就会因为与之重叠面积过大而被删掉
2. NMS的阈值需要手动确定:设置小了会漏检,设置大了会误检
针对上述两个问题,我们可以不直接删除所有IOU大于阈值的框,而是降低其置信度,这就是Soft-NMS算法的原理。
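NMS公式:
$$
s_i = \begin{cases} s_i, & \mathrm{iou}(M, b_i) < N_t \\ 0, & \mathrm{iou}(M, b_i) \ge N_t \end{cases}
$$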
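Soft-NMS的改进思想是:M为当前得分最高的框,$b_i$为待处理框,$b_i$和M的IOU越大,$b_i$的得分$s_i$就下降得越厉害(而不是直接置零)。有两种衰减方式,一种是线性加权:
$$
s_i = \begin{cases} s_i, & \mathrm{iou}(M, b_i) < N_t \\ s_i \left(1 - \mathrm{iou}(M, b_i)\right), & \mathrm{iou}(M, b_i) \ge N_t \end{cases}
$$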
另一种是高斯加权:
$$
s_i = s_i \, e^{-\frac{\mathrm{iou}(M, b_i)^2}{\sigma}}
$$
代码如下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wednesday August 14 22:28:00 2018
@author: CW
"""
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Demo detections, one row per box. soft_nms() reads columns 0-3 as
# x1, y1, x2, y2 and column 4 as the confidence score.
# Rows 0 and 3 describe the same rectangle with different scores.
boxes = np.array([[100, 100, 210, 210, 0.71],
[250, 250, 420, 420, 0.8],
[220, 200, 320, 330, 0.92],
[100, 100, 210, 210, 0.72],
[230, 240, 325, 330, 0.81],
[220, 230, 315, 340, 0.91]])
def iou(xmin, ymin, xmax, ymax, areas, lastInd, beforeInd, threshold):
    """Return the entries of ``beforeInd`` whose IoU with box ``lastInd`` is <= ``threshold``.

    Args:
        xmin, ymin, xmax, ymax: per-box coordinate arrays, indexed by box id.
        areas: per-box areas, indexed by box id (presumably computed with the
            same "+1" pixel convention used for the intersection below --
            verify against the caller).
        lastInd: index of the reference box.
        beforeInd: indices of the boxes compared against the reference box.
        threshold: maximum IoU a box may have with the reference box to be
            returned.

    Returns:
        A list holding, in their original order, the elements of
        ``beforeInd`` whose IoU with the reference box does not exceed
        ``threshold``.
    """
    # Corners of the intersection rectangle between the reference box and
    # every candidate box.
    left = np.maximum(xmin[lastInd], xmin[beforeInd])
    top = np.maximum(ymin[lastInd], ymin[beforeInd])
    right = np.minimum(xmax[lastInd], xmax[beforeInd])
    bottom = np.minimum(ymax[lastInd], ymax[beforeInd])

    # Width/height are clamped at zero so disjoint boxes contribute no overlap.
    overlap_w = np.maximum(0.0, right - left + 1)
    overlap_h = np.maximum(0.0, bottom - top + 1)
    overlap = overlap_w * overlap_h

    combined = areas[beforeInd] + areas[lastInd] - overlap
    ratios = overlap / combined

    return [candidate
            for candidate, ratio in zip(beforeInd, ratios)
            if ratio <= threshold]
def soft_nms(boxes, threshold=0.001, sigma=0.5, Nt=0.3, method=1):
    """Run Soft-NMS on ``boxes`` in place and return the indices of the survivors.

    Rather than deleting every box that overlaps a higher-scoring box, the
    overlapping box's score is decayed according to its IoU with that box.
    A box is discarded only once its score drops below ``threshold``.

    Args:
        boxes: 2-D NumPy array whose rows are ``[x1, y1, x2, y2, score]``.
            It is MODIFIED IN PLACE: rows are reordered so the surviving
            boxes occupy the leading rows (in the order they were selected,
            highest remaining score first) and their scores hold the decayed
            values. Rows past the returned range contain stale data. A float
            dtype is expected; an integer array would truncate decayed scores
            on assignment.
        threshold: boxes whose decayed score falls below this are discarded.
        sigma: divisor of the squared IoU in the Gaussian decay (method 2).
        Nt: IoU above which the linear decay (method 1) or hard suppression
            (any other method) is applied.
        method: 1 = linear decay, 2 = Gaussian decay, anything else =
            classic NMS (score set to zero above ``Nt``).

    Returns:
        ``list(range(n))`` where ``n`` is the number of surviving boxes; the
        indices refer to the reordered ``boxes`` array.
    """
    live = len(boxes)  # rows [0, live) are still in play; shrinks on discard
    i = 0
    # Stop as soon as i reaches the live count: rows at or beyond it have
    # already been discarded and must not be selected again.
    while i < live:
        # Find the highest-scoring box among rows i..live-1 (first wins ties).
        best = i
        for pos in range(i + 1, live):
            if boxes[pos, 4] > boxes[best, 4]:
                best = pos

        # Move it to row i. The right-hand side is a copy (fancy indexing),
        # so the two rows are exchanged safely in one assignment.
        if best != i:
            boxes[[i, best], :5] = boxes[[best, i], :5]

        tx1, ty1, tx2, ty2 = boxes[i, :4]
        top_area = (tx2 - tx1 + 1) * (ty2 - ty1 + 1)

        # Decay the scores of the remaining boxes that overlap the selected one.
        pos = i + 1
        while pos < live:
            x1, y1, x2, y2 = boxes[pos, :4]
            iw = min(tx2, x2) - max(tx1, x1) + 1
            ih = min(ty2, y2) - max(ty1, y1) + 1
            if iw > 0 and ih > 0:
                inter = iw * ih
                area = (x2 - x1 + 1) * (y2 - y1 + 1)
                ua = float(top_area + area - inter)
                ov = inter / ua  # IoU between the selected box and this box

                if method == 1:  # linear
                    weight = 1 - ov if ov > Nt else 1
                elif method == 2:  # gaussian
                    weight = np.exp(-(ov * ov) / sigma)
                else:  # original NMS
                    weight = 0 if ov > Nt else 1

                boxes[pos, 4] = weight * boxes[pos, 4]

                # Discard a box whose score fell below the threshold by
                # overwriting it with the last live row and shrinking the
                # live range; the row moved into `pos` is examined next.
                if boxes[pos, 4] < threshold:
                    live -= 1
                    boxes[pos, :5] = boxes[live, :5]
                    continue
            pos += 1
        i += 1

    return list(range(live))
def bbox(boxes, name='test', thickness=2):
    """Draw ``boxes`` and their scores on a blank 500x500 canvas in a new figure.

    Args:
        boxes: sequence of rows; elements 0-3 of each row are used as the two
            rectangle corners and element 4 as the score label.
        name: title placed above the plot.
        thickness: line thickness passed to ``cv2.rectangle``.

    The function returns nothing; it creates a matplotlib figure showing the
    canvas.
    """
    canvas = np.zeros([500, 500], float)
    gray = (128, 128, 128)

    for row in boxes:
        # Each corner is handed to OpenCV with its two coordinates swapped
        # (element 1 first, then element 0).
        # NOTE(review): OpenCV points are (x, y) -- confirm the swap is intended.
        corner_a = (int(row[1]), int(row[0]))
        corner_b = (int(row[3]), int(row[2]))
        cv2.rectangle(canvas, corner_a, corner_b, gray, thickness)

        # Score rounded to two decimals, then rendered via float -> str.
        label = '%s' % (float('%.2f' % row[4]))
        # The label origin is the first corner shifted by 5 along element 0.
        label_origin = (int(row[1]), int(row[0]) - 5)
        cv2.putText(canvas, label, label_origin, cv2.FONT_HERSHEY_DUPLEX, 0.7, gray, 1)

    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    fig.tight_layout()
    ax.imshow(canvas)
    ax.set_title(name, fontsize=25)
if __name__ == '__main__':
    # Draw the raw detections before suppression.
    bbox(boxes, 'before soft-nms')
    # soft_nms() reorders and rescales `boxes` in place; the returned indices
    # refer to that reordered array.
    remain = soft_nms(boxes)
    # Draw only the surviving boxes, now carrying their decayed scores.
    bbox(boxes[remain], 'after soft-nms')
    # The figures are only created above; show them explicitly so they appear
    # when this file is run as a plain script.
    plt.show()
soft-nms之前:
soft-nms之后:
如图所示,重叠的边界框没有被直接删除,而是置信度被降低了;只有与更高分框完全重合(IOU=1)的那个重复框,在线性加权下得分衰减为0,低于阈值而被剔除。