有什么比一张图更能说明问题呢?NMS(Non-Maximum Suppression,非极大值抑制)广泛应用于边缘检测、人脸检测和目标检测等任务,用于消除冗余的候选框,也有专门研究它的论文。下面对Faster-RCNN_TF中nms的Python源码进行注释。
![]() |
图1 nms的一个功能示意图,图片来源 |
- 源码文件nms.py
def nms(dets, thresh):
    """Greedy non-maximum suppression over scored, axis-aligned boxes.

    Args:
        dets: Array-like whose rows are ``[x1, y1, x2, y2, score]``.
        thresh: IoU threshold. A box is discarded when its IoU with an
            already-kept, higher-scoring box is strictly greater than this.

    Returns:
        List of row indices into ``dets`` for the boxes that survive,
        ordered by descending score. Empty input yields ``[]``.
    """
    dets = np.asarray(dets)
    if dets.size == 0:
        return []

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # Area of every box; the +1 treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Row indices sorted by descending score.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]  # index of the highest-scoring remaining box
        keep.append(i)
        rest = order[1:]

        # Corners of the intersection between box i and every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp to zero so non-overlapping boxes get an empty intersection.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h  # intersection area of box i with each remaining box
        ovr = inter / (areas[i] + areas[rest] - inter)  # IoU

        # A low IoU means the box probably frames a different object: keep it
        # as a candidate for the next round.
        inds = np.where(ovr <= thresh)[0]
        # inds is relative to order[1:], hence the +1 to index into order.
        order = order[inds + 1]
    return keep
- 测试用例
import cv2
import numpy as np
import random
def _draw_boxes(canvas, boxes):
    """Draw each box outline and its score text onto ``canvas`` in place.

    Args:
        canvas: Image array passed to ``cv2.rectangle`` / ``cv2.putText``.
        boxes: Array whose rows are ``[x1, y1, x2, y2, score]``.
    """
    for box in boxes.tolist():
        x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
        score = box[-1]
        # Random vertical position for the label, between the box's y1 and y2.
        y_text = int(random.uniform(y1, y2))
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 255, 255), 2)
        cv2.putText(canvas, str(score), (x2 - 30, y_text), 2, 1, (255, 255, 0))


img = np.zeros((300, 400), np.uint8)
# Builtin float instead of the np.float alias, which newer NumPy removed.
dets = np.array(
    [[83, 54, 165, 163, 0.8], [67, 48, 118, 132, 0.5], [91, 38, 192, 171, 0.6]],
    float,
)

# Show the candidate boxes and their confidences before suppression.
img_cp = img.copy()
_draw_boxes(img_cp, dets)
cv2.imshow("ori_img", img_cp)

# Original author's note: 0.3 is the default in the faster-rcnn config file.
rtn_box = nms(dets, 0.3)
cls_dets = dets[rtn_box, :]
# Single-argument form prints the same text on Python 2 and Python 3.
print("nms box: %s" % (cls_dets,))

# Show the boxes that survived suppression.
img_cp = img.copy()
_draw_boxes(img_cp, cls_dets)
cv2.imshow("nms_img", img_cp)
# Without waitKey the HighGUI windows are never rendered.
cv2.waitKey(0)
输出图像为
参考文献: