非极大值抑制(NMS):去除与得分最高的目标框IOU值>thresh的目标框,保留与得分最高的目标框IOU值≤thresh的目标框;
算法:
输入:dets为框列表,每个框的格式为[x1,y1,x2,y2,score];thresh为IOU阈值
1.将所有框按score值从大到小排序,将索引存储为列表order
2.取order中的第一个值(对应为score最大的框)
3.计算该框与其他框的IOU值
4.选取IOU≤thresh的框
5.将选定的框的索引赋值给order
6.如果order中有值,则执行步骤2
下面是Fast R-CNN关于NMS的源代码(python版),Faster R-CNN也是用的这段代码。
import numpy as np
import cv2
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    Greedy non-maximum suppression: repeatedly keep the highest-scoring
    remaining box and discard every other remaining box whose IoU with it
    is greater than ``thresh``.

    Args:
        dets: Array-like of rows ``[x1, y1, x2, y2, score]``. Widths and
            heights are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``.
        thresh: IoU threshold; boxes with ``IoU <= thresh`` relative to the
            currently kept box survive to the next round.

    Returns:
        List of row indices into ``dets`` for the kept boxes, ordered by
        descending score. Empty input yields an empty list.
    """
    dets = np.asarray(dets)
    if dets.size == 0:
        # Nothing to suppress; also avoids 2-D indexing on an empty 1-D array.
        return []

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() is ascending, so reverse it to get indices by descending score.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Intersection rectangle between the top-scoring box and each
        # remaining box (results follow the ordering of ``rest``).
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp at zero so disjoint boxes contribute no intersection area.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        # IoU = intersection / union.
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Only boxes that do not overlap the kept box too much remain.
        order = rest[ovr <= thresh]

    return keep
图像显示
def score(dets, y):
    """Score each box by how much of the target box it covers.

    For every box the score is ``intersection(box, y) / area(y)``; note
    that this is not an IoU, because the denominator is the target box
    area only.

    Args:
        dets: Array-like of boxes; the first four columns of each row are
            ``[x1, y1, x2, y2]``. Extra columns are ignored.
        y: Target box ``[x1, y1, x2, y2]``.

    Returns:
        List with one score per row of ``dets`` (empty for empty input).
    """
    dets = np.asarray(dets, dtype=float)
    if dets.size == 0:
        return []
    y = np.asarray(y, dtype=float)

    # Widths/heights use the same "+ 1" convention as the overlap below.
    target_area = (y[2] - y[0] + 1) * (y[3] - y[1] + 1)

    # Corners of the intersection rectangle, computed for all boxes at once.
    ix1 = np.maximum(dets[:, 0], y[0])
    iy1 = np.maximum(dets[:, 1], y[1])
    ix2 = np.minimum(dets[:, 2], y[2])
    iy2 = np.minimum(dets[:, 3], y[3])

    # Clamp at zero so boxes that miss the target score 0.
    w = np.maximum(0.0, ix2 - ix1 + 1)
    h = np.maximum(0.0, iy2 - iy1 + 1)

    return list(w * h / target_area)
if __name__ == '__main__':
    # Demo: score three candidate boxes against a target box, run NMS on
    # them and draw the result on a random-noise image.

    # Candidate boxes as [x1, y1, x2, y2].
    dets = np.array([[212., 312., 812., 712.],
                     [262., 262., 762., 762.],
                     [312., 213., 712., 812.],
                     ])
    # Target box used to score the candidates.
    y = np.array([542., 400., 842., 700.])

    # Column vector of scores, shape (3, 1):
    # approximately [[0.90033223], [0.73421927], [0.56810631]].
    score_1 = np.array([score(dets, y)]).T
    dets = np.hstack((dets, score_1))  # shape (3, 5)
    keep = py_cpu_nms(dets, 0.2)  # keep == [0]: only the top-scoring box survives

    # Images must be uint8, so the random floats are cast on construction.
    # Swap this for cv2.imread(path) to draw on a real image instead.
    img = np.array(np.random.uniform(0, 255, size=(1024, 1024, 3)), dtype=np.uint8)
    print(type(img))

    # All candidate boxes in green, labelled with their row index.
    for i, det in enumerate(dets):
        x1 = int(det[0])
        y1 = int(det[1])
        x2 = int(det[2])
        y2 = int(det[3])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
        cv2.putText(img, str(i), (x1, y1), cv2.FONT_HERSHEY_COMPLEX_SMALL, 4, (0, 255, 0), 3)

    # Target box in red (OpenCV colours are BGR).
    cv2.rectangle(img, (int(y[0]), int(y[1])), (int(y[2]), int(y[3])), (0, 0, 255), 3)

    # Boxes kept by NMS in magenta. ``keep`` is a list of indices, so index
    # row by row: ``dets[keep]`` would be 2-D and ``int(dets[keep][0])``
    # would fail on a whole row.
    for k in keep:
        kept = dets[k]
        cv2.rectangle(img, (int(kept[0]), int(kept[1])), (int(kept[2]), int(kept[3])), (255, 0, 255), 3)

    cv2.imshow('img', img)
    cv2.waitKey()
    cv2.destroyAllWindows()