1. 基本思想
NMS(Non-Maximum Suppression,非极大值抑制)算法的本质是搜索局部极大值、抑制非极大值元素。在目标检测中,NMS根据score矩阵和region的坐标信息,从候选框中筛选出置信度较高的bounding box:当两个box的空间位置非常接近时,以score更高的box作为基准,计算另一个box与它的IoU(即重合度);如果IoU超过阈值,就抑制score较低的box。因为没有必要输出两个几乎重合的box,只保留score较高的那个即可。
2. 计算流程
1. 所有检测框按置信度从高到低排序
2. 取当前置信度最高的框并将其保留,然后删除剩余框中与这个框的IoU高于阈值的框
3. 重复第2步直到所有框处理完。
3. 存在问题
- 采用逐框顺序处理的模式,需要反复计算IoU,拖累了运算效率。
- 剔除机制太严格,依据NMS阈值暴力剔除。
- 阈值是经验选取的。
- 评判标准是IoU,即只考虑两个框的重叠面积,这对描述box重叠关系或许不够全面。
4. 代码实现
import numpy as np
class multiclass_nms():
    """Per-class non-maximum suppression (NMS) over a batch of detections.

    Expected inputs of ``multiclass_nms_`` (shapes are not validated):
        boxes: [N, M, 4] predicted locations of M boxes for each of the N
            images in the batch, laid out as [xmin, ymin, xmax, ymax].
        scores: [N, C, M] confidences; for each of the C classes there is
            one score per box.

    Args:
        score_thresh: Boxes scoring below this value are discarded before
            suppression. ``None`` disables the filter, so every box is
            considered.
        nms_thresh: A box is suppressed when its IoU with an already kept,
            higher-scoring box of the same class exceeds this value.
        pos_nms_topk: Maximum number of detections kept per image after
            NMS has been run for all classes.
    """

    def __init__(self, score_thresh=0.1, nms_thresh=0.5, pos_nms_topk=120):
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.pos_nms_topk = pos_nms_topk

    def multiclass_nms_(self, boxes, scores):
        """Run single-class NMS for every class of every image.

        Args:
            boxes: Array-like of shape [N, M, 4] in xyxy layout.
            scores: Array-like of shape [N, C, M].

        Returns:
            A list with one entry per image. Each entry is a float array of
            shape [K, 6] whose rows are
            ``[class_id, score, xmin, ymin, xmax, ymax]``. An image with no
            surviving box yields an empty list ``[]`` instead of an array.
            Rows are sorted by descending score only when more than
            ``pos_nms_topk`` detections had to be truncated; otherwise they
            are grouped by class in ascending class order.
        """
        boxes = np.asarray(boxes)
        scores = np.asarray(scores)
        batch_size = boxes.shape[0]
        class_num = scores.shape[1]

        rets = []
        for i in range(batch_size):
            image_boxes = boxes[i]
            image_scores = scores[i]

            # Each class is suppressed independently of the others.
            per_class = []
            for c in range(class_num):
                class_scores = image_scores[c]
                keep_inds = self.nms(image_boxes, class_scores)
                if len(keep_inds) < 1:
                    continue
                rows = np.zeros([len(keep_inds), 6])
                rows[:, 0] = c
                rows[:, 1] = class_scores[keep_inds]
                rows[:, 2:6] = image_boxes[keep_inds]
                per_class.append(rows)

            if not per_class:
                # Historical behaviour: "no detections" is an empty list.
                rets.append(per_class)
                continue

            detections = np.concatenate(per_class, axis=0)
            if len(detections) > self.pos_nms_topk:
                # Keep only the highest-scoring detections across classes.
                top = np.argsort(detections[:, 1])[::-1][:self.pos_nms_topk]
                detections = detections[top]
            rets.append(detections)
        return rets

    def box_iou_xyxy(self, box1, box2):
        """Return the IoU of boxes given as [xmin, ymin, xmax, ymax].

        Coordinates are read from the last axis, so either argument may be
        a single box (shape [4]) or a stack of boxes (shape [..., 4]); the
        two are broadcast against each other. Widths and heights use the
        inclusive-pixel convention (``max - min + 1``).
        """
        box1 = np.asarray(box1)
        box2 = np.asarray(box2)

        x1min, y1min = box1[..., 0], box1[..., 1]
        x1max, y1max = box1[..., 2], box1[..., 3]
        s1 = (y1max - y1min + 1.) * (x1max - x1min + 1.)

        x2min, y2min = box2[..., 0], box2[..., 1]
        x2max, y2max = box2[..., 2], box2[..., 3]
        s2 = (y2max - y2min + 1.) * (x2max - x2min + 1.)

        # Corners of the overlap rectangle; a negative extent means the
        # boxes are disjoint and is clamped to zero.
        xmin = np.maximum(x1min, x2min)
        ymin = np.maximum(y1min, y2min)
        xmax = np.minimum(x1max, x2max)
        ymax = np.minimum(y1max, y2max)
        inter_h = np.maximum(ymax - ymin + 1., 0.)
        inter_w = np.maximum(xmax - xmin + 1., 0.)
        intersection = inter_h * inter_w

        union = s1 + s2 - intersection
        return intersection / union

    def nms(self, bboxes, scores):
        """Greedy single-class NMS.

        Args:
            bboxes: Array-like of shape [M, 4] in xyxy layout.
            scores: Array-like of shape [M] with one score per box.

        Returns:
            Integer array of the kept box indices, ordered by descending
            score. It is empty (but still integer-typed, hence usable as an
            index) when no box survives.
        """
        bboxes = np.asarray(bboxes)
        scores = np.asarray(scores)

        # Visit boxes from the highest score to the lowest.
        order = np.argsort(scores)[::-1]
        keep_inds = []
        for cur_ind in order:
            # Scores are descending, so once one falls below the threshold
            # all remaining ones do too. A threshold of None keeps them all.
            if (self.score_thresh is not None
                    and scores[cur_ind] < self.score_thresh):
                break
            if keep_inds:
                # Compare against every kept box in one vectorised call.
                ious = self.box_iou_xyxy(bboxes[cur_ind], bboxes[keep_inds])
                if np.any(ious > self.nms_thresh):
                    continue
            keep_inds.append(cur_ind)
        return np.array(keep_inds, dtype=np.intp)