非极大抑制算法NMS
NMS的目的是消除多余的框,只保留最佳预测框。原理在这里就不详细讲了,没有太多数学公式,下面列举一下大概步骤:
- 对所有预测框按置信度进行排序
- 找出分数最高的检测框Bmax
- 遍历其他框,计算当前框与Bmax的重叠程度(IoU,交并比)
- 如果IoU大于等于设定的阈值,则剔除该框
- 继续对未处理过的预测框进行排序
- 重复第1到5步,直到没有剩余的预测框为止
搬了b站一大佬的代码,并写上了注释,有需要自取~
def nms(boxes, num_classes, conf_thres=0.5, nms_thres=0.4):
    """Run per-class non-maximum suppression on a batch of predictions.

    Args:
        boxes: Array-like of shape
            (batch_size, all_boxes, 4 + 1 + num_classes). Columns 0-3 are
            (center_x, center_y, width, height), column 4 is the objectness
            score and every column after it is a per-class score. The
            caller's array is left untouched.
        num_classes: Number of classes. Not used by the computation (class
            scores are read from every column after index 4); kept so the
            signature stays compatible with existing callers.
        conf_thres: Boxes whose objectness score is not strictly greater
            than this value are discarded before suppression.
        nms_thres: Within one class, a box is suppressed when its IoU with
            an already kept box is greater than or equal to this value.

    Returns:
        One entry per input image, in batch order. Each entry is an array of
        shape (kept_boxes, 7) whose columns are
        (x1, y1, x2, y2, objectness, class_confidence, class_index); an
        image with no surviving box contributes an array of shape (0, 7).
        When every image keeps the same number of boxes the entries are
        stacked into a single (batch_size, kept_boxes, 7) array; otherwise
        a 1-D object array of length batch_size is returned, because a
        ragged result cannot be stored in a regular ndarray.
    """
    # Copy so the corner conversion below does not overwrite the caller's
    # data, and promote integer input so that "/ 2" is not truncated.
    boxes = np.array(boxes)
    if not np.issubdtype(boxes.dtype, np.floating):
        boxes = boxes.astype(np.float64)

    # Convert (center_x, center_y, width, height) to corner form
    # (x1, y1, x2, y2).
    half_w = boxes[:, :, 2] / 2
    half_h = boxes[:, :, 3] / 2
    corners = np.empty_like(boxes[:, :, :4])
    corners[:, :, 0] = boxes[:, :, 0] - half_w
    corners[:, :, 1] = boxes[:, :, 1] - half_h
    corners[:, :, 2] = boxes[:, :, 0] + half_w
    corners[:, :, 3] = boxes[:, :, 1] + half_h
    boxes[:, :, :4] = corners

    # Concatenating float boxes with integer class indices promotes the
    # dtype; use the same dtype for empty placeholders.
    out_dtype = np.result_type(boxes.dtype, np.intp)

    output = []
    for prediction in boxes:
        # prediction has shape (all_boxes, 4 + 1 + num_classes).
        # Keep only the boxes that are confident enough to contain an object.
        detections = prediction[prediction[:, 4] > conf_thres]
        if len(detections) == 0:
            # Keep a placeholder so output[i] still belongs to image i.
            output.append(np.zeros((0, 7), dtype=out_dtype))
            continue

        # Best class score and the index of that class for every box.
        class_conf = np.max(detections[:, 5:], axis=-1, keepdims=True)
        class_pred = np.argmax(detections[:, 5:], axis=-1)[:, np.newaxis]
        # Shape (num_boxes, 4 + 1 + 2): box, objectness, class score, class.
        detections = np.concatenate(
            [detections[:, :5], class_conf, class_pred], axis=-1
        )

        best_box = []
        for c in np.unique(detections[:, -1]):
            # Boxes of this class, sorted by objectness, highest first.
            candidates = detections[detections[:, -1] == c]
            order = np.argsort(candidates[:, 4])[::-1]
            candidates = candidates[order]

            while len(candidates) != 0:
                # The highest-scoring remaining box is always kept.
                best = candidates[0]
                best_box.append(best)
                if len(candidates) == 1:
                    break
                # Drop every remaining box that overlaps the kept one too much.
                ious = IoU(best, candidates[1:])
                candidates = candidates[1:][ious < nms_thres]

        output.append(np.stack(best_box).astype(out_dtype, copy=False))

    if not output:
        return np.array(output)
    if len({result.shape for result in output}) == 1:
        return np.stack(output)
    # Ragged result: fill an object array element by element so NumPy does
    # not try to broadcast the differently sized per-image arrays.
    packed = np.empty(len(output), dtype=object)
    for index, result in enumerate(output):
        packed[index] = result
    return packed
def IoU(b1, b2):
    """Compute the intersection-over-union of one box against many boxes.

    Args:
        b1: A single box; its first four entries are read as
            (x1, y1, x2, y2). Any further entries are ignored.
        b2: Array-like with one box per row; the first four columns are
            read as (x1, y1, x2, y2). Any further columns are ignored.

    Returns:
        A 1-D array with one IoU value per row of ``b2``.
    """
    b1 = np.asarray(b1)
    b2 = np.asarray(b2)
    b1_x1, b1_y1, b1_x2, b1_y2 = b1[0], b1[1], b1[2], b1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = b2[:, 0], b2[:, 1], b2[:, 2], b2[:, 3]

    # Intersection rectangle: the larger of the two top-left corners and
    # the smaller of the two bottom-right corners.
    inter_rect_x1 = np.maximum(b1_x1, b2_x1)
    inter_rect_y1 = np.maximum(b1_y1, b2_y1)
    inter_rect_x2 = np.minimum(b1_x2, b2_x2)
    inter_rect_y2 = np.minimum(b1_y2, b2_y2)

    # Clamp at zero so boxes that do not overlap get an empty intersection
    # instead of a negative (or spuriously positive) area.
    inter_area = np.maximum(inter_rect_x2 - inter_rect_x1, 0) * \
        np.maximum(inter_rect_y2 - inter_rect_y1, 0)

    # Union = area(b1) + area(b2) - intersection.
    area_b1 = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    area_b2 = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    # The lower bound on the denominator avoids division by zero for
    # degenerate (zero-area) boxes.
    iou = inter_area / np.maximum(area_b1 + area_b2 - inter_area, 1e-6)
    return iou