【深度学习】非极大抑制算法及python代码

Florrie Zhu

已于 2022-09-01 11:23:19 修改

阅读量1.2k

点赞数 1

分类专栏：深度学习之基础知识文章标签： python 深度学习算法 nms 源码

于 2022-01-04 16:14:49 首次发布

本文链接：https://blog.csdn.net/Resume_f/article/details/122305336

版权

深度学习之基础知识专栏收录该内容

14 篇文章 16 订阅

订阅专栏

非极大抑制算法NMS

NMS（非极大值抑制）的目的是消除多余的框，只保留最佳预测框。原理在这里不详细讲了，没有太多数学公式，下面列举一下大概步骤：

对所有预测框按置信度进行排序
找出分数最高的检测框Bmax
遍历其他框，计算它们与Bmax的交并比（IoU，即重叠面积除以两框并集面积）
如果IoU大于设定的阈值，则剔除该框
继续对未处理过的预测框进行排序
重复以上步骤，直到所有预测框都被处理完…

搬了b站一大佬的代码，并写上了注释，有需要自取~（代码依赖 NumPy，使用前需要先 import numpy as np）

def nms(boxes, num_classes, conf_thres=0.5, nms_thres=0.4):
    """Run per-class non-maximum suppression on a batch of raw predictions.

    Args:
        boxes: array of shape (batch_size, all_boxes, 4 + 1 + n_class_columns).
            The first four columns are (center_x, center_y, width, height),
            column 4 is the objectness score and the remaining columns are
            the per-class scores. The input array is not modified.
        num_classes: number of classes. Not used by the computation (every
            column from index 5 onwards is treated as a class score); kept
            so existing callers keep working.
        conf_thres: a box is kept only if its objectness score (column 4)
            is strictly greater than this value.
        nms_thres: within one class, a box is dropped when its IoU with an
            already selected box is greater than or equal to this value.

    Returns:
        A 1-D object array of length batch_size. Element ``i`` is an array
        of shape (k_i, 7) holding the boxes kept for image ``i`` as
        (x1, y1, x2, y2, objectness, class_confidence, class_index), grouped
        by class and sorted by descending objectness inside each class.
        Images without any surviving box get an empty (0, 7) array, so the
        result always stays aligned with the batch index.

    Note:
        Relies on the ``IoU`` function defined in this module.
    """
    # Work on a copy so the caller's array is left untouched.
    preds = np.array(boxes, copy=True)
    if not np.issubdtype(preds.dtype, np.floating):
        # Integer input would truncate the half-width / half-height below.
        preds = preds.astype(np.float64)
    bs = preds.shape[0]

    # Convert (center, size) boxes to (top-left, bottom-right) corners.
    centers = preds[:, :, 0:2].copy()
    half_sizes = preds[:, :, 2:4] / 2
    preds[:, :, 0:2] = centers - half_sizes
    preds[:, :, 2:4] = centers + half_sizes

    # One slot per image; an object array avoids building a ragged ndarray
    # when images keep different numbers of boxes.
    output = np.empty(bs, dtype=object)

    for i in range(bs):
        # prediction shape: (num_boxes, 4 + 1 + n_class_columns)
        prediction = preds[i]

        # Keep only the boxes whose objectness exceeds the threshold.
        detections = prediction[prediction[:, 4] > conf_thres]
        if len(detections) == 0:
            output[i] = np.zeros((0, 7), dtype=preds.dtype)
            continue

        # Best class score and the index of that class for every box.
        class_conf = np.max(detections[:, 5:], axis=-1, keepdims=True)
        class_pred = np.expand_dims(np.argmax(detections[:, 5:], axis=-1), axis=-1)

        # Resulting shape: (num_boxes, 4 + 1 + 2).
        detections = np.concatenate(
            [detections[:, :5], class_conf, class_pred], axis=-1
        )

        # Boxes selected for this image, across all classes.
        best_box = []

        # Suppress each predicted class independently.
        for c in np.unique(detections[:, -1]):
            candidates = detections[detections[:, -1] == c]

            # Sort by objectness, highest first.
            order = np.argsort(candidates[:, 4])[::-1]
            candidates = candidates[order]

            while len(candidates) != 0:
                # The highest-scoring remaining box is always kept.
                best_box.append(candidates[0])
                if len(candidates) == 1:
                    break

                # Drop the remaining boxes that overlap it too much.
                ious = IoU(best_box[-1], candidates[1:])
                candidates = candidates[1:][ious < nms_thres]

        output[i] = np.stack(best_box)

    return output
	



def IoU(b1, b2):
    """Compute the intersection-over-union of one box against other boxes.

    Args:
        b1: a single box; its first four entries are (x1, y1, x2, y2).
        b2: array of shape (N, >=4) whose first four columns are
            (x1, y1, x2, y2). A single 1-D box is accepted as well.

    Returns:
        Array of shape (N,) with the IoU of ``b1`` against every row of
        ``b2`` (a scalar when ``b2`` is a single 1-D box).
    """
    b1 = np.asarray(b1)
    b2 = np.asarray(b2)

    b1_x1, b1_y1, b1_x2, b1_y2 = b1[0], b1[1], b1[2], b1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = b2[..., 0], b2[..., 1], b2[..., 2], b2[..., 3]

    # Intersection rectangle: the larger of the top-left corners and the
    # smaller of the bottom-right corners.
    inter_rect_x1 = np.maximum(b1_x1, b2_x1)
    inter_rect_y1 = np.maximum(b1_y1, b2_y1)
    inter_rect_x2 = np.minimum(b1_x2, b2_x2)
    inter_rect_y2 = np.minimum(b1_y2, b2_y2)

    # Clamp at zero so boxes that do not overlap contribute no area.
    inter_w = np.maximum(inter_rect_x2 - inter_rect_x1, 0)
    inter_h = np.maximum(inter_rect_y2 - inter_rect_y1, 0)
    inter_area = inter_w * inter_h

    # Union = sum of both areas minus the part counted twice.
    area_b1 = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    area_b2 = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    # The lower bound on the denominator avoids division by zero for
    # degenerate (zero-area) boxes.
    iou = inter_area / np.maximum(area_b1 + area_b2 - inter_area, 1e-6)
    return iou