- non_max_suppression:自己写的差点意思,记录下别人的
IOU
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False):
    """Return the IoU (optionally GIoU) of ``box1`` against each box in ``box2``.

    Args:
        box1: A single box holding 4 values.
        box2: Boxes laid out as ``n x 4``. It is transposed first so that
            ``box2[k]`` addresses the k-th coordinate of every box.
        x1y1x2y2: When True the 4 values are corner coordinates
            ``(x1, y1, x2, y2)``; otherwise they are read as
            ``(center_x, center_y, width, height)`` and converted to corners.
        GIoU: When True return Generalized IoU
            (https://arxiv.org/pdf/1902.09630.pdf) instead of plain IoU.

    Returns:
        The IoU (or GIoU) value of ``box1`` with every box of ``box2``.
    """
    box2 = box2.t()

    # Get the corner coordinates of the bounding boxes.
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # Inputs are (x, y, w, h): move half the size either side of the center.
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area; clamp(0) turns "no overlap" (negative extent) into 0.
    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union area; the 1e-16 term keeps the division below away from 0 / 0.
    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area

    iou = inter_area / union_area

    if GIoU:
        # Smallest axis-aligned box enclosing both boxes.
        c_x1, c_x2 = torch.min(b1_x1, b2_x1), torch.max(b1_x2, b2_x2)
        c_y1, c_y2 = torch.min(b1_y1, b2_y1), torch.max(b1_y2, b2_y2)
        c_area = (c_x2 - c_x1) * (c_y2 - c_y1) + 1e-16
        # Penalize by the share of the enclosing box not covered by the union.
        return iou - (c_area - union_area) / c_area

    return iou
NMS
import numpy as np
def nms(dets, thresh):
    """Pure Python NMS baseline.

    Greedy non-maximum suppression: repeatedly keep the highest-scoring
    remaining detection and discard every other detection whose IoU with it
    exceeds ``thresh``.

    Args:
        dets: 2-D array whose columns 0-3 are ``x1, y1, x2, y2`` and whose
            column 4 is the score.
        thresh: IoU threshold; a detection is kept only while its overlap
            with every already-kept detection is ``<= thresh``.

    Returns:
        List of row indices into ``dets`` that survive, ordered by
        descending score.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # Widths and heights are computed as (max - min + 1) throughout.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Indices sorted by descending score.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(i)

        # Intersection rectangle of the current best box with every other box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Continue only with the boxes that do not overlap the kept one too much.
        order = rest[ovr <= thresh]

    return keep
SoftNMS
def box_soft_nms(bboxes, scores, labels, nms_threshold=0.3, soft_threshold=0.3, sigma=0.5, mode='union'):
    """
    Class-wise Soft-NMS with Gaussian score decay.

    For each class the highest-scoring remaining box is kept. Every other box
    of that class whose overlap with it is at least ``nms_threshold`` has its
    score multiplied by ``exp(-overlap ** 2 / sigma)`` instead of being removed
    outright. Boxes whose score is then below ``soft_threshold`` are dropped,
    and the procedure repeats on the survivors.

    :param bboxes: predicted boxes, one ``(x1, y1, x2, y2)`` row per detection
    :param scores: one score per detection; it only becomes meaningful
        together with the matching entry of ``labels``
    :param labels: class label per detection, aligned with ``bboxes``/``scores``
    :param nms_threshold: overlap at or above which a box's score is decayed
    :param soft_threshold: boxes whose (possibly decayed) score falls below
        this value are discarded
    :param sigma: divisor inside the Gaussian decay ``exp(-overlap ** 2 / sigma)``
    :param mode: ``'union'`` divides the intersection by the union area,
        ``'min'`` divides it by the smaller of the two box areas
    :return: ``(box_keep, labels_keep, scores_keep)`` - three parallel lists
        with one kept box, its label and its score per entry. The stored score
        is the box's score at the moment it was selected, so it includes any
        decay applied in earlier rounds.
    :raises TypeError: if ``mode`` is unknown (raised when a class has at
        least two boxes to compare)
    """
    box_keep = []
    labels_keep = []
    scores_keep = []

    # unique() runs on whatever device `labels` lives on, so the function no
    # longer requires CUDA to be available.
    for c in labels.unique():
        class_mask = labels == c
        c_boxes = bboxes[class_mask]
        c_scores = scores[class_mask]

        # c_boxes / c_scores always describe the same surviving boxes, so every
        # index computed in a round refers to the tensors of that round.
        while c_scores.numel() > 0:
            _, order = c_scores.sort(0, descending=True)
            i = order[0]        # current top-scoring box
            rest = order[1:]    # every other box, by descending score

            box_keep.append(c_boxes[i])
            labels_keep.append(c)
            scores_keep.append(c_scores[i])

            if rest.numel() == 0:
                break

            x1 = c_boxes[:, 0]
            y1 = c_boxes[:, 1]
            x2 = c_boxes[:, 2]
            y2 = c_boxes[:, 3]
            areas = (x2 - x1 + 1) * (y2 - y1 + 1)

            # Intersection of the top box with each remaining box.
            xx1 = torch.max(x1[rest], x1[i])
            yy1 = torch.max(y1[rest], y1[i])
            xx2 = torch.min(x2[rest], x2[i])
            yy2 = torch.min(y2[rest], y2[i])
            w = (xx2 - xx1 + 1).clamp(min=0)
            h = (yy2 - yy1 + 1).clamp(min=0)
            inter = w * h

            if mode == 'union':
                ovr = inter / (areas[i] + areas[rest] - inter)
            elif mode == 'min':
                ovr = inter / torch.min(areas[rest], areas[i])
            else:
                raise TypeError('Unknown nms mode: %s.' % mode)

            # Soft suppression: scale down the score of strongly overlapping
            # boxes; boxes below nms_threshold keep their score (factor 1).
            decay = torch.where(
                ovr >= nms_threshold,
                torch.exp(-(ovr * ovr) / sigma),
                torch.ones_like(ovr),
            )
            rest_scores = c_scores[rest] * decay

            # Boolean-mask selection keeps c_boxes 2-D and c_scores 1-D even
            # when a single box survives.
            survivors = rest_scores >= soft_threshold
            c_boxes = c_boxes[rest][survivors]
            c_scores = rest_scores[survivors]

    return box_keep, labels_keep, scores_keep
FastNMS
def fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200, second_threshold:bool=False):
    '''
    Matrix-based NMS: suppress, per class, every box that overlaps a
    higher-scoring box of the same class by more than ``iou_threshold``.

    Shapes as given by the original notes:
    boxes: torch.Size([num_dets, 4])
    masks: torch.Size([num_dets, 32])
    scores: torch.Size([num_classes, num_dets])

    iou_threshold: a box is dropped when its largest IoU with any
        higher-scoring box of the same class exceeds this value.
    top_k: number of highest-scoring boxes kept per class before NMS.
    second_threshold: when True, additionally require
        ``score > self.conf_thresh`` for a box to be kept.

    Returns ``(boxes, masks, classes, scores)`` for the kept detections,
    sorted by descending score and truncated to ``cfg.max_num_detections``.

    NOTE(review): relies on ``jaccard`` and ``cfg`` defined outside this
    block; ``jaccard(boxes, boxes)`` is presumed to return pairwise IoU of
    shape [num_classes, num_dets, num_dets] - confirm against its definition.
    '''
    # Step 1: sort each class's boxes by descending score and keep the top_k.
    scores, idx = scores.sort(1, descending=True)
    # `scores` is now in descending order; `idx` holds the original positions.
    idx = idx[:, :top_k].contiguous()
    scores = scores[:, :top_k]
    num_classes, num_dets = idx.size()
    # Gather per-class copies: [num_classes, num_dets, 4] and [num_classes, num_dets, -1].
    boxes = boxes[idx.view(-1), :].view(num_classes, num_dets, 4)
    masks = masks[idx.view(-1), :].view(num_classes, num_dets, -1)

    # Step 2: IoU between every pair of boxes within each class.
    iou = jaccard(boxes, boxes)
    # Zero the diagonal and lower triangle so entry (i, j) is only non-zero
    # for i < j, i.e. box i scores higher than box j.
    iou.triu_(diagonal=1)
    # Column-wise max: the largest IoU of each box with any higher-scoring box.
    iou_max, _ = iou.max(dim=1)

    # Drop boxes whose overlap with a higher-scoring box exceeds the threshold.
    keep = (iou_max <= iou_threshold)
    if second_threshold:
        # Also require a minimum confidence for the kept boxes.
        keep *= (scores > self.conf_thresh)

    # Assign each kept detection to its corresponding class: row r of
    # `classes` is filled with the value r, e.g.
    #   [[0, 0, ..., 0],
    #    [1, 1, ..., 1],
    #    ...]
    classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep)

    classes = classes[keep]
    boxes = boxes[keep]
    masks = masks[keep]
    scores = scores[keep]

    # Only keep the top cfg.max_num_detections highest scores across all classes.
    scores, idx = scores.sort(0, descending=True)
    idx = idx[:cfg.max_num_detections]
    scores = scores[:cfg.max_num_detections]
    classes = classes[idx]
    boxes = boxes[idx]
    masks = masks[idx]
    return boxes, masks, classes, scores