目录
yolov5 cluster_nms,cluster_SPM_nms,cluster_diounms,cluster_SPM_dist_nms,diou_nms
iou-giou-diou-ciou-nms python numpy 代码例子
yolov5 cluster_nms,cluster_SPM_nms,cluster_diounms,cluster_SPM_dist_nms,diou_nms
本机项目:
yolov5_mangguo_new
调用:
# Non-maximum suppression: pick the NMS routine named by `method`.
# Every routine receives the box coordinates (columns 0-3 of `pred`) and the
# scores (column 4) and yields the indices of the boxes to keep.
if method == 'standard':
    nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
elif method == 'soft':
    # The CUDA flag follows the tensor itself; a hard-coded 1 fails as soon as
    # `pred` lives on the CPU.
    nms_indices = soft_nms_pytorch(pred[:, :4], pred[:, 4], sigma=0.5, thresh=0.2, cuda=pred.is_cuda)
elif method == "cluster":
    nms_indices = cluster_nms(pred[:, :4], pred[:, 4], nms_thres)
elif method == "cluster_SPM":
    nms_indices = cluster_SPM_nms(pred[:, :4], pred[:, 4], nms_thres)
elif method == "cluster_diou":
    # NOTE(review): cluster_diounms also reads a `dense_mask` tensor and none
    # is passed here -- confirm how this branch is meant to be called.
    nms_indices = cluster_diounms(pred[:, :4], pred[:, 4], nms_thres)
elif method == "cluster_SPM_dist":
    nms_indices = cluster_SPM_dist_nms(pred[:, :4], pred[:, 4], nms_thres)
else:
    raise ValueError('Invalid NMS type!')
# -*- coding:utf-8 -*-
import time
import numpy as np
import torch
def soft_nms_pytorch(dets, box_scores, sigma=0.5, thresh=0.001, cuda=0):
    """
    Gaussian Soft-NMS implemented with PyTorch tensor operations.

    Boxes are picked one at a time in descending order of their current score
    (selection-sort style). Each time a box is picked, the scores of the boxes
    not yet picked are multiplied by ``exp(-IoU ** 2 / sigma)``.

    # Arguments
        dets:       tensor of shape [N, 4] holding box corners. The columns are
                    treated as [y1, x1, y2, x2]; the IoU is symmetric in the
                    two axes, so [x1, y1, x2, y2] gives the same result.
        box_scores: tensor of shape [N], one score per box. It is NOT modified:
                    reordering and decay happen on an internal copy, so a view
                    such as ``pred[:, 4]`` stays aligned with its boxes.
        sigma:      divisor of the squared IoU in the Gaussian decay.
        thresh:     boxes whose decayed score is not above `thresh` are dropped.
        cuda:       accepted for backward compatibility and ignored; all work
                    happens on the device of `dets`.

    # Returns
        Long tensor with the original indices of the kept boxes, in the order
        in which they were picked.
    """
    N = dets.shape[0]
    device = dets.device

    # Private copies: the loop below permutes rows and rescales scores.
    boxes = dets.clone()
    scores = box_scores.clone().to(device)
    order = torch.arange(N, dtype=torch.long, device=device)

    # Areas use the inclusive-pixel (+1) convention, as does the IoU below.
    areas = (boxes[:, 3] - boxes[:, 1] + 1) * (boxes[:, 2] - boxes[:, 0] + 1)

    # The last box has nothing after it to swap with or to decay.
    for i in range(N - 1):
        pos = i + 1

        # Bring the best remaining box to position i.
        best = pos + int(torch.argmax(scores[pos:]))
        if scores[i] < scores[best]:
            for t in (boxes, scores, areas, order):
                # The right-hand side is materialised first, so this is a swap.
                t[[i, best]] = t[[best, i]]

        # IoU of the picked box against every box still waiting.
        yy1 = torch.max(boxes[i, 0], boxes[pos:, 0])
        xx1 = torch.max(boxes[i, 1], boxes[pos:, 1])
        yy2 = torch.min(boxes[i, 2], boxes[pos:, 2])
        xx2 = torch.min(boxes[i, 3], boxes[pos:, 3])
        w = (xx2 - xx1 + 1).clamp(min=0)
        h = (yy2 - yy1 + 1).clamp(min=0)
        inter = w * h
        ovr = inter / (areas[i] + areas[pos:] - inter)

        # Gaussian decay of the remaining scores.
        scores[pos:] = torch.exp(-(ovr * ovr) / sigma) * scores[pos:]

    # Keep the original indices of the boxes whose score survived the decay.
    return order[scores > thresh]
def cluster_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS: matrix-form NMS iterated until the keep-set stops changing.

    Args:
        boxes: tensor [N, 4] of box corners, passed to `jaccard`.
        scores: tensor [N] with one score per box.
        iou_threshold: a box is suppressed when its IoU with a kept,
            higher-scored box exceeds this value.
        top_k: currently unused (the truncation is disabled).

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    # Upper triangle only: entry [i, j] (i < j) is the IoU between box j and
    # the higher-scored box i.
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)

    B = iou
    for _step in range(200):
        A = B
        # Largest overlap of every box with the boxes ranked above it.
        maxA = torch.max(A, dim=0)[0]
        # Zero the rows of boxes that are currently suppressed so that they
        # cannot suppress anything themselves in the next round.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    return idx[maxA <= iou_threshold]
def cluster_diounms(boxes, scores, iou_threshold: float = 0.5, dense_mask=None, top_k: int = 200):
    """Cluster-NMS on DIoU with an optional per-location threshold map.

    Args:
        boxes: tensor [N, 4] of box corners (x1, y1, x2, y2 as indexed below).
        scores: tensor [N] with one score per box.
        iou_threshold: lower bound of the suppression threshold.
        dense_mask: optional tensor that is squeezed on its two leading
            dimensions and then indexed as ``[y, x]`` with
            ``(y1 + y2) // 16`` / ``(x1 + x2) // 16`` clamped to 0..75 / 0..135.
            Values below `iou_threshold` are raised to it. When omitted (None
            or an empty list) the plain `iou_threshold` is used for every box.
            # NOTE(review): the clamp limits presumably mirror a 76x136 map --
            # confirm against the producer of the mask.
        top_k: currently unused (the truncation is disabled).

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    iou = diou(boxes_idx, boxes_idx, delta=0.7).triu_(diagonal=1)

    no_mask = dense_mask is None or (isinstance(dense_mask, (list, tuple)) and len(dense_mask) == 0)
    if no_mask:
        thresholds = torch.full((idx.numel(),), float(iou_threshold),
                                dtype=iou.dtype, device=iou.device)
    else:
        x_inds = ((boxes_idx[:, 0] + boxes_idx[:, 2]) // 16).clamp(0, 135).long()
        y_inds = ((boxes_idx[:, 1] + boxes_idx[:, 3]) // 16).clamp(0, 75).long()
        grid = dense_mask.squeeze(dim=0).squeeze(dim=0)
        # Follow the boxes' device rather than forcing CUDA.
        thresholds = grid[y_inds.to(grid.device), x_inds.to(grid.device)].to(iou.device)
        thresholds = thresholds.clamp(min=iou_threshold)

    B = iou
    for _step in range(200):
        A = B
        # Largest DIoU of every box with the boxes ranked above it.
        maxA = torch.max(A, dim=0)[0]
        # Rows of currently suppressed boxes are zeroed for the next round.
        E = torch.lt(maxA, thresholds).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    return idx[torch.lt(maxA, thresholds)]
def cluster_SPM_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS followed by a score-penalty step.

    After the cluster iteration converges, each score is multiplied by the
    product, over the boxes ranked above it that are still active, of
    ``exp(-IoU ** 2 / 0.2)``. Boxes whose penalised score exceeds 0.01 are kept.

    Args:
        boxes: tensor [N, 4] of box corners, passed to `jaccard`.
        scores: tensor [N] with one score per box (not modified).
        iou_threshold: IoU threshold used by the cluster iteration.
        top_k: currently unused.

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)

    B = iou
    for _step in range(200):
        A = B
        maxA = torch.max(A, dim=0)[0]
        # Rows of currently suppressed boxes are zeroed for the next round.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    # Column-wise product of the Gaussian penalties of the surviving overlaps.
    scores = torch.prod(torch.exp(-B ** 2 / 0.2), 0) * scores
    idx_out = scores > 0.01
    return idx[idx_out]
def cluster_SPM_dist_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS with a score penalty that is relaxed by centre distance.

    After the cluster iteration converges, the penalty of each surviving
    overlap is ``min(exp(-IoU ** 2 / 0.2) + D, 1)``, where ``D`` comes from
    `distance` (delta=0.7) and is only added where the overlap is positive.
    Each score is multiplied by the column-wise product of these penalties and
    boxes scoring above 0.15 are kept.

    Args:
        boxes: tensor [N, 4] of box corners.
        scores: tensor [N] with one score per box (not modified).
        iou_threshold: IoU threshold used by the cluster iteration.
        top_k: currently unused.

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)

    B = iou
    for _step in range(200):
        A = B
        maxA = torch.max(A, dim=0)[0]
        # Rows of currently suppressed boxes are zeroed for the next round.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    D = distance(boxes_idx, boxes_idx, delta=0.7)
    # Upper bound of the per-pair penalty: 1 wherever B >= 0.
    X = (B >= 0).float()
    penalty = torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X)
    scores = torch.prod(penalty, 0) * scores
    idx_out = scores > 0.15
    return idx[idx_out]
def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner-format boxes.

    Both inputs are expanded (as views, without copying) to [A, B, 2] so that
    every box of `box_a` is compared with every box of `box_b`.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4].
        box_b: (tensor) bounding boxes, Shape: [B, 4].
    Return:
        (tensor) intersection area, Shape: [A, B].
    """
    num_a, num_b = box_a.size(0), box_b.size(0)
    pair_shape = (num_a, num_b, 2)

    lower_a = box_a[:, :2].unsqueeze(1).expand(*pair_shape)
    lower_b = box_b[:, :2].unsqueeze(0).expand(*pair_shape)
    upper_a = box_a[:, 2:].unsqueeze(1).expand(*pair_shape)
    upper_b = box_b[:, 2:].unsqueeze(0).expand(*pair_shape)

    # Side lengths of the overlap rectangle; negative means no overlap.
    sides = torch.clamp(torch.min(upper_a, upper_b) - torch.max(lower_a, lower_b), min=0)
    return sides[:, :, 0] * sides[:, :, 1]
def jaccard(box_a, box_b, iscrowd=False):
    """Pairwise jaccard overlap (intersection over union) of two box sets.

    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    With ``iscrowd=True`` the intersection is divided by the area of the
    `box_a` box instead of by the union.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4]
        box_b: (tensor) bounding boxes, Shape: [B, 4]
    Return:
        (tensor) overlap matrix, Shape: [box_a.size(0), box_b.size(0)]
    """
    overlap = intersect(box_a, box_b)

    width_a, height_a = box_a[:, 2] - box_a[:, 0], box_a[:, 3] - box_a[:, 1]
    width_b, height_b = box_b[:, 2] - box_b[:, 0], box_b[:, 3] - box_b[:, 1]
    area_a = (width_a * height_a).unsqueeze(1).expand_as(overlap)  # [A,B]
    area_b = (width_b * height_b).unsqueeze(0).expand_as(overlap)  # [A,B]

    if iscrowd:
        return overlap / area_a
    return overlap / (area_a + area_b - overlap)  # [A,B]
def diou(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise Distance-IoU between two sets of corner-format boxes.

    For every pair the result is ``IoU - D ** delta`` with
    ``D = centre_distance ** 2 / enclosing_box_diagonal ** 2``.
    With ``iscrowd=True`` the result is ``intersection / area(box_a)`` and no
    distance term is subtracted.

    The intersection is computed here with a batch dimension, so both 2-D
    ([A, 4] / [B, 4]) and batched ([n, A, 4] / [n, B, 4]) inputs work.

    Args:
        box_a: (tensor) boxes as (x1, y1, x2, y2), Shape: [A, 4] or [n, A, 4]
        box_b: (tensor) boxes as (x1, y1, x2, y2), Shape: [B, 4] or [n, B, 4]
        delta: exponent applied to the normalised centre distance.
        iscrowd: return intersection over area(box_a) instead of DIoU.
    Return:
        (tensor) Shape: [A, B] for 2-D inputs, [n, A, B] for batched inputs.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # [n, A, 1, 4] against [n, 1, B, 4]: broadcasting forms every pair.
    lhs = box_a.unsqueeze(2)
    rhs = box_b.unsqueeze(1)
    l1, t1, r1, b1 = lhs[..., 0], lhs[..., 1], lhs[..., 2], lhs[..., 3]
    l2, t2, r2, b2 = rhs[..., 0], rhs[..., 1], rhs[..., 2], rhs[..., 3]

    inter = ((torch.min(r1, r2) - torch.max(l1, l2)).clamp(min=0) *
             (torch.min(b1, b2) - torch.max(t1, t2)).clamp(min=0))  # [n,A,B]
    area_a = (r1 - l1) * (b1 - t1)  # [n,A,1]
    area_b = (r2 - l2) * (b2 - t2)  # [n,1,B]
    union = area_a + area_b - inter

    # Box centres.
    x1, y1 = (r1 + l1) / 2, (b1 + t1) / 2
    x2, y2 = (r2 + l2) / 2, (b2 + t2) / 2

    # Smallest box enclosing both boxes of a pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps the ratio finite when the enclosing box is degenerate.
    D = ((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)
    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)
def d2iou(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise IoU minus an axis-wise normalised centre-distance penalty.

    For every pair the result is ``IoU - D ** delta`` where ``D`` is the larger
    of ``dx ** 2 / enclosing_width ** 2`` and ``dy ** 2 / enclosing_height ** 2``
    (dx, dy: centre offsets). With ``iscrowd=True`` the result is
    ``intersection / area(box_a)`` and no penalty is subtracted.

    The intersection is computed here with a batch dimension, so both 2-D
    ([A, 4] / [B, 4]) and batched ([n, A, 4] / [n, B, 4]) inputs work.

    Args:
        box_a: (tensor) boxes as (x1, y1, x2, y2), Shape: [A, 4] or [n, A, 4]
        box_b: (tensor) boxes as (x1, y1, x2, y2), Shape: [B, 4] or [n, B, 4]
        delta: exponent applied to the distance penalty.
        iscrowd: return intersection over area(box_a) instead.
    Return:
        (tensor) Shape: [A, B] for 2-D inputs, [n, A, B] for batched inputs.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # [n, A, 1, 4] against [n, 1, B, 4]: broadcasting forms every pair.
    lhs = box_a.unsqueeze(2)
    rhs = box_b.unsqueeze(1)
    l1, t1, r1, b1 = lhs[..., 0], lhs[..., 1], lhs[..., 2], lhs[..., 3]
    l2, t2, r2, b2 = rhs[..., 0], rhs[..., 1], rhs[..., 2], rhs[..., 3]

    inter = ((torch.min(r1, r2) - torch.max(l1, l2)).clamp(min=0) *
             (torch.min(b1, b2) - torch.max(t1, t2)).clamp(min=0))  # [n,A,B]
    area_a = (r1 - l1) * (b1 - t1)  # [n,A,1]
    area_b = (r2 - l2) * (b2 - t2)  # [n,1,B]
    union = area_a + area_b - inter

    # Box centres.
    x1, y1 = (r1 + l1) / 2, (b1 + t1) / 2
    x2, y2 = (r2 + l2) / 2, (b2 + t2) / 2

    # Smallest box enclosing both boxes of a pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # Worst of the two per-axis normalised offsets; 1e-7 avoids division by 0.
    D = torch.max(((x2 - x1) ** 2) / ((cr - cl) ** 2 + 1e-7),
                  ((y2 - y1) ** 2) / ((cb - ct) ** 2 + 1e-7))
    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)
def distance(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise normalised centre distance between two sets of boxes.

    For every pair the result is
    ``(centre_distance ** 2 / enclosing_box_diagonal ** 2) ** delta``.
    No overlap is computed; only box centres and the enclosing box are used.
    Both 2-D ([A, 4] / [B, 4]) and batched ([n, A, 4] / [n, B, 4]) inputs work.

    Args:
        box_a: (tensor) boxes as (x1, y1, x2, y2), Shape: [A, 4] or [n, A, 4]
        box_b: (tensor) boxes as (x1, y1, x2, y2), Shape: [B, 4] or [n, B, 4]
        delta: exponent applied to the normalised distance.
        iscrowd: accepted for signature parity with `diou`; has no effect.
    Return:
        (tensor) Shape: [A, B] for 2-D inputs, [n, A, B] for batched inputs.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # [n, A, 1, 4] against [n, 1, B, 4]: broadcasting forms every pair.
    lhs = box_a.unsqueeze(2)
    rhs = box_b.unsqueeze(1)
    l1, t1, r1, b1 = lhs[..., 0], lhs[..., 1], lhs[..., 2], lhs[..., 3]
    l2, t2, r2, b2 = rhs[..., 0], rhs[..., 1], rhs[..., 2], rhs[..., 3]

    # Box centres.
    x1, y1 = (r1 + l1) / 2, (b1 + t1) / 2
    x2, y2 = (r2 + l2) / 2, (b2 + t2) / 2

    # Smallest box enclosing both boxes of a pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps the ratio finite when the enclosing box is degenerate.
    out = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) ** delta
    return out if use_batch else out.squeeze(0)
def speed():
    """Time soft_nms_pytorch over 1000 random batches of 100 boxes each.

    The printed value is the total elapsed time in seconds; divided by the
    1000 runs and converted to milliseconds it is numerically the same as the
    average time per run in ms.
    """
    boxes = 1000 * torch.rand((1000, 100, 4), dtype=torch.float)
    boxscores = torch.rand((1000, 100), dtype=torch.float)

    # Use the GPU when one is available.
    cuda = 1 if torch.cuda.is_available() else 0
    if cuda:
        boxes, boxscores = boxes.cuda(), boxscores.cuda()

    started = time.time()
    for batch_boxes, batch_scores in zip(boxes, boxscores):
        soft_nms_pytorch(batch_boxes, batch_scores, cuda=cuda)
    elapsed = time.time() - started

    print("Average run time: %f ms" % elapsed)
def test():
    """Run soft_nms_pytorch on five hand-written boxes and print the result."""
    corner_rows = [
        [200, 200, 400, 400],
        [220, 220, 420, 420],
        [200, 240, 400, 440],
        [240, 200, 440, 400],
        [1, 1, 2, 2],
    ]
    boxes = torch.tensor(corner_rows, dtype=torch.float)
    boxscores = torch.tensor([0.8, 0.7, 0.6, 0.5, 0.9], dtype=torch.float)

    # Use the GPU when one is available.
    cuda = 1 if torch.cuda.is_available() else 0
    if cuda:
        boxes, boxscores = boxes.cuda(), boxscores.cuda()

    print(soft_nms_pytorch(boxes, boxscores, cuda=cuda))
if __name__ == '__main__':
    # Run the small sanity check by default; call speed() instead to benchmark.
    test()
代码参考:
在经典的NMS中,得分最高的检测框和其它检测框逐一算出一个对应的IOU值,并将该值超过NMS threshold的框全部过滤掉。可以看出,在经典NMS算法中,IOU是唯一考量的因素。但是在实际应用场景中,当两个不同物体挨得很近时,由于IOU值比较大,往往经过NMS处理后,只剩下一个检测框,这样导致漏检的错误情况发生。
一个成熟的IoU衡量指标应该要考虑预测框与真实框的重叠面积、中心点距离、长宽比三个方面。但是IoU 只考虑到了预测框与真实框重叠区域,并没有考虑到中心点距离、长宽比。
基于此,DIOU-NMS就不仅仅考虑IOU,还考虑两个框中心点之间的距离。如果两个框之间IOU比较大,但是两个框的中心距离比较大时,可能会认为这是两个物体的框而不会被过滤掉。
具体的IoU、GIoU、DIoU、CIoU的细节可以看我的另一篇博客: Bounding Box regression loss: IoU Loss、GIoU Loss、DIoU Loss、CIoU Loss.
3.2、DIoU
DIoU (Distance-IoU )。简单地在IoU loss基础上添加一个惩罚项,该惩罚项用于最小化两个bbox的中心点距离。
DIoU公式:$DIoU = IoU - \dfrac{\rho^2(b, b^{gt})}{c^2}$,对应的损失为 $L_{DIoU} = 1 - IoU + \dfrac{\rho^2(b, b^{gt})}{c^2}$。
如下图,绿色框代表真实框,黑色框代表预测框,$b$ 为预测框的中心,$b^{gt}$ 为真实框的中心,$\rho^2(b, b^{gt})$ 代表真实框与预测框中心距离的平方 $d^2$,$c$ 表示两个框的最小闭包区域(同时包含了预测框和真实框的最小矩形框)的对角线长度。
Hard NMS 和 DIoU NMS比较:就是target和其他检测框的计算IoU的方式变成了DIoU,其他所有操作都和hard nms 完全相同
四、代码实现
以下代码主要实现了:hard_nms、soft_nms、diou_nms以及一些hard nms的改进版本。
import torch
import numpy as np
import torchvision
import math
def non_max_suppression(prediction, conf_thres=0.1, nms_thres=0.6, multi_cls=True, method='diou_nms'):
    """
    Remove detections with an object confidence below `conf_thres`, then run
    non-maximum suppression per image to filter the remaining boxes.

    param:
        prediction: [batch, num_anchors, (x+y+w+h+1+num_classes)], the combined
                    predictions of the 3 anchors; boxes are (cx, cy, w, h).
        conf_thres: confidence threshold; boxes scoring below it are dropped.
        nms_thres:  overlap threshold; a box whose overlap with the current
                    target exceeds it is suppressed.
        multi_cls:  when True (or when conf_thres < 0.01) a box yields one
                    detection for every class scoring above conf_thres;
                    otherwise only its best class is kept.
        method:     NMS variant (https://github.com/ultralytics/yolov3/issues/679)
                                (https://github.com/ultralytics/yolov3/pull/795)
            -hard_nms:        torchvision hard NMS, applied per class
            -hard_nms_batch:  torchvision batched hard NMS over all classes
            -hard_nms_myself: hand-written hard NMS, applied per class
            -and:             hard NMS that also discards boxes with no
                              overlapping neighbour (fewer false positives)
            -merge:           hard NMS whose kept box is the score-weighted
                              average of the boxes it overlaps
            -soft_nms:        decays the scores of overlapping boxes instead
                              of removing them
            -diou_nms:        hard NMS using DIoU instead of IoU

    Returns one entry per image: None when nothing survives, otherwise a
    tensor of shape [n, 6] with columns (x1, y1, x2, y2, conf, class).

    Raises ValueError when `method` is not one of the names above.
    """
    valid_methods = ('hard_nms', 'hard_nms_batch', 'hard_nms_myself', 'and', 'merge', 'soft_nms', 'diou_nms')
    if method not in valid_methods:
        # An unknown name would otherwise silently drop almost every detection.
        raise ValueError('Invalid NMS method %r, expected one of %s' % (method, ', '.join(valid_methods)))

    # Box constraints: accepted width/height range in pixels.
    min_wh, max_wh = 2, 4096
    output = [None] * len(prediction)  # one slot per image for the final detections

    for image_i, pred in enumerate(prediction):
        # Filter 1: drop background (object confidence not above conf_thres).
        pred = pred[pred[:, 4] > conf_thres]

        # Filter 2: drop boxes that are too small or too large.
        pred = pred[(pred[:, 2:4] > min_wh).all(1) & (pred[:, 2:4] < max_wh).all(1)]

        # Nothing left for this image: move on to the next one.
        if len(pred) == 0:
            continue

        # score = obj_conf * cls_conf
        pred[..., 5:] *= pred[..., 4:5]

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(pred[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_cls or conf_thres < 0.01:
            # Filter 3: keep every (box, class) pair whose score exceeds
            # conf_thres; one box may therefore produce several detections.
            # i: box index, j: class index.
            i, j = (pred[:, 5:] > conf_thres).nonzero(as_tuple=False).t()
            pred = torch.cat((box[i], pred[i, j + 5].unsqueeze(1), j.float().unsqueeze(1)), 1)
        else:  # best class only
            conf, j = pred[:, 5:].max(1)
            pred = torch.cat((box, conf.unsqueeze(1), j.float().unsqueeze(1)), 1)[conf > conf_thres]

        # Nothing left after the class filter: move on to the next image.
        if len(pred) == 0:
            continue

        # Optional filter 4 (disabled): drop non-finite rows.
        # pred = pred[torch.isfinite(pred).all(1)]

        # Sort by descending score in preparation for NMS.
        pred = pred[pred[:, 4].argsort(descending=True)]

        # Batched NMS
        if method == 'hard_nms_batch':
            # batched_nms(boxes, scores, class ids, threshold)
            output[image_i] = pred[torchvision.ops.boxes.batched_nms(pred[:, :4], pred[:, 4], pred[:, 5], nms_thres)]
            continue

        # All other NMS methods work class by class.
        det_max = []  # kept detections (the successive targets)
        cls = pred[:, -1]
        for c in cls.unique():
            dc = pred[cls == c]  # all detections of class c
            n = len(dc)
            if n == 1:
                det_max.append(dc)  # No NMS required if only 1 prediction
                continue
            elif n > 500:
                # NMS is O(n^2); cap the work at the 500 best boxes.
                dc = dc[:500]  # limit to first 500 boxes: https://github.com/ultralytics/yolov3/issues/117

            if method == 'hard_nms':
                det_max.append(dc[torchvision.ops.boxes.nms(dc[:, :4], dc[:, 4], nms_thres)])

            elif method == 'hard_nms_myself':
                while dc.shape[0]:
                    det_max.append(dc[:1])  # highest-scored box becomes the target
                    if len(dc) == 1:  # only the target is left
                        break
                    # dc[0]: target, dc[1:]: remaining boxes
                    overlap = bbox_iou(dc[0], dc[1:])
                    dc = dc[1:][overlap < nms_thres]  # remove ious > threshold

            elif method == 'and':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    if iou.max() > 0.5:  # keep the target only if something overlaps it
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif method == 'merge':  # weighted mixture box
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    i = bbox_iou(dc[0], dc) > nms_thres  # boolean mask of the target's cluster
                    # The target always belongs to its own cluster; without
                    # this the loop could never shrink `dc`.
                    i[0] = True
                    weights = dc[i, 4:5]
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()  # score-weighted mean box
                    det_max.append(dc[:1])
                    dc = dc[i == 0]

            elif method == 'soft_nms':  # soft-NMS https://arxiv.org/abs/1704.04503
                sigma = 0.5  # soft-nms sigma parameter
                while len(dc):
                    det_max.append(dc[:1])  # keep the current target
                    if len(dc) == 1:
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # iou of the target with the rest
                    # Drop the target first so `dc` and `iou` have equal length.
                    dc = dc[1:]
                    dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay the scores
                    dc = dc[dc[:, 4] > conf_thres]

            elif method == 'diou_nms':  # DIoU NMS https://arxiv.org/pdf/1911.08287.pdf
                while dc.shape[0]:
                    det_max.append(dc[:1])  # highest-scored box becomes the target
                    if len(dc) == 1:  # only the target is left
                        break
                    # dc[0]: target, dc[1:]: remaining boxes
                    overlap = bbox_iou(dc[0], dc[1:], DIoU=True)
                    dc = dc[1:][overlap < nms_thres]  # remove dious > threshold

        if len(det_max):
            det_max = torch.cat(det_max)  # merge the per-class results
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # descending score

    return output
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):
    """IoU / GIoU / DIoU / CIoU of one box against n boxes.

    Args:
        box1: tensor whose first 4 entries describe a single box.
        box2: tensor of shape [n, >=4]; it is transposed so that its first 4
            rows are the coordinates of the n boxes.
        x1y1x2y2: True when boxes are (x1, y1, x2, y2); False when they are
            (center x, center y, width, height).
        GIoU, DIoU, CIoU: select the variant; checked in this order. With all
            three False the plain IoU is returned.
    Returns:
        Tensor of shape [n] with the selected overlap measure.
    """
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.t()

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area; clamp(0) turns negative extents (no overlap) into 0.
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
    union = (w1 * h1 + 1e-16) + w2 * h2 - inter  # 1e-16 guards against a zero denominator

    iou = inter / union  # iou
    if not (GIoU or DIoU or CIoU):
        return iou

    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
    if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
        c_area = cw * ch + 1e-16  # convex area
        return iou - (c_area - union) / c_area  # return GIoU

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    # convex diagonal squared
    c2 = cw ** 2 + ch ** 2 + 1e-16
    # centerpoint distance squared
    rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4
    if DIoU:
        return iou - rho2 / c2  # DIoU

    # CIoU https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
    with torch.no_grad():
        # 1e-16 avoids 0/0 = NaN when the boxes coincide (iou == 1, v == 0).
        alpha = v / (1 - iou + v + 1e-16)
    return iou - (rho2 / c2 + v * alpha)  # CIoU
def xywh2xyxy(x):
    """Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2].

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner.
    Accepts a torch tensor or a numpy array and returns a new container of the
    same kind; the input is left untouched.
    """
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    center_x, center_y = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    y[:, 0] = center_x - half_w  # top left x
    y[:, 1] = center_y - half_h  # top left y
    y[:, 2] = center_x + half_w  # bottom right x
    y[:, 3] = center_y + half_h  # bottom right y
    return y
总结:
hard_nms:直接删除相邻的同类别目标,对密集目标的输出不友好。
hard_nms_batch:普通的 (hard) nms 官方实现(c函数库),可支持gpu,支持多类别输入
hard_nms_myself:普通的 (hard) nms 自己实现的,只支持单类别输入
and:在hard-nms的逻辑基础上,增加是否为单独框的限制,删除没有重叠框的框(减少误检)。
merge:在hard-nms的基础上,增加保留框位置平滑策略(重叠框位置信息求解平均值),使框的位置更加精确。
soft_nms:改变其相邻同类别目标置信度(有关iou的函数),后期通过置信度阈值进行过滤,适用于目标密集的场景。
diou_nms:在hard-nms的基础上,用DIoU替换IoU,理由参照上文所述DIoU的优势。
参考:【YOLO v4】常见的非极大值抑制方法:(Hard) NMS、Soft NMS、DIoU NMS_满船清梦压星河HK的博客-CSDN博客_yolov4使用的nms
图片测试方法代码:
# -*- coding:utf-8 -*-
import time
import cv2
import numpy as np
import torch
import torchvision
from utils.general import non_max_suppression_face
def soft_nms_pytorch(dets, box_scores, sigma=0.3, thresh=0.001, cuda=0):
    """
    Gaussian Soft-NMS implemented with PyTorch tensor operations.

    Boxes are picked one at a time in descending order of their current score
    (selection-sort style). Each time a box is picked, the scores of the boxes
    not yet picked are multiplied by ``exp(-IoU ** 2 / sigma)``.

    # Arguments
        dets:       tensor of shape [N, 4] holding box corners. The columns are
                    treated as [y1, x1, y2, x2]; the IoU is symmetric in the
                    two axes, so [x1, y1, x2, y2] gives the same result.
        box_scores: tensor of shape [N], one score per box. It is NOT modified:
                    reordering and decay happen on an internal copy, so a view
                    such as ``pred[:, 4]`` stays aligned with its boxes.
        sigma:      divisor of the squared IoU in the Gaussian decay.
        thresh:     boxes whose decayed score is not above `thresh` are dropped.
        cuda:       accepted for backward compatibility and ignored; all work
                    happens on the device of `dets`.

    # Returns
        Long tensor with the original indices of the kept boxes, in the order
        in which they were picked.
    """
    N = dets.shape[0]
    device = dets.device

    # Private copies: the loop below permutes rows and rescales scores.
    boxes = dets.clone()
    scores = box_scores.clone().to(device)
    order = torch.arange(N, dtype=torch.long, device=device)

    # Areas use the inclusive-pixel (+1) convention, as does the IoU below.
    areas = (boxes[:, 3] - boxes[:, 1] + 1) * (boxes[:, 2] - boxes[:, 0] + 1)

    # The last box has nothing after it to swap with or to decay.
    for i in range(N - 1):
        pos = i + 1

        # Bring the best remaining box to position i.
        best = pos + int(torch.argmax(scores[pos:]))
        if scores[i] < scores[best]:
            for t in (boxes, scores, areas, order):
                # The right-hand side is materialised first, so this is a swap.
                t[[i, best]] = t[[best, i]]

        # IoU of the picked box against every box still waiting.
        yy1 = torch.max(boxes[i, 0], boxes[pos:, 0])
        xx1 = torch.max(boxes[i, 1], boxes[pos:, 1])
        yy2 = torch.min(boxes[i, 2], boxes[pos:, 2])
        xx2 = torch.min(boxes[i, 3], boxes[pos:, 3])
        w = (xx2 - xx1 + 1).clamp(min=0)
        h = (yy2 - yy1 + 1).clamp(min=0)
        inter = w * h
        ovr = inter / (areas[i] + areas[pos:] - inter)

        # Gaussian decay of the remaining scores.
        scores[pos:] = torch.exp(-(ovr * ovr) / sigma) * scores[pos:]

    # Keep the original indices of the boxes whose score survived the decay.
    return order[scores > thresh]
def cluster_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS: matrix-form NMS iterated until the keep-set stops changing.

    Args:
        boxes: tensor [N, 4] of box corners, passed to `jaccard`.
        scores: tensor [N] with one score per box.
        iou_threshold: a box is suppressed when its IoU with a kept,
            higher-scored box exceeds this value.
        top_k: currently unused (the truncation is disabled).

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    # Upper triangle only: entry [i, j] (i < j) is the IoU between box j and
    # the higher-scored box i.
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)

    B = iou
    for _step in range(200):
        A = B
        # Largest overlap of every box with the boxes ranked above it.
        maxA = torch.max(A, dim=0)[0]
        # Zero the rows of boxes that are currently suppressed so that they
        # cannot suppress anything themselves in the next round.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    return idx[maxA <= iou_threshold]
def cluster_diounms(boxes, scores, iou_threshold: float = 0.5, dense_mask=None, top_k: int = 200):
    """Cluster-NMS on DIoU with an optional per-location threshold map.

    Args:
        boxes: tensor [N, 4] of box corners (x1, y1, x2, y2 as indexed below).
        scores: tensor [N] with one score per box.
        iou_threshold: lower bound of the suppression threshold.
        dense_mask: optional tensor that is squeezed on its two leading
            dimensions and then indexed as ``[y, x]`` with
            ``(y1 + y2) // 16`` / ``(x1 + x2) // 16`` clamped to 0..75 / 0..135.
            Values below `iou_threshold` are raised to it. When omitted (None
            or an empty list) the plain `iou_threshold` is used for every box.
            # NOTE(review): the clamp limits presumably mirror a 76x136 map --
            # confirm against the producer of the mask.
        top_k: currently unused (the truncation is disabled).

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    iou = diou(boxes_idx, boxes_idx, delta=0.7).triu_(diagonal=1)

    no_mask = dense_mask is None or (isinstance(dense_mask, (list, tuple)) and len(dense_mask) == 0)
    if no_mask:
        thresholds = torch.full((idx.numel(),), float(iou_threshold),
                                dtype=iou.dtype, device=iou.device)
    else:
        x_inds = ((boxes_idx[:, 0] + boxes_idx[:, 2]) // 16).clamp(0, 135).long()
        y_inds = ((boxes_idx[:, 1] + boxes_idx[:, 3]) // 16).clamp(0, 75).long()
        grid = dense_mask.squeeze(dim=0).squeeze(dim=0)
        # Follow the boxes' device rather than forcing CUDA.
        thresholds = grid[y_inds.to(grid.device), x_inds.to(grid.device)].to(iou.device)
        thresholds = thresholds.clamp(min=iou_threshold)

    B = iou
    for _step in range(200):
        A = B
        # Largest DIoU of every box with the boxes ranked above it.
        maxA = torch.max(A, dim=0)[0]
        # Rows of currently suppressed boxes are zeroed for the next round.
        E = torch.lt(maxA, thresholds).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    return idx[torch.lt(maxA, thresholds)]
def cluster_SPM_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS followed by a score-penalty step.

    After the cluster iteration converges, each score is multiplied by the
    product, over the boxes ranked above it that are still active, of
    ``exp(-IoU ** 2 / 0.2)``. Boxes whose penalised score exceeds 0.01 are kept.

    Args:
        boxes: tensor [N, 4] of box corners, passed to `jaccard`.
        scores: tensor [N] with one score per box (not modified).
        iou_threshold: IoU threshold used by the cluster iteration.
        top_k: currently unused.

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)

    B = iou
    for _step in range(200):
        A = B
        maxA = torch.max(A, dim=0)[0]
        # Rows of currently suppressed boxes are zeroed for the next round.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    # Column-wise product of the Gaussian penalties of the surviving overlaps.
    scores = torch.prod(torch.exp(-B ** 2 / 0.2), 0) * scores
    idx_out = scores > 0.01
    return idx[idx_out]
def cluster_SPM_dist_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS with a score penalty that is relaxed by centre distance.

    After the cluster iteration converges, the penalty of each surviving
    overlap is ``min(exp(-IoU ** 2 / 0.2) + D, 1)``, where ``D`` comes from
    `distance` (delta=0.7) and is only added where the overlap is positive.
    Each score is multiplied by the column-wise product of these penalties and
    boxes scoring above 0.15 are kept.

    Args:
        boxes: tensor [N, 4] of box corners.
        scores: tensor [N] with one score per box (not modified).
        iou_threshold: IoU threshold used by the cluster iteration.
        top_k: currently unused.

    Returns:
        Long tensor with the indices (into `boxes`) of the kept boxes, in
        descending score order.
    """
    _, idx = scores.sort(0, descending=True)
    # torch.max over a zero-sized dimension raises, so return early.
    if idx.numel() == 0:
        return idx

    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)

    B = iou
    for _step in range(200):
        A = B
        maxA = torch.max(A, dim=0)[0]
        # Rows of currently suppressed boxes are zeroed for the next round.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break

    D = distance(boxes_idx, boxes_idx, delta=0.7)
    # Upper bound of the per-pair penalty: 1 wherever B >= 0.
    X = (B >= 0).float()
    penalty = torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X)
    scores = torch.prod(penalty, 0) * scores
    idx_out = scores > 0.15
    return idx[idx_out]
def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner-format boxes.

    Both inputs are expanded (as views, without copying) to [A, B, 2] so that
    every box of `box_a` is compared with every box of `box_b`.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4].
        box_b: (tensor) bounding boxes, Shape: [B, 4].
    Return:
        (tensor) intersection area, Shape: [A, B].
    """
    num_a, num_b = box_a.size(0), box_b.size(0)
    pair_shape = (num_a, num_b, 2)

    lower_a = box_a[:, :2].unsqueeze(1).expand(*pair_shape)
    lower_b = box_b[:, :2].unsqueeze(0).expand(*pair_shape)
    upper_a = box_a[:, 2:].unsqueeze(1).expand(*pair_shape)
    upper_b = box_b[:, 2:].unsqueeze(0).expand(*pair_shape)

    # Side lengths of the overlap rectangle; negative means no overlap.
    sides = torch.clamp(torch.min(upper_a, upper_b) - torch.max(lower_a, lower_b), min=0)
    return sides[:, :, 0] * sides[:, :, 1]
def jaccard(box_a, box_b, iscrowd=False):
    """Pairwise jaccard overlap (intersection over union) of two box sets.

    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    With ``iscrowd=True`` the intersection is divided by the area of the
    `box_a` box instead of by the union.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4]
        box_b: (tensor) bounding boxes, Shape: [B, 4]
    Return:
        (tensor) overlap matrix, Shape: [box_a.size(0), box_b.size(0)]
    """
    overlap = intersect(box_a, box_b)

    width_a, height_a = box_a[:, 2] - box_a[:, 0], box_a[:, 3] - box_a[:, 1]
    width_b, height_b = box_b[:, 2] - box_b[:, 0], box_b[:, 3] - box_b[:, 1]
    area_a = (width_a * height_a).unsqueeze(1).expand_as(overlap)  # [A,B]
    area_b = (width_b * height_b).unsqueeze(0).expand_as(overlap)  # [A,B]

    if iscrowd:
        return overlap / area_a
    return overlap / (area_a + area_b - overlap)  # [A,B]
def diou(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise Distance-IoU between two sets of corner-format boxes.

    For every pair the result is ``IoU - D ** delta`` with
    ``D = centre_distance ** 2 / enclosing_box_diagonal ** 2``.
    With ``iscrowd=True`` the result is ``intersection / area(box_a)`` and no
    distance term is subtracted.

    The intersection is computed here with a batch dimension, so both 2-D
    ([A, 4] / [B, 4]) and batched ([n, A, 4] / [n, B, 4]) inputs work.

    Args:
        box_a: (tensor) boxes as (x1, y1, x2, y2), Shape: [A, 4] or [n, A, 4]
        box_b: (tensor) boxes as (x1, y1, x2, y2), Shape: [B, 4] or [n, B, 4]
        delta: exponent applied to the normalised centre distance.
        iscrowd: return intersection over area(box_a) instead of DIoU.
    Return:
        (tensor) Shape: [A, B] for 2-D inputs, [n, A, B] for batched inputs.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # [n, A, 1, 4] against [n, 1, B, 4]: broadcasting forms every pair.
    lhs = box_a.unsqueeze(2)
    rhs = box_b.unsqueeze(1)
    l1, t1, r1, b1 = lhs[..., 0], lhs[..., 1], lhs[..., 2], lhs[..., 3]
    l2, t2, r2, b2 = rhs[..., 0], rhs[..., 1], rhs[..., 2], rhs[..., 3]

    inter = ((torch.min(r1, r2) - torch.max(l1, l2)).clamp(min=0) *
             (torch.min(b1, b2) - torch.max(t1, t2)).clamp(min=0))  # [n,A,B]
    area_a = (r1 - l1) * (b1 - t1)  # [n,A,1]
    area_b = (r2 - l2) * (b2 - t2)  # [n,1,B]
    union = area_a + area_b - inter

    # Box centres.
    x1, y1 = (r1 + l1) / 2, (b1 + t1) / 2
    x2, y2 = (r2 + l2) / 2, (b2 + t2) / 2

    # Smallest box enclosing both boxes of a pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps the ratio finite when the enclosing box is degenerate.
    D = ((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)
    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)
def d2iou(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise IoU reduced by a per-axis centre-distance penalty.

    Box columns are read as (left, top, right, bottom). For every pair the
    squared centre offset along x is divided by the squared width of the
    smallest enclosing rectangle, and likewise along y with its height; the
    larger of the two ratios, raised to ``delta``, is subtracted from the IoU.

    Args:
        box_a: (tensor) boxes, shape [A, 4] or batched [n, A, 4].
        box_b: (tensor) boxes, shape [B, 4] or batched [n, B, 4].
        delta: exponent applied to the distance ratio.
        iscrowd: when true, return ``inter / area(box_a)`` with no penalty.
    Return:
        (tensor) one score per (box_a, box_b) pair, shaped like the value
        returned by ``intersect`` (with a leading batch axis for 3-D input).
    """
    # `intersect` lives elsewhere in this file; presumably it returns the
    # pairwise intersection areas -- confirm against its definition.
    inter = intersect(box_a, box_b)
    use_batch = True
    if box_a.dim() == 2:
        # Work on a batch of one and strip the axis again on return.
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]
        inter = inter[None, ...]

    def per_a(values):
        # Spread a per-box_a quantity over the pair grid.
        return values.unsqueeze(2).expand_as(inter)

    def per_b(values):
        # Spread a per-box_b quantity over the pair grid.
        return values.unsqueeze(1).expand_as(inter)

    area_a = per_a((box_a[:, :, 2] - box_a[:, :, 0]) * (box_a[:, :, 3] - box_a[:, :, 1]))
    area_b = per_b((box_b[:, :, 2] - box_b[:, :, 0]) * (box_b[:, :, 3] - box_b[:, :, 1]))
    union = area_a + area_b - inter

    # Box centres.
    x1 = per_a((box_a[:, :, 2] + box_a[:, :, 0]) / 2)
    y1 = per_a((box_a[:, :, 3] + box_a[:, :, 1]) / 2)
    x2 = per_b((box_b[:, :, 2] + box_b[:, :, 0]) / 2)
    y2 = per_b((box_b[:, :, 3] + box_b[:, :, 1]) / 2)

    # Edges of the smallest rectangle enclosing each pair.
    cr = torch.max(per_a(box_a[:, :, 2]), per_b(box_b[:, :, 2]))
    cl = torch.min(per_a(box_a[:, :, 0]), per_b(box_b[:, :, 0]))
    ct = torch.min(per_a(box_a[:, :, 1]), per_b(box_b[:, :, 1]))
    cb = torch.max(per_a(box_a[:, :, 3]), per_b(box_b[:, :, 3]))

    # Worst-axis normalised offset; 1e-7 guards degenerate enclosing boxes.
    D = torch.max(
        ((x2 - x1) ** 2) / ((cr - cl) ** 2 + 1e-7),
        ((y2 - y1) ** 2) / ((cb - ct) ** 2 + 1e-7),
    )
    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)
def distance(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise normalised centre distance between two sets of boxes.

    Box columns are read as (left, top, right, bottom). For every pair the
    squared distance between the box centres is divided by the squared
    diagonal of the smallest rectangle enclosing both boxes, and the ratio
    is raised to ``delta``.

    Args:
        box_a: (tensor) boxes, shape [A, 4] or batched [n, A, 4].
        box_b: (tensor) boxes, shape [B, 4] or batched [n, B, 4].
        delta: exponent applied to the distance ratio.
        iscrowd: accepted for signature parity with the IoU helpers; it has
            no effect on the result.
    Return:
        (tensor) shape [A, B] for 2-D input, [n, A, B] for batched input.
    """
    use_batch = box_a.dim() != 2
    if not use_batch:
        # Work on a batch of one and strip the axis again on return.
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # box_a quantities become [n, A, 1] and box_b quantities [n, 1, B], so
    # ordinary broadcasting yields the [n, A, B] pair grid directly; no
    # intersection needs to be computed just to obtain that shape.
    l1, t1, r1, b1 = (box_a[:, :, k].unsqueeze(2) for k in range(4))
    l2, t2, r2, b2 = (box_b[:, :, k].unsqueeze(1) for k in range(4))

    # Box centres.
    x1, y1 = (r1 + l1) / 2, (b1 + t1) / 2
    x2, y2 = (r2 + l2) / 2, (b2 + t2) / 2

    # Edges of the smallest rectangle enclosing each pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps the ratio finite when the enclosing rectangle is degenerate.
    out = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) ** delta
    return out if use_batch else out.squeeze(0)
def speed():
    """Time 1000 soft-NMS calls on random boxes and print the result.

    Each call receives 100 random boxes (coordinates scaled to [0, 1000))
    with random scores. Inputs are moved to the GPU when CUDA is available.
    """
    all_boxes = 1000 * torch.rand((1000, 100, 4), dtype=torch.float)
    all_scores = torch.rand((1000, 100), dtype=torch.float)

    # cuda flag
    cuda = 1 if torch.cuda.is_available() else 0
    if cuda:
        all_boxes = all_boxes.cuda()
        all_scores = all_scores.cuda()

    t_begin = time.time()
    for boxes_i, scores_i in zip(all_boxes, all_scores):
        soft_nms_pytorch(boxes_i, scores_i, cuda=cuda)
    elapsed = time.time() - t_begin

    # Total seconds over 1000 runs is numerically the per-run mean in ms.
    print("Average run time: %f ms" % elapsed)
def test():
    """Visual smoke test: run one NMS variant on three fixed boxes.

    Draws the input boxes in one window ("image_0"), runs the NMS variant
    named by the local ``method`` variable, prints the kept indices and
    draws the surviving boxes in a second window ("result"). Blocks on a
    key press before returning.

    Raises:
        FileNotFoundError: if the demo image cannot be read.
        ValueError: if ``method`` does not name a known NMS variant.
    """
    # boxes and boxscores
    boxes = torch.tensor(
        [
            [683, 33, 1188, 715],
            [187, 198, 753, 720],
            [187, 198, 1188, 720],
        ],
        dtype=torch.float,
    )
    boxscores = torch.tensor([0.8, 0.7, 0.7], dtype=torch.float)
    nms_thres = 0.5

    image_path = 'data/images/zidane.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % image_path)

    # Show every candidate box; the second one gets its own colour.
    image2 = image.copy()
    for index, box in enumerate(boxes):
        color = (0, 127, 0) if index == 1 else (0, 255, 255)
        cv2.rectangle(image2, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
    cv2.imshow("image_0", image2)
    cv2.waitKey(1)

    # cuda flag
    cuda = 1 if torch.cuda.is_available() else 0
    if cuda:
        boxes = boxes.cuda()
        boxscores = boxscores.cuda()

    method = 'standard'
    if method == 'standard':
        nms_indices = torchvision.ops.nms(boxes, boxscores, nms_thres)
    elif method == 'soft':
        nms_indices = soft_nms_pytorch(boxes, boxscores, sigma=0.5, thresh=0.2, cuda=cuda)
    elif method == "cluster":
        nms_indices = cluster_nms(boxes, boxscores, nms_thres)
    elif method == "cluster_SPM":
        nms_indices = cluster_SPM_nms(boxes, boxscores, nms_thres)
    elif method == "cluster_diou":
        nms_indices = cluster_diounms(boxes, boxscores, nms_thres)
    elif method == "cluster_SPM_dist":
        nms_indices = cluster_SPM_dist_nms(boxes, boxscores, nms_thres)
    else:
        # Without this branch an unknown name left nms_indices unbound.
        raise ValueError('Invalid NMS type!')

    print(nms_indices)
    # Draw the boxes that survived suppression.
    for index in nms_indices:
        box = boxes[index]
        cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
    cv2.imshow("result", image)
    cv2.waitKey()
# Script entry point: run the visual NMS demo defined in test().
if __name__ == '__main__':
test()
diou测试:
import cv2
import torch
def diou_box_nms(boxes, scores, iou_thres):
    """Greedy non-maximum suppression using DIoU as the overlap measure.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other box whose DIoU with it exceeds ``iou_thres``. DIoU is the IoU
    minus the squared centre distance divided by the squared diagonal of
    the smallest rectangle enclosing both boxes.

    Args:
        boxes: tensor of shape [N, 4], columns read as (x1, y1, x2, y2).
        scores: tensor of shape [N] with one score per box.
        iou_thres: boxes with DIoU <= this value survive a suppression round.

    Returns:
        1-D long tensor with the indices of the kept boxes in descending
        score order, on the same device as ``boxes``.
    """
    if boxes.shape[0] == 0:
        return torch.zeros(0, device=boxes.device).long()

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # Same continuous-coordinate convention as the intersection below, so
    # that identical boxes get an IoU of exactly 1.
    areas = (x2 - x1) * (y2 - y1)
    order = torch.sort(scores, descending=True)[1]
    eps = 1e-16  # keeps both divisions finite for degenerate boxes

    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]

        # Intersection of box i with every remaining box.
        xmin = torch.clamp(x1[rest], min=float(x1[i]))
        ymin = torch.clamp(y1[rest], min=float(y1[i]))
        xmax = torch.clamp(x2[rest], max=float(x2[i]))
        ymax = torch.clamp(y2[rest], max=float(y2[i]))
        inter_area = torch.clamp(xmax - xmin, min=0.0) * torch.clamp(ymax - ymin, min=0.0)
        iou = inter_area / (areas[i] + areas[rest] - inter_area + eps)

        # Squared distance between box centres.
        cx_i = (x2[i] + x1[i]) / 2
        cy_i = (y2[i] + y1[i]) / 2
        cx_rest = (x2[rest] + x1[rest]) / 2
        cy_rest = (y2[rest] + y1[rest]) / 2
        inter_diag = (cx_rest - cx_i) ** 2 + (cy_rest - cy_i) ** 2

        # Squared diagonal of the smallest enclosing rectangle.
        ox1 = torch.min(x1[rest], x1[i])
        oy1 = torch.min(y1[rest], y1[i])
        ox2 = torch.max(x2[rest], x2[i])
        oy2 = torch.max(y2[rest], y2[i])
        outer_diag = (ox2 - ox1) ** 2 + (oy2 - oy1) ** 2

        diou = torch.clamp(iou - inter_diag / (outer_diag + eps), min=-1.0, max=1.0)

        # flatten() keeps the index tensor 1-D even when one box survives.
        survivors = torch.nonzero(diou <= iou_thres).flatten()
        order = rest[survivors]

    return torch.tensor(keep, dtype=torch.long, device=boxes.device)
if __name__ == '__main__':
    # Visual demo of diou_box_nms on three fixed boxes.
    boxes = torch.tensor(
        [
            [683, 33, 1188, 715],
            [187, 198, 753, 720],
            [187, 198, 1188, 720],
        ],
        dtype=torch.float,
    )
    scores = torch.tensor([0.8, 0.7, 0.7], dtype=torch.float)

    image_path = '../inference/images/zidane.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % image_path)

    # Show every candidate box; the second one gets its own colour.
    image2 = image.copy()
    for index, box in enumerate(boxes):
        color = (0, 127, 0) if index == 1 else (0, 255, 255)
        cv2.rectangle(image2, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
    cv2.imshow("image_0", image2)
    cv2.waitKey(1)

    keep = diou_box_nms(boxes, scores, 0.6)
    print(keep)

    # Draw the boxes that survived suppression.
    for index in keep:
        box = boxes[index]
        cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
    cv2.imshow("result", image)
    cv2.waitKey()
iou-giou-diou-ciou-nms python numpy 代码例子
转自:
iou-giou-diou-ciou-nms相关code_小猪猪爱吃饭的博客-CSDN博客
import math
import numpy
def iou(box1, box2):
    """Return the intersection-over-union of two (x1, y1, x2, y2) boxes.

    Side lengths are measured as ``hi - lo + 1`` and floored at zero, so a
    box whose two corners coincide has area 1.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    def span(lo, hi):
        # Inclusive side length, never negative.
        return max(0, hi - lo + 1)

    area_a = span(ax1, ax2) * span(ay1, ay2)
    area_b = span(bx1, bx2) * span(by1, by2)

    # The overlap rectangle runs between the inner pair of edges per axis.
    overlap = span(max(ax1, bx1), min(ax2, bx2)) * span(max(ay1, by1), min(ay2, by2))
    return overlap / (area_a + area_b - overlap)
def giou(box1, box2):
    """Return the generalized IoU of two (x1, y1, x2, y2) boxes.

    Computed as ``IoU - (enclosing_area - union) / enclosing_area`` where
    the enclosing area belongs to the smallest rectangle containing both
    boxes. Side lengths are measured as ``hi - lo + 1``, floored at zero.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2
    area1 = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area2 = max(0, x4 - x3 + 1) * max(0, y4 - y3 + 1)

    # Overlap rectangle.
    xx1 = max(x1, x3)
    yy1 = max(y1, y3)
    xx2 = min(x2, x4)
    yy2 = min(y2, y4)
    inter = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    u = area1 + area2 - inter

    # Smallest enclosing rectangle.
    ae_x1 = min(x1, x3)
    ae_y1 = min(y1, y3)
    ae_x2 = max(x2, x4)
    ae_y2 = max(y2, y4)
    # Both factors need the zero floor; a one-argument max() on a number
    # raises TypeError.
    ae = max(0, ae_x2 - ae_x1 + 1) * max(0, ae_y2 - ae_y1 + 1)

    return inter / u - (ae - u) / ae
def diou(box1, box2):
    """Return the distance-IoU of two (x1, y1, x2, y2) boxes.

    Computed as ``IoU - p2 / c2`` where ``p2`` is the squared distance
    between the box centres and ``c2`` the squared corner-to-corner
    diagonal of the smallest rectangle containing both boxes. Areas use
    the ``hi - lo + 1`` side-length convention; the diagonal does not.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2
    area1 = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area2 = max(0, x4 - x3 + 1) * max(0, y4 - y3 + 1)

    # Squared distance between the two centres.
    c1_x, c1_y = (x2 + x1) / 2, (y2 + y1) / 2
    c2_x, c2_y = (x4 + x3) / 2, (y4 + y3) / 2
    p2 = (c2_x - c1_x) * (c2_x - c1_x) + (c2_y - c1_y) * (c2_y - c1_y)

    # Overlap rectangle.
    xx1 = max(x1, x3)
    yy1 = max(y1, y3)
    xx2 = min(x2, x4)
    yy2 = min(y2, y4)
    inter = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    u = area1 + area2 - inter

    # Squared diagonal of the smallest enclosing rectangle.
    ae_x1 = min(x1, x3)
    ae_y1 = min(y1, y3)
    ae_x2 = max(x2, x4)
    ae_y2 = max(y2, y4)
    c2 = (ae_x2 - ae_x1) * (ae_x2 - ae_x1) + (ae_y2 - ae_y1) * (ae_y2 - ae_y1)

    # c2 is zero only when both boxes are the same single point, in which
    # case the centres coincide and the penalty is zero by definition.
    penalty = p2 / c2 if c2 else 0.0
    return inter / u - penalty
def ciou(box1, box2, alpha):
    """Return the complete-IoU of two (x1, y1, x2, y2) boxes.

    Computed as ``IoU - p2 / c2 - alpha * v`` where ``p2`` is the squared
    distance between the box centres, ``c2`` the squared corner-to-corner
    diagonal of the smallest rectangle containing both boxes, and ``v``
    the squared difference of the boxes' ``arctan(w / h)`` values scaled
    by ``4 / pi**2``.

    Args:
        box1: first box as (x1, y1, x2, y2).
        box2: second box as (x1, y1, x2, y2).
        alpha: weight applied to the aspect-ratio term ``v``.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2

    # Heights are floored at a tiny positive value so w / h never divides
    # by zero.
    w1, h1 = max(x2 - x1 + 1, 0), max(y2 - y1 + 1, 1e-15)
    w2, h2 = max(x4 - x3 + 1, 0), max(y4 - y3 + 1, 1e-15)
    area1 = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area2 = max(0, x4 - x3 + 1) * max(0, y4 - y3 + 1)

    # Squared distance between the two centres.
    c1_x, c1_y = (x2 + x1) / 2, (y2 + y1) / 2
    c2_x, c2_y = (x4 + x3) / 2, (y4 + y3) / 2
    p2 = (c2_x - c1_x) * (c2_x - c1_x) + (c2_y - c1_y) * (c2_y - c1_y)

    # Overlap rectangle.
    xx1 = max(x1, x3)
    yy1 = max(y1, y3)
    xx2 = min(x2, x4)
    yy2 = min(y2, y4)
    inter = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    u = area1 + area2 - inter

    # Squared diagonal of the smallest enclosing rectangle.
    ae_x1 = min(x1, x3)
    ae_y1 = min(y1, y3)
    ae_x2 = max(x2, x4)
    ae_y2 = max(y2, y4)
    c2 = (ae_x2 - ae_x1) * (ae_x2 - ae_x1) + (ae_y2 - ae_y1) * (ae_y2 - ae_y1)

    # Aspect-ratio consistency term.
    v = 4 * ((numpy.arctan(w1 / h1) - numpy.arctan(w2 / h2)) ** 2) / (numpy.pi ** 2)

    # c2 is zero only when both boxes are the same single point, in which
    # case the centres coincide and the distance penalty is zero.
    penalty = p2 / c2 if c2 else 0.0
    return inter / u - penalty - alpha * v
def nms(dets, thre):
    """Greedy IoU-based non-maximum suppression on a detections array.

    Args:
        dets: array-like of shape [N, 5+]; the first five columns are read
            as (x1, y1, x2, y2, score).
        thre: a box is discarded when its IoU with an already kept,
            higher-scoring box exceeds this value.

    Returns:
        List of int row indices of the kept detections, highest score
        first. Empty input yields an empty list.
    """
    dets = numpy.asarray(dets)
    if dets.size == 0:
        return []

    # Columns, not row slices: dets[:k] would take the first k rows.
    x1, y1, x2, y2, s = (dets[:, k] for k in range(5))
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = numpy.argsort(s)[::-1]

    keep = []
    # Test the size explicitly; truth-testing a multi-element array raises.
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        rest = order[1:]

        # Overlap of box i with every remaining box.
        xx1 = numpy.maximum(x1[i], x1[rest])
        yy1 = numpy.maximum(y1[i], y1[rest])
        xx2 = numpy.minimum(x2[i], x2[rest])
        yy2 = numpy.minimum(y2[i], y2[rest])
        w = numpy.maximum(xx2 - xx1 + 1, 0)
        h = numpy.maximum(yy2 - yy1 + 1, 0)
        inter = w * h
        overlap = inter / (area[i] + area[rest] - inter)

        # Only boxes that overlap box i little enough stay in the queue.
        order = rest[overlap <= thre]
    return keep