yolov5 soft_nms cluster_nms,cluster_SPM_nms,cluster_diounms,cluster_SPM_dist_nms,diou_nms

AI算法网奇

已于 2022-10-13 23:15:54 修改

阅读量3.8k

点赞数 1

分类专栏：目标检测　文章标签：pytorch、深度学习、人工智能

于 2021-07-01 17:32:55 首次发布

本文链接：https://blog.csdn.net/jacke121/article/details/118391514

版权

目标检测专栏收录该内容

130 篇文章 36 订阅

订阅专栏

yolov5 cluster_nms,cluster_SPM_nms,cluster_diounms,cluster_SPM_dist_nms,diou_nms

图片测试方法代码：

diou测试：

iou-giou-diou-ciou-nms python numpy 代码例子

yolov5 cluster_nms,cluster_SPM_nms,cluster_diounms,cluster_SPM_dist_nms,diou_nms

本机项目：

yolov5_mangguo_new

调用：

        # Non-maximum suppression: run the NMS variant named by `method`.
        # Every branch is handed the box columns pred[:, :4] and the score
        # column pred[:, 4], and stores its result in `nms_indices`.
        if method == 'standard':
            nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
        elif method == 'soft':
            # NOTE(review): sigma/thresh are fixed here and cuda=1 is hard-coded —
            # presumably pred lives on a CUDA device; confirm against the caller.
            nms_indices = soft_nms_pytorch(pred[:, :4], pred[:, 4], sigma=0.5, thresh=0.2, cuda=1)
        elif method == "cluster":
            nms_indices = cluster_nms(pred[:, :4], pred[:, 4], nms_thres)
        elif method == "cluster_SPM":
            nms_indices = cluster_SPM_nms(pred[:, :4], pred[:, 4], nms_thres)
        elif method == "cluster_diou":
            # NOTE(review): no dense_mask argument is supplied here — confirm that
            # cluster_diounms can be called without one.
            nms_indices = cluster_diounms(pred[:, :4], pred[:, 4], nms_thres)
        elif method == "cluster_SPM_dist":
            nms_indices = cluster_SPM_dist_nms(pred[:, :4], pred[:, 4], nms_thres)
        else:
            # Any other method name is rejected outright.
            raise ValueError('Invalid NMS type!')

# -*- coding:utf-8 -*-
import time
import numpy as np
import torch

def soft_nms_pytorch(dets, box_scores, sigma=0.5, thresh=0.001, cuda=0):
    """
    Gaussian Soft-NMS implemented with PyTorch tensors.

    At step i the highest-scoring box among positions i..N-1 is swapped into
    position i, then the score of every later box is multiplied by
    ``exp(-iou ** 2 / sigma)``. Scores are decayed on a private copy, so the
    caller's ``box_scores`` tensor (often a view such as ``pred[:, 4]``) is
    left untouched.

    # Arguments
        dets:        boxes coordinate tensor, shape [N, 4] (format: [y1, x1, y2, x2])
        box_scores:  box score tensor, shape [N]
        sigma:       variance of the Gaussian decay
        thresh:      boxes whose decayed score is <= thresh are dropped
        cuda:        kept for backward compatibility only; all work happens on
                     ``dets.device`` so the flag no longer has any effect
    # Return
        long tensor holding the original indices of the selected boxes, in
        the order in which the boxes were picked
    """
    N = dets.shape[0]

    # Append each box's original index as a 5th column so it follows the box
    # through the swaps below. torch.cat allocates, so the caller's dets is safe.
    indexes = torch.arange(0, N, dtype=torch.float, device=dets.device).view(N, 1)
    dets = torch.cat((dets, indexes), dim=1)

    # Work on a copy: the loop both reorders and rescales the scores.
    scores = box_scores.clone()
    # The "+ 1" treats coordinates as inclusive pixel indices.
    areas = (dets[:, 3] - dets[:, 1] + 1) * (dets[:, 2] - dets[:, 0] + 1)

    for i in range(N):
        pos = i + 1

        if pos < N:
            # Bring the best remaining box to position i (selection-sort step).
            maxscore, maxpos = torch.max(scores[pos:], dim=0)
            if scores[i] < maxscore:
                j = maxpos.item() + pos
                # Advanced indexing on the right-hand side copies, so this is a safe swap.
                dets[[i, j]] = dets[[j, i]]
                scores[[i, j]] = scores[[j, i]]
                areas[[i, j]] = areas[[j, i]]

        # IoU between box i and every later box, computed on the tensors' own device.
        yy1 = torch.max(dets[i, 0], dets[pos:, 0])
        xx1 = torch.max(dets[i, 1], dets[pos:, 1])
        yy2 = torch.min(dets[i, 2], dets[pos:, 2])
        xx2 = torch.min(dets[i, 3], dets[pos:, 3])

        w = (xx2 - xx1 + 1).clamp(min=0)
        h = (yy2 - yy1 + 1).clamp(min=0)
        inter = w * h
        ovr = inter / (areas[i] + areas[pos:] - inter)

        # Gaussian decay of the later boxes' scores.
        scores[pos:] = torch.exp(-(ovr * ovr) / sigma) * scores[pos:]

    # Column 4 carries the original index of the box now sitting in each row.
    keep = dets[:, 4][scores > thresh]
    return keep.long()


def cluster_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS: matrix-form hard NMS that treats all boxes as one class.

    Boxes are sorted by descending score and the strictly upper-triangular
    pairwise IoU matrix is built with ``jaccard``. The loop then repeatedly
    zeroes the rows of boxes that are currently suppressed, so that a
    suppressed box cannot suppress anything itself, until the matrix stops
    changing (at most 200 rounds).

    Args:
        boxes: (tensor) boxes, Shape: [N,4].
        scores: (tensor) one score per box, Shape: [N].
        iou_threshold: a box is dropped when its IoU with a kept,
            higher-scoring box is greater than this value.
        top_k: accepted but not used by this implementation.
    Return:
        (long tensor) indices into ``boxes`` of the kept boxes, best score first.
    """
    if scores.numel() == 0:
        # torch.max(dim=0) raises on an empty matrix; nothing in, nothing out.
        return torch.empty(0, dtype=torch.long, device=scores.device)

    _, idx = scores.sort(0, descending=True)
    boxes_idx = boxes[idx]
    # Entry [i, j] with i < j: IoU of box j with the higher-scoring box i.
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)
    B = iou
    for _round in range(200):
        A = B
        maxA = A.max(dim=0)[0]
        # E[i, :] is 1 while box i is still kept, so only kept boxes suppress.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    return idx[maxA <= iou_threshold]

def cluster_diounms(boxes, scores, iou_threshold: float = 0.5,dense_mask=None, top_k: int = 200):
    """Cluster-NMS driven by ``diou`` with an optional per-location threshold map.

    Boxes are sorted by descending score and compared pairwise with
    ``diou(..., delta=0.7)``. When ``dense_mask`` is given, every box looks up
    its own suppression threshold in that map at ``(x1 + x2) // 16`` and
    ``(y1 + y2) // 16`` (clamped to a 136 x 76 grid), and ``iou_threshold``
    acts as a lower bound on the looked-up value.

    Args:
        boxes: (tensor) boxes, Shape: [N,4].
        scores: (tensor) one score per box, Shape: [N].
        iou_threshold: minimum suppression threshold for every box.
        dense_mask: (tensor, optional) threshold map; its two leading
            dimensions are squeezed away before it is indexed as [y, x].
            NOTE(review): presumably shaped [1, 1, 76, 136] — confirm against
            the producer. When omitted (None or an empty list) every box
            simply uses ``iou_threshold``.
        top_k: accepted but not used by this implementation.
    Return:
        (long tensor) indices into ``boxes`` of the kept boxes, best score first.
    """
    if scores.numel() == 0:
        # torch.max(dim=0) raises on an empty matrix; nothing in, nothing out.
        return torch.empty(0, dtype=torch.long, device=scores.device)

    _, idx = scores.sort(0, descending=True)
    boxes_idx = boxes[idx]
    iou = diou(boxes_idx, boxes_idx,delta = 0.7).triu_(diagonal=1)

    mask_missing = dense_mask is None or (
        isinstance(dense_mask, (list, tuple)) and len(dense_mask) == 0)
    if mask_missing:
        # No map supplied: one global threshold for all boxes.
        limits = iou_threshold
    else:
        x_inds = ((boxes_idx[:, 0] + boxes_idx[:, 2]) // 16).clamp(min=0, max=135)
        y_inds = ((boxes_idx[:, 1] + boxes_idx[:, 3]) // 16).clamp(min=0, max=75)
        dense_mask = dense_mask.squeeze(dim=0).squeeze(dim=0)
        x_inds = x_inds.long().to(dense_mask.device)
        y_inds = y_inds.long().to(dense_mask.device)
        # Advanced indexing copies, so the caller's map is never modified.
        # Follow the boxes' device instead of forcing CUDA.
        limits = dense_mask[y_inds, x_inds].to(iou.device)
        limits[limits <= iou_threshold] = iou_threshold

    B = iou
    for _round in range(200):
        A = B
        maxA = A.max(dim=0)[0]
        # E[i, :] is 1 while box i is still kept, so only kept boxes suppress.
        E = (maxA < limits).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    return idx[maxA < limits]

def cluster_SPM_nms(boxes, scores, iou_threshold:float=0.5, top_k:int=200):
    """Cluster-NMS followed by a Gaussian score penalty (SPM).

    The suppression matrix is iterated exactly as in ``cluster_nms``.
    Afterwards each box's score is multiplied by ``exp(-iou ** 2 / 0.2)`` for
    every kept, higher-scoring box that overlaps it, and boxes whose penalised
    score is above 0.01 are returned. Both constants are hard-coded.

    Args:
        boxes: (tensor) boxes, Shape: [N,4].
        scores: (tensor) one score per box, Shape: [N].
        iou_threshold: IoU above which a kept box suppresses a lower-scoring one
            during the matrix iteration.
        top_k: accepted but not used by this implementation.
    Return:
        (long tensor) indices into ``boxes`` of the surviving boxes, best
        original score first.
    """
    if scores.numel() == 0:
        # torch.max(dim=0) raises on an empty matrix; nothing in, nothing out.
        return torch.empty(0, dtype=torch.long, device=scores.device)

    _, idx = scores.sort(0, descending=True)
    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)
    B = iou
    for _round in range(200):
        A = B
        maxA = A.max(dim=0)[0]
        # E[i, :] is 1 while box i is still kept, so only kept boxes suppress.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    # Column-wise product: one penalty factor per kept higher-scoring neighbour.
    scores = torch.prod(torch.exp(-B**2/0.2),0)*scores
    return idx[scores > 0.01]

def cluster_SPM_dist_nms(boxes, scores, iou_threshold:float=0.5, top_k:int=200):
    """Cluster-NMS with a Gaussian score penalty relaxed by centre distance.

    The suppression matrix is iterated exactly as in ``cluster_nms``. Each
    box's score is then multiplied, for every kept higher-scoring box that
    overlaps it, by ``min(exp(-iou ** 2 / 0.2) + distance, 1)``, where
    ``distance`` comes from ``distance(..., delta=0.7)``. Boxes whose
    penalised score is above 0.15 are returned. The constants are hard-coded.

    Args:
        boxes: (tensor) boxes, Shape: [N,4].
        scores: (tensor) one score per box, Shape: [N].
        iou_threshold: IoU above which a kept box suppresses a lower-scoring one
            during the matrix iteration.
        top_k: accepted but not used by this implementation.
    Return:
        (long tensor) indices into ``boxes`` of the surviving boxes, best
        original score first.
    """
    if scores.numel() == 0:
        # torch.max(dim=0) raises on an empty matrix; nothing in, nothing out.
        return torch.empty(0, dtype=torch.long, device=scores.device)

    _, idx = scores.sort(0, descending=True)
    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)
    B = iou
    for _round in range(200):
        A = B
        maxA = A.max(dim=0)[0]
        # E[i, :] is 1 while box i is still kept, so only kept boxes suppress.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    D = distance(boxes_idx, boxes_idx, delta = 0.7)
    # B is never negative, so X is all ones and caps each factor at 1.
    X = (B>=0).float()
    # The distance bonus only applies to pairs that actually overlap (B > 0).
    scores = torch.prod(torch.min(torch.exp(-B**2/0.2)+D*((B>0).float()),X),0)*scores
    return idx[scores > 0.15]

def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner-format boxes.

    Both corner tensors are viewed as [A,B,2] without allocating:
    [A,2] -> [A,1,2] -> [A,B,2] for box_a and
    [B,2] -> [1,B,2] -> [A,B,2] for box_b.
    Args:
      box_a: (tensor) bounding boxes, Shape: [A,4].
      box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
      (tensor) intersection area, Shape: [A,B].
    """
    num_a = box_a.size(0)
    num_b = box_b.size(0)

    # Columns 2: hold the far corner of each box, columns :2 the near corner.
    far_a = box_a[:, 2:].unsqueeze(1).expand(num_a, num_b, 2)
    far_b = box_b[:, 2:].unsqueeze(0).expand(num_a, num_b, 2)
    near_a = box_a[:, :2].unsqueeze(1).expand(num_a, num_b, 2)
    near_b = box_b[:, :2].unsqueeze(0).expand(num_a, num_b, 2)

    # Overlap extent per axis; negative means the boxes are disjoint on that axis.
    extent = torch.clamp(torch.min(far_a, far_b) - torch.max(near_a, near_b), min=0)
    first_axis = extent[:, :, 0]
    second_axis = extent[:, :, 1]
    return first_axis * second_axis


def jaccard(box_a, box_b, iscrowd=False):
    """Pairwise jaccard overlap (intersection over union) of two box sets.

        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    With iscrowd=True the intersection is divided by the area of the box_a
    entry alone instead of by the union.
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4]
        box_b: (tensor) bounding boxes, Shape: [B,4]
        iscrowd: switch the denominator from the union to area(box_a)
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    overlap = intersect(box_a, box_b)

    width_a = box_a[:, 2] - box_a[:, 0]
    height_a = box_a[:, 3] - box_a[:, 1]
    area_a = (width_a * height_a).unsqueeze(1).expand_as(overlap)  # [A,B]

    width_b = box_b[:, 2] - box_b[:, 0]
    height_b = box_b[:, 3] - box_b[:, 1]
    area_b = (width_b * height_b).unsqueeze(0).expand_as(overlap)  # [A,B]

    union = area_a + area_b - overlap

    denominator = area_a if iscrowd else union
    return overlap / denominator  # [A,B]


def diou(box_a, box_b, delta = 0.9,iscrowd:bool=False):
    """Pairwise distance-penalised IoU between two sets of corner-format boxes.

    For every pair the result is ``IoU - D ** delta``, where ``D`` is the
    squared distance between the two box centres divided by the squared
    diagonal of the smallest box enclosing both. The intersection is computed
    here with broadcasting, so both unbatched and batched inputs work.

    Args:
        box_a: (tensor) boxes, Shape: [A,4] or [n,A,4].
        box_b: (tensor) boxes, Shape: [B,4] or [n,B,4].
        delta: exponent applied to the centre-distance penalty.
        iscrowd: when True return intersection / area(box_a) with no penalty.
    Return:
        (tensor) Shape: [A,B] for 2-D inputs, [n,A,B] otherwise.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # Sides laid out for broadcasting: box_a -> [n,A,1], box_b -> [n,1,B].
    l1, t1, r1, b1 = [box_a[:, :, k].unsqueeze(2) for k in range(4)]
    l2, t2, r2, b2 = [box_b[:, :, k].unsqueeze(1) for k in range(4)]

    inter_w = (torch.min(r1, r2) - torch.max(l1, l2)).clamp(min=0)
    inter_h = (torch.min(b1, b2) - torch.max(t1, t2)).clamp(min=0)
    inter = inter_w * inter_h  # [n,A,B]

    area_a = (r1 - l1) * (b1 - t1)
    area_b = (r2 - l2) * (b2 - t2)
    union = area_a + area_b - inter

    # Box centres.
    x1 = (r1 + l1) / 2
    y1 = (b1 + t1) / 2
    x2 = (r2 + l2) / 2
    y2 = (b2 + t2) / 2

    # Smallest enclosing box of each pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps the ratio finite when the enclosing box is degenerate.
    D = (((x2 - x1)**2 + (y2 - y1)**2) / ((cr-cl)**2 + (cb-ct)**2 + 1e-7))
    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)

def d2iou(box_a, box_b, delta = 0.9,iscrowd:bool=False):
    """Pairwise IoU penalised by the larger per-axis centre offset.

    For every pair the result is ``IoU - D ** delta``, where ``D`` is the
    larger of the squared horizontal centre offset over the squared width of
    the enclosing box and the squared vertical centre offset over its squared
    height. The intersection is computed here with broadcasting, so both
    unbatched and batched inputs work.

    Args:
        box_a: (tensor) boxes, Shape: [A,4] or [n,A,4].
        box_b: (tensor) boxes, Shape: [B,4] or [n,B,4].
        delta: exponent applied to the offset penalty.
        iscrowd: when True return intersection / area(box_a) with no penalty.
    Return:
        (tensor) Shape: [A,B] for 2-D inputs, [n,A,B] otherwise.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # Sides laid out for broadcasting: box_a -> [n,A,1], box_b -> [n,1,B].
    l1, t1, r1, b1 = [box_a[:, :, k].unsqueeze(2) for k in range(4)]
    l2, t2, r2, b2 = [box_b[:, :, k].unsqueeze(1) for k in range(4)]

    inter_w = (torch.min(r1, r2) - torch.max(l1, l2)).clamp(min=0)
    inter_h = (torch.min(b1, b2) - torch.max(t1, t2)).clamp(min=0)
    inter = inter_w * inter_h  # [n,A,B]

    area_a = (r1 - l1) * (b1 - t1)
    area_b = (r2 - l2) * (b2 - t2)
    union = area_a + area_b - inter

    # Box centres.
    x1 = (r1 + l1) / 2
    y1 = (b1 + t1) / 2
    x2 = (r2 + l2) / 2
    y2 = (b2 + t2) / 2

    # Smallest enclosing box of each pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps each ratio finite when the enclosing box is degenerate.
    D = torch.max(((x2 - x1) ** 2) / ((cr - cl) ** 2 + 1e-7),((y2 - y1) ** 2) / ((cb - ct) ** 2 + 1e-7))
    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)


def distance(box_a, box_b, delta = 0.9, iscrowd:bool=False):
    """Pairwise normalised centre distance between two sets of corner-format boxes.

    For every pair the result is ``D ** delta``, where ``D`` is the squared
    distance between the two box centres divided by the squared diagonal of
    the smallest box enclosing both. Shapes are obtained by broadcasting, so
    no intersection has to be computed and batched inputs work as well.

    Args:
        box_a: (tensor) boxes, Shape: [A,4] or [n,A,4].
        box_b: (tensor) boxes, Shape: [B,4] or [n,B,4].
        delta: exponent applied to the normalised distance.
        iscrowd: accepted for signature parity with ``diou``; has no effect.
    Return:
        (tensor) Shape: [A,B] for 2-D inputs, [n,A,B] otherwise.
    """
    use_batch = True
    if box_a.dim() == 2:
        use_batch = False
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # Sides laid out for broadcasting: box_a -> [n,A,1], box_b -> [n,1,B].
    l1, t1, r1, b1 = [box_a[:, :, k].unsqueeze(2) for k in range(4)]
    l2, t2, r2, b2 = [box_b[:, :, k].unsqueeze(1) for k in range(4)]

    # Box centres.
    x1 = (r1 + l1) / 2
    y1 = (b1 + t1) / 2
    x2 = (r2 + l2) / 2
    y2 = (b2 + t2) / 2

    # Smallest enclosing box of each pair.
    cr = torch.max(r1, r2)
    cl = torch.min(l1, l2)
    ct = torch.min(t1, t2)
    cb = torch.max(b1, b2)

    # 1e-7 keeps the ratio finite when the enclosing box is degenerate.
    out = (((x2 - x1)**2 + (y2 - y1)**2) / ((cr-cl)**2 + (cb-ct)**2 + 1e-7))**delta
    return out if use_batch else out.squeeze(0)

def speed():
    """Time soft_nms_pytorch over 1000 random problems of 100 boxes each.

    The printed figure is the total wall-clock time in seconds for the 1000
    runs, which is numerically the average time per run in milliseconds.
    """
    boxes = 1000*torch.rand((1000, 100, 4), dtype=torch.float)
    boxscores = torch.rand((1000, 100), dtype=torch.float)

    # Run on the GPU whenever one is available.
    if torch.cuda.is_available():
        cuda = 1
        boxes = boxes.cuda()
        boxscores = boxscores.cuda()
    else:
        cuda = 0

    start = time.time()
    for trial in range(1000):
        soft_nms_pytorch(boxes[trial], boxscores[trial], cuda=cuda)
    elapsed = time.time() - start
    print("Average run time: %f ms" % elapsed)


def test():
    """Print the indices soft_nms_pytorch keeps for five hand-written boxes."""
    # Four heavily overlapping boxes plus one tiny, isolated, top-scoring box.
    sample_boxes = [[200, 200, 400, 400],
                    [220, 220, 420, 420],
                    [200, 240, 400, 440],
                    [240, 200, 440, 400],
                    [1, 1, 2, 2]]
    sample_scores = [0.8, 0.7, 0.6, 0.5, 0.9]
    boxes = torch.tensor(sample_boxes, dtype=torch.float)
    boxscores = torch.tensor(sample_scores, dtype=torch.float)

    # Run on the GPU whenever one is available.
    if torch.cuda.is_available():
        cuda = 1
        boxes = boxes.cuda()
        boxscores = boxscores.cuda()
    else:
        cuda = 0

    kept = soft_nms_pytorch(boxes, boxscores, cuda=cuda)
    print(kept)


# Script entry point: run the small soft-NMS demo; the timing run stays disabled.
if __name__ == '__main__':
    test()
    # speed()

代码参考：

YOLOV5_PANDA/nms_pytorch.py at 9c57cb76530f3fda3b1d56cd9a5469898d809b18 · xiamingfu/YOLOV5_PANDA · GitHub

在经典的NMS中，得分最高的检测框和其它检测框逐一算出一个对应的IOU值，并将该值超过NMS threshold的框全部过滤掉。可以看出，在经典NMS算法中，IOU是唯一考量的因素。但是在实际应用场景中，当两个不同物体挨得很近时，由于IOU值比较大，往往经过NMS处理后，只剩下一个检测框，这样导致漏检的错误情况发生。

一个成熟的IoU衡量指标应该要考虑预测框与真实框的重叠面积、中心点距离、长宽比三个方面。但是IoU 只考虑到了预测框与真实框重叠区域，并没有考虑到中心点距离、长宽比。

基于此，DIOU-NMS就不仅仅考虑IOU，还考虑两个框中心点之间的距离。如果两个框之间IOU比较大，但是两个框的中心距离比较大时，可能会认为这是两个物体的框而不会被过滤掉。

具体的IoU、GIoU、DIoU、CIoU的细节可以看我的另一篇博客: Bounding Box regression loss: IoU Loss、GIoU Loss、DIoU Loss、CIoU Loss.

3.2、DIoU
DIoU (Distance-IoU )。简单地在IoU loss基础上添加一个惩罚项，该惩罚项用于最小化两个bbox的中心点距离。

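DIoU公式：$\mathrm{DIoU} = \mathrm{IoU} - \dfrac{\rho^2(b, b^{gt})}{c^2}$，对应的损失为 $L_{DIoU} = 1 - \mathrm{DIoU}$。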

如下图，绿色框代表真实框，黑色框代表预测框，$b$ 为预测框的中心，$b^{gt}$ 为真实框的中心，$\rho^2(b, b^{gt})$ 代表真实框与预测框中心距离的平方 $d^2$，$c$ 表示两个框的最小闭包区域（同时包含了预测框和真实框的最小矩形框）的对角线长度。

Hard NMS 和 DIoU NMS比较：就是target和其他检测框的计算IoU的方式变成了DIoU，其他所有操作都和hard nms 完全相同

四、代码实现
以下代码主要实现了：hard_nms、soft_nms、diou_nms以及一些hard nms的改进版本。

import torch
import numpy as np
import torchvision
import math

def non_max_suppression(prediction, conf_thres=0.1, nms_thres=0.6, multi_cls=True, method='diou_nms'):
    """
        Drop detections whose object confidence is not above 'conf_thres', then run
        Non-Maximum Suppression on what is left, image by image and class by class.
        param:
             prediction: [batch, num_anchors, (x+y+w+h+1+num_classes)], i.e. the
                         predictions of all anchors put together
             conf_thres: score threshold of the first filtering pass; boxes that do
                         not exceed it are discarded
             nms_thres: IoU threshold; another box whose IoU with the current target
                        box is not below it gets suppressed
             multi_cls: when True a box is reported once for every class whose score
                        clears conf_thres; otherwise only its best class is kept
             method: NMS variant  (https://github.com/ultralytics/yolov3/issues/679)
                                  (https://github.com/ultralytics/yolov3/pull/795)
                        -hard_nms: plain (hard) NMS from torchvision, GPU capable, applied per class
                        -hard_nms_batch: plain (hard) NMS from torchvision (batched_nms), GPU capable,
                                         handles all classes in one call
                        -hard_nms_myself: plain (hard) NMS written by hand, applied per class
                        -and: hard NMS that additionally requires a box to overlap another box;
                              isolated boxes are removed (fewer false positives)
                        -merge: hard NMS that replaces the kept box by the score-weighted average
                                of the boxes overlapping it, for a smoother location
                        -soft_nms: soft NMS; scores are decayed instead of being zeroed
                        -diou_nms: plain (hard) NMS using DIoU in place of IoU
                     NOTE(review): there is no fallback branch — with any other method name
                     only classes that have a single box produce output.
        Returns a list with one entry per image. Each entry is a tensor whose rows are
            (x1, y1, x2, y2, conf, class)
        sorted by descending conf, or None when nothing survived for that image.
    """
    # Box constraints
    min_wh, max_wh = 2, 4096  # (pixels) allowed width and height range [min_wh, max_wh]
    output = [None] * len(prediction)  # one slot per image in the batch for the final detections
    for image_i, pred in enumerate(prediction):
        # At the start, e.g. pred = [12096, 25]
        # Filter 1: remove background candidates (objectness not above conf_thres)
        pred = pred[pred[:, 4] > conf_thres]  # pred = [45, 25]

        # Filter 2: remove boxes whose width or height lies outside (min_wh, max_wh)  x=[45, 25]
        pred = pred[(pred[:, 2:4] > min_wh).all(1) & (pred[:, 2:4] < max_wh).all(1)]

        # Nothing left for this image after the first two filters: go on to the next image
        if len(pred) == 0:
            continue

        # Compute the per-class score
        pred[..., 5:] *= pred[..., 4:5]  # score = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(pred[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_cls or conf_thres < 0.01:
            # Filter 3: keep every (box, class) pair whose score (obj_conf * cls_conf) > conf_thres
            # A single box may match several classes, so it can appear more than once
            # nonzero: indices of the entries that passed   t(): split them into two index vectors
            # i: box index   j: class index   e.g. shape=43 after two low scores were dropped
            i, j = (pred[:, 5:] > conf_thres).nonzero(as_tuple=False).t()
            # pred = [43, xyxy+conf+class]
            pred = torch.cat((box[i], pred[i, j + 5].unsqueeze(1), j.float().unsqueeze(1)), 1)
        else:  # best class only
            conf, j = pred[:, 5:].max(1)  # take the highest-scoring class of each box
            pred = torch.cat((box, conf.unsqueeze(1), j.float().unsqueeze(1)), 1)[conf > conf_thres]

        # Nothing left for this image after filter 3: go on to the next image
        if len(pred) == 0:
            continue
        # Filter 4 (optional, disabled): drop rows containing non-finite values
        # pred = pred[torch.isfinite(pred).all(1)]

        # Sort by descending score in preparation for NMS
        pred = pred[pred[:, 4].argsort(descending=True)]

        # Batched NMS
        # Author's measured inference time for batched NMS: 0.054
        if method == 'hard_nms_batch': # plain (hard) NMS from torchvision, GPU capable, handles all classes at once
            # batched_nms arguments: boxes [43, xyxy], scores [43], class ids [43], IoU threshold
            output[image_i] = pred[torchvision.ops.boxes.batched_nms(pred[:, :4], pred[:, 4], pred[:, 5], nms_thres)]
            # print("hard_nms_batch")
            continue

        # All other NMS methods
        det_max = []  # collects the boxes that are kept (the "targets")
        cls = pred[:, -1]
        for c in cls.unique():  # every distinct class present
            dc = pred[cls == c]  # dc: all detections of class c
            n = len(dc)
            if n == 1:
                det_max.append(dc)  # No NMS required if only 1 prediction
                continue
            elif n > 500:  # these NMS loops are O(n^2), so only the 500 best boxes of a class are processed
                dc = dc[:500]  # limit to first 500 boxes: https://github.com/ultralytics/yolov3/issues/117

            # Author's measured inference time: 0.001
            if method == 'hard_nms':  # plain (hard) NMS from torchvision, one class at a time
                det_max.append(dc[torchvision.ops.boxes.nms(dc[:, :4], dc[:, 4], nms_thres)])

            # Author's measured inference time: 0.00299, about 3x the torchvision version
            elif method == 'hard_nms_myself':  # hand-written hard NMS, one class at a time
                while dc.shape[0]:  # dc.shape[0]: number of boxes still in play for this class
                    det_max.append(dc[:1])  # the best remaining box (first after sorting) becomes the target
                    if len(dc) == 1:  # exit once a single box is left
                        break
                    # dc[0]: target     dc[1:]: all other boxes
                    # NOTE: despite its name this variable holds plain IoU (no DIoU flag is passed)
                    diou = bbox_iou(dc[0], dc[1:])  # IoU of the target with the others
                    dc = dc[1:][diou < nms_thres]  # remove boxes that reach the threshold

            # Hard NMS that additionally drops boxes which have no sufficiently overlapping partner
            elif method == 'and':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    if iou.max() > 0.5:  # keep the target only if some other box overlaps it by more than 0.5
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            # Hard NMS whose kept box is the score-weighted average of the boxes overlapping it
            elif method == 'merge':  # weighted mixture box
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    i = bbox_iou(dc[0], dc) > nms_thres  # i: boolean mask of boxes overlapping the target
                    weights = dc[i, 4:5]     # scores of the boxes selected by i
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()  # score-weighted mean of their coordinates
                    det_max.append(dc[:1])
                    dc = dc[i == 0]

            # Author's measured inference time: 0.0030s
            elif method == 'soft_nms':  # soft-NMS      https://arxiv.org/abs/1704.04503
                sigma = 0.5  # soft-nms sigma parameter
                while len(dc):
                    # The ultralytics original read as follows; the author reordered it slightly:
                    # if len(dc) == 1:
                    #     det_max.append(dc)
                    #     break
                    # det_max.append(dc[:1])
                    det_max.append(dc[:1])   # keep the first row of dc as the target
                    if len(dc) == 1:
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # IoU of the target with the other boxes

                    # Unlike hard suppression, the scores are scaled, so shapes must line up
                    dc = dc[1:]  # dc = every box after the target
                    # dc must exclude the target so that it has the same length as iou
                    # NOTE(review): dc is not re-sorted after the decay — confirm this is intended
                    dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay the scores
                    dc = dc[dc[:, 4] > conf_thres]

            # Author's measured inference time: 0.00299
            elif method == 'diou_nms':  # DIoU NMS  https://arxiv.org/pdf/1911.08287.pdf
                while dc.shape[0]:  # dc.shape[0]: number of boxes still in play for this class
                    det_max.append(dc[:1])  # the best remaining box (first after sorting) becomes the target
                    if len(dc) == 1:  # exit once a single box is left
                        break
                    # dc[0]: target     dc[1:]: all other boxes
                    diou = bbox_iou(dc[0], dc[1:], DIoU=True)  # DIoU of the target with the others
                    dc = dc[1:][diou < nms_thres]  # keep boxes below the threshold, drop the rest

        if len(det_max):
            det_max = torch.cat(det_max)  # concatenate the per-class pieces appended above
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort by descending score

    return output

def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):
    """Overlap of a single box with each box of a set: IoU, GIoU, DIoU or CIoU.

    Args:
        box1: one box holding 4 values
        box2: n boxes, shape nx4 (transposed internally so each coordinate is a row)
        x1y1x2y2: True when boxes are (x1, y1, x2, y2); False when they are
            (center x, center y, width, height)
        GIoU, DIoU, CIoU: metric selectors, honoured in that order; plain IoU
            is returned when none is set
    Returns:
        the selected metric between box1 and every box of box2
    """
    # After the transpose, box2[k] is the k-th coordinate of all n boxes.
    box2 = box2.t()

    if not x1y1x2y2:
        # Centre/size representation: expand to corner coordinates.
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
    else:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

    # Intersection area; clamp(0) turns a negative extent (no overlap) into 0.
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union area; 1e-16 guards against a zero denominator.
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
    union = (w1 * h1 + 1e-16) + w2 * h2 - inter

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou

    # Width and height of the smallest box enclosing both boxes.
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
        c_area = cw * ch + 1e-16
        return iou - (c_area - union) / c_area

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    c2 = cw ** 2 + ch ** 2 + 1e-16  # squared diagonal of the enclosing box
    # Squared distance between the two box centres.
    rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4
    if DIoU:
        return iou - rho2 / c2

    # CIoU: https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
    with torch.no_grad():
        alpha = v / (1 - iou + v)
    return iou - (rho2 / c2 + v * alpha)

def xywh2xyxy(x):
    """Convert nx4 boxes from [x, y, w, h] (centre and size) to corner form.

    The result is [x1, y1, x2, y2] with (x1, y1) the top-left and (x2, y2) the
    bottom-right corner. Torch tensors and NumPy arrays are both accepted and
    the output has the same kind as the input.
    """
    if isinstance(x, torch.Tensor):
        corners = torch.zeros_like(x)
    else:
        corners = np.zeros_like(x)

    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    corners[:, 0] = x[:, 0] - half_w  # top left x
    corners[:, 1] = x[:, 1] - half_h  # top left y
    corners[:, 2] = x[:, 0] + half_w  # bottom right x
    corners[:, 3] = x[:, 1] + half_h  # bottom right y

    return corners

总结：

hard_nms：直接删除相邻的同类别目标，密集目标的输出不友好。
hard_nms_batch：普通的 (hard) nms 官方实现(c函数库),可支持gpu,支持多类别输入
hard_nms_myself：普通的 (hard) nms 自己实现的,只支持单类别输入
and：在hard-nms的逻辑基础上，增加是否为单独框的限制，删除没有重叠框的框（减少误检）。
merge：在hard-nms的基础上，增加保留框位置平滑策略（重叠框位置信息求解平均值），使框的位置更加精确。
soft_nms：改变其相邻同类别目标置信度（有关iou的函数），后期通过置信度阈值进行过滤，适用于目标密集的场景。
diou_nms：在hard-nms的基础上，用diou替换iou，理由参照diou的优势。
参考：【YOLO v4】常见的非极大值抑制方法：(Hard) NMS、Soft NMS、DIoU NMS_满船清梦压星河HK的博客-CSDN博客_yolov4使用的nms

图片测试方法代码：

# -*- coding:utf-8 -*-
import time

import cv2
import numpy as np
import torch
import torchvision

from utils.general import non_max_suppression_face


def soft_nms_pytorch(dets, box_scores, sigma=0.3, thresh=0.001, cuda=0):
    """
    Gaussian Soft-NMS implemented with PyTorch tensors.

    At step i the highest-scoring box among positions i..N-1 is swapped into
    position i, then the score of every later box is multiplied by
    ``exp(-iou ** 2 / sigma)``. Scores are decayed on a private copy, so the
    caller's ``box_scores`` tensor (often a view such as ``pred[:, 4]``) is
    left untouched.

    # Arguments
        dets:        boxes coordinate tensor, shape [N, 4] (format: [y1, x1, y2, x2])
        box_scores:  box score tensor, shape [N]
        sigma:       variance of the Gaussian decay
        thresh:      boxes whose decayed score is <= thresh are dropped
        cuda:        kept for backward compatibility only; all work happens on
                     ``dets.device`` so the flag no longer has any effect
    # Return
        long tensor holding the original indices of the selected boxes, in
        the order in which the boxes were picked
    """
    N = dets.shape[0]

    # Append each box's original index as a 5th column so it follows the box
    # through the swaps below. torch.cat allocates, so the caller's dets is safe.
    indexes = torch.arange(0, N, dtype=torch.float, device=dets.device).view(N, 1)
    dets = torch.cat((dets, indexes), dim=1)

    # Work on a copy: the loop both reorders and rescales the scores.
    scores = box_scores.clone()
    # The "+ 1" treats coordinates as inclusive pixel indices.
    areas = (dets[:, 3] - dets[:, 1] + 1) * (dets[:, 2] - dets[:, 0] + 1)

    for i in range(N):
        pos = i + 1

        if pos < N:
            # Bring the best remaining box to position i (selection-sort step).
            maxscore, maxpos = torch.max(scores[pos:], dim=0)
            if scores[i] < maxscore:
                j = maxpos.item() + pos
                # Advanced indexing on the right-hand side copies, so this is a safe swap.
                dets[[i, j]] = dets[[j, i]]
                scores[[i, j]] = scores[[j, i]]
                areas[[i, j]] = areas[[j, i]]

        # IoU between box i and every later box, computed on the tensors' own device.
        yy1 = torch.max(dets[i, 0], dets[pos:, 0])
        xx1 = torch.max(dets[i, 1], dets[pos:, 1])
        yy2 = torch.min(dets[i, 2], dets[pos:, 2])
        xx2 = torch.min(dets[i, 3], dets[pos:, 3])

        w = (xx2 - xx1 + 1).clamp(min=0)
        h = (yy2 - yy1 + 1).clamp(min=0)
        inter = w * h
        ovr = inter / (areas[i] + areas[pos:] - inter)

        # Gaussian decay of the later boxes' scores.
        scores[pos:] = torch.exp(-(ovr * ovr) / sigma) * scores[pos:]

    # Column 4 carries the original index of the box now sitting in each row.
    keep = dets[:, 4][scores > thresh]
    return keep.long()


def cluster_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS: matrix-form hard NMS that treats all boxes as one class.

    Boxes are sorted by descending score and the strictly upper-triangular
    pairwise IoU matrix is built with ``jaccard``. The loop then repeatedly
    zeroes the rows of boxes that are currently suppressed, so that a
    suppressed box cannot suppress anything itself, until the matrix stops
    changing (at most 200 rounds).

    Args:
        boxes: (tensor) boxes, Shape: [N,4].
        scores: (tensor) one score per box, Shape: [N].
        iou_threshold: a box is dropped when its IoU with a kept,
            higher-scoring box is greater than this value.
        top_k: accepted but not used by this implementation.
    Return:
        (long tensor) indices into ``boxes`` of the kept boxes, best score first.
    """
    if scores.numel() == 0:
        # torch.max(dim=0) raises on an empty matrix; nothing in, nothing out.
        return torch.empty(0, dtype=torch.long, device=scores.device)

    _, idx = scores.sort(0, descending=True)
    boxes_idx = boxes[idx]
    # Entry [i, j] with i < j: IoU of box j with the higher-scoring box i.
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)
    B = iou
    for _round in range(200):
        A = B
        maxA = A.max(dim=0)[0]
        # E[i, :] is 1 while box i is still kept, so only kept boxes suppress.
        E = (maxA <= iou_threshold).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    return idx[maxA <= iou_threshold]


def cluster_diounms(boxes, scores, iou_threshold: float = 0.5, dense_mask=None, top_k: int = 200):
    """Cluster-NMS driven by ``diou`` with an optional per-location threshold map.

    Boxes are sorted by descending score and compared pairwise with
    ``diou(..., delta=0.7)``. When ``dense_mask`` is given, every box looks up
    its own suppression threshold in that map at ``(x1 + x2) // 16`` and
    ``(y1 + y2) // 16`` (clamped to a 136 x 76 grid), and ``iou_threshold``
    acts as a lower bound on the looked-up value.

    Args:
        boxes: (tensor) boxes, Shape: [N,4].
        scores: (tensor) one score per box, Shape: [N].
        iou_threshold: minimum suppression threshold for every box.
        dense_mask: (tensor, optional) threshold map; its two leading
            dimensions are squeezed away before it is indexed as [y, x].
            NOTE(review): presumably shaped [1, 1, 76, 136] — confirm against
            the producer. When omitted (None or an empty list) every box
            simply uses ``iou_threshold``.
        top_k: accepted but not used by this implementation.
    Return:
        (long tensor) indices into ``boxes`` of the kept boxes, best score first.
    """
    if scores.numel() == 0:
        # torch.max(dim=0) raises on an empty matrix; nothing in, nothing out.
        return torch.empty(0, dtype=torch.long, device=scores.device)

    _, idx = scores.sort(0, descending=True)
    boxes_idx = boxes[idx]
    iou = diou(boxes_idx, boxes_idx, delta=0.7).triu_(diagonal=1)

    mask_missing = dense_mask is None or (
        isinstance(dense_mask, (list, tuple)) and len(dense_mask) == 0)
    if mask_missing:
        # No map supplied: one global threshold for all boxes.
        limits = iou_threshold
    else:
        x_inds = ((boxes_idx[:, 0] + boxes_idx[:, 2]) // 16).clamp(min=0, max=135)
        y_inds = ((boxes_idx[:, 1] + boxes_idx[:, 3]) // 16).clamp(min=0, max=75)
        dense_mask = dense_mask.squeeze(dim=0).squeeze(dim=0)
        x_inds = x_inds.long().to(dense_mask.device)
        y_inds = y_inds.long().to(dense_mask.device)
        # Advanced indexing copies, so the caller's map is never modified.
        # Follow the boxes' device instead of forcing CUDA.
        limits = dense_mask[y_inds, x_inds].to(iou.device)
        limits[limits <= iou_threshold] = iou_threshold

    B = iou
    for _round in range(200):
        A = B
        maxA = A.max(dim=0)[0]
        # E[i, :] is 1 while box i is still kept, so only kept boxes suppress.
        E = (maxA < limits).float().unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    return idx[maxA < limits]


def cluster_SPM_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS followed by a Gaussian score penalty.

    The suppression matrix is iterated exactly as in ``cluster_nms``.  Each
    score is then multiplied by ``exp(-iou**2 / 0.2)`` for every surviving
    higher-scoring box that overlaps it, and boxes whose decayed score stays
    above 0.01 are kept.

    Args:
        boxes: tensor of shape ``[N, 4]`` in the corner format expected by
            ``jaccard``.
        scores: tensor of shape ``[N]``.
        iou_threshold: IoU threshold used while iterating the suppression
            matrix.
        top_k: unused; kept so existing callers keep working.

    Returns:
        Long tensor of indices into ``boxes`` for the kept boxes, ordered by
        descending original score.
    """
    _, idx = scores.sort(0, descending=True)
    if idx.numel() == 0:
        # torch.max over an empty dimension raises, so answer directly.
        return idx
    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)
    B = iou
    for _step in range(200):
        A = B
        maxA = torch.max(A, dim=0)[0]
        # Row mask: 1 for boxes that currently survive, 0 for suppressed ones.
        E = (maxA <= iou_threshold).to(iou.dtype).unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    # Column-wise product accumulates the decay from every overlapping row.
    scores = torch.prod(torch.exp(-B ** 2 / 0.2), 0) * scores
    return idx[scores > 0.01]


def cluster_SPM_dist_nms(boxes, scores, iou_threshold: float = 0.5, top_k: int = 200):
    """Cluster-NMS with a Gaussian score penalty relaxed by centre distance.

    The suppression matrix is iterated as in ``cluster_nms``.  For every
    overlapping pair the decay factor is ``exp(-iou**2 / 0.2)`` plus the
    normalised centre distance returned by ``distance`` (``delta=0.7``),
    capped at 1.  Scores are multiplied by the product of these factors and
    boxes whose result stays above 0.15 are kept.

    Args:
        boxes: tensor of shape ``[N, 4]`` as ``(x1, y1, x2, y2)``.
        scores: tensor of shape ``[N]``.
        iou_threshold: IoU threshold used while iterating the suppression
            matrix.
        top_k: unused; kept so existing callers keep working.

    Returns:
        Long tensor of indices into ``boxes`` for the kept boxes, ordered by
        descending original score.
    """
    _, idx = scores.sort(0, descending=True)
    if idx.numel() == 0:
        # torch.max over an empty dimension raises, so answer directly.
        return idx
    boxes_idx = boxes[idx]
    scores = scores[idx]
    iou = jaccard(boxes_idx, boxes_idx).triu_(diagonal=1)
    B = iou
    for _step in range(200):
        A = B
        maxA = torch.max(A, dim=0)[0]
        # Row mask: 1 for boxes that currently survive, 0 for suppressed ones.
        E = (maxA <= iou_threshold).to(iou.dtype).unsqueeze(1).expand_as(A)
        B = iou.mul(E)
        if A.equal(B):
            break
    D = distance(boxes_idx, boxes_idx, delta=0.7)
    # Upper bound of 1 so the distance bonus can never raise a score.
    cap = (B >= 0).to(B.dtype)
    overlapping = (B > 0).to(B.dtype)
    decay = torch.min(torch.exp(-B ** 2 / 0.2) + D * overlapping, cap)
    scores = torch.prod(decay, 0) * scores
    return idx[scores > 0.15]


def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of corner-format boxes.

    Broadcasting is used instead of explicit ``expand`` calls, so the same
    code serves plain and batched inputs (the batched form is what the
    ``use_batch`` branches of ``diou``, ``d2iou`` and ``distance`` rely on).

    Args:
        box_a: tensor of shape ``[A, 4]`` or ``[n, A, 4]`` holding
            ``(x1, y1, x2, y2)``.
        box_b: tensor of shape ``[B, 4]`` or ``[n, B, 4]`` holding
            ``(x1, y1, x2, y2)``.

    Returns:
        Tensor of shape ``[A, B]`` (or ``[n, A, B]``) with the intersection
        area of every pair; 0 where the boxes do not overlap.
    """
    # [..., A, 1, 2] against [..., 1, B, 2] broadcasts to [..., A, B, 2].
    max_xy = torch.min(box_a[..., 2:4].unsqueeze(-2), box_b[..., 2:4].unsqueeze(-3))
    min_xy = torch.max(box_a[..., 0:2].unsqueeze(-2), box_b[..., 0:2].unsqueeze(-3))
    # Negative extents mean "no overlap" along that axis.
    inter = torch.clamp(max_xy - min_xy, min=0)
    return inter[..., 0] * inter[..., 1]


def jaccard(box_a, box_b, iscrowd=False):
    """Pairwise IoU (Jaccard index) between two sets of corner-format boxes.

        IoU(a, b) = |a ∩ b| / (area(a) + area(b) - |a ∩ b|)

    Args:
        box_a: tensor of shape ``[A, 4]`` holding ``(x1, y1, x2, y2)``.
        box_b: tensor of shape ``[B, 4]`` holding ``(x1, y1, x2, y2)``.
        iscrowd: when true, the intersection is divided by the area of the
            ``box_a`` entry instead of by the union.

    Returns:
        Tensor of shape ``[A, B]``.
    """
    overlap = intersect(box_a, box_b)
    width_a = box_a[:, 2] - box_a[:, 0]
    height_a = box_a[:, 3] - box_a[:, 1]
    width_b = box_b[:, 2] - box_b[:, 0]
    height_b = box_b[:, 3] - box_b[:, 1]
    # Column vector for A and row vector for B broadcast to [A, B].
    area_a = (width_a * height_a)[:, None]
    area_b = (width_b * height_b)[None, :]

    if iscrowd:
        return overlap / area_a
    return overlap / (area_a + area_b - overlap)


def diou(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise distance-penalised IoU between two sets of corner-format boxes.

    The value is ``IoU - D ** delta`` where ``D`` is the squared distance
    between box centres divided by the squared diagonal of the smallest box
    enclosing both (plus 1e-7).

    Args:
        box_a: tensor of shape ``[A, 4]`` (or ``[n, A, 4]``) as
            ``(x1, y1, x2, y2)``.
        box_b: tensor of shape ``[B, 4]`` (or ``[n, B, 4]``).
        delta: exponent applied to the distance term.
        iscrowd: when true, return intersection over the area of the
            ``box_a`` entry and ignore the distance term.

    Returns:
        Tensor of shape ``[A, B]`` for 2-D inputs, otherwise with the leading
        batch dimension kept.
    """
    overlap = intersect(box_a, box_b)
    batched = box_a.dim() != 2
    if not batched:
        # Work on a singleton batch and strip it again before returning.
        box_a, box_b, overlap = box_a[None, ...], box_b[None, ...], overlap[None, ...]

    # a-side coordinates are [n, A, 1], b-side are [n, 1, B]; they broadcast.
    l1, t1, r1, b1 = (box_a[:, :, k].unsqueeze(2) for k in range(4))
    l2, t2, r2, b2 = (box_b[:, :, k].unsqueeze(1) for k in range(4))

    area_a = (r1 - l1) * (b1 - t1)
    area_b = (r2 - l2) * (b2 - t2)
    union = area_a + area_b - overlap

    center_dx = (r2 + l2) / 2 - (r1 + l1) / 2
    center_dy = (b2 + t2) / 2 - (b1 + t1) / 2
    hull_w = torch.max(r1, r2) - torch.min(l1, l2)
    hull_h = torch.max(b1, b2) - torch.min(t1, t2)
    penalty = (center_dx ** 2 + center_dy ** 2) / (hull_w ** 2 + hull_h ** 2 + 1e-7)

    if iscrowd:
        result = overlap / area_a
    else:
        result = overlap / union - penalty ** delta
    return result if batched else result.squeeze(0)


def d2iou(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise IoU penalised by the larger per-axis normalised centre offset.

    The value is ``IoU - D ** delta`` where ``D`` is the maximum of
    ``dx**2 / (W**2 + 1e-7)`` and ``dy**2 / (H**2 + 1e-7)``; ``dx``/``dy`` are
    the centre offsets and ``W``/``H`` the width and height of the smallest
    box enclosing both boxes.

    Args:
        box_a: tensor of shape ``[A, 4]`` (or ``[n, A, 4]``) as
            ``(x1, y1, x2, y2)``.
        box_b: tensor of shape ``[B, 4]`` (or ``[n, B, 4]``).
        delta: exponent applied to the distance term.
        iscrowd: when true, return intersection over the area of the
            ``box_a`` entry and ignore the distance term.

    Returns:
        Tensor of shape ``[A, B]`` for 2-D inputs, otherwise with the leading
        batch dimension kept.
    """
    inter = intersect(box_a, box_b)
    use_batch = box_a.dim() != 2
    if not use_batch:
        # Work on a singleton batch and strip it again before returning.
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]
        inter = inter[None, ...]

    # a-side coordinates are [n, A, 1], b-side are [n, 1, B]; they broadcast.
    l1, t1, r1, b1 = (box_a[:, :, k].unsqueeze(2) for k in range(4))
    l2, t2, r2, b2 = (box_b[:, :, k].unsqueeze(1) for k in range(4))

    area_a = (r1 - l1) * (b1 - t1)
    area_b = (r2 - l2) * (b2 - t2)
    union = area_a + area_b - inter

    dx_sq = ((r2 + l2) / 2 - (r1 + l1) / 2) ** 2
    dy_sq = ((b2 + t2) / 2 - (b1 + t1) / 2) ** 2
    hull_w_sq = (torch.max(r1, r2) - torch.min(l1, l2)) ** 2
    hull_h_sq = (torch.max(b1, b2) - torch.min(t1, t2)) ** 2
    D = torch.max(dx_sq / (hull_w_sq + 1e-7), dy_sq / (hull_h_sq + 1e-7))

    out = inter / area_a if iscrowd else inter / union - D ** delta
    return out if use_batch else out.squeeze(0)


def distance(box_a, box_b, delta=0.9, iscrowd: bool = False):
    """Pairwise normalised centre distance between two sets of boxes.

    For every pair the squared distance between box centres is divided by the
    squared diagonal of the smallest box enclosing both (plus 1e-7), and the
    ratio is raised to ``delta``.

    Args:
        box_a: tensor of shape ``[A, 4]`` (or ``[n, A, 4]``) as
            ``(x1, y1, x2, y2)``.
        box_b: tensor of shape ``[B, 4]`` (or ``[n, B, 4]``).
        delta: exponent applied to the normalised distance.
        iscrowd: accepted for signature parity with ``diou``; it has no
            effect on the result.

    Returns:
        Tensor of shape ``[A, B]`` for 2-D inputs, otherwise ``[n, A, B]``.
    """
    use_batch = box_a.dim() != 2
    if not use_batch:
        # Work on a singleton batch and strip it again before returning.
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    # a-side coordinates are [n, A, 1], b-side are [n, 1, B]; broadcasting
    # yields [n, A, B] without computing an intersection just for its shape.
    l1, t1, r1, b1 = (box_a[:, :, k].unsqueeze(2) for k in range(4))
    l2, t2, r2, b2 = (box_b[:, :, k].unsqueeze(1) for k in range(4))

    center_dx = (r2 + l2) / 2 - (r1 + l1) / 2
    center_dy = (b2 + t2) / 2 - (b1 + t1) / 2
    hull_w = torch.max(r1, r2) - torch.min(l1, l2)
    hull_h = torch.max(b1, b2) - torch.min(t1, t2)

    out = ((center_dx ** 2 + center_dy ** 2) / (hull_w ** 2 + hull_h ** 2 + 1e-7)) ** delta
    return out if use_batch else out.squeeze(0)


def speed():
    """Benchmark ``soft_nms_pytorch`` on random boxes and print the mean time per call.

    Runs 1000 calls, each on 100 random boxes, on the GPU when CUDA is
    available and on the CPU otherwise.
    """
    n_runs = 1000
    boxes = 1000 * torch.rand((n_runs, 100, 4), dtype=torch.float)
    boxscores = torch.rand((n_runs, 100), dtype=torch.float)

    # cuda flag
    cuda = 1 if torch.cuda.is_available() else 0
    if cuda:
        boxes = boxes.cuda()
        boxscores = boxscores.cuda()
        # Finish the host-to-device copies before the clock starts.
        torch.cuda.synchronize()

    start = time.perf_counter()
    for i in range(n_runs):
        soft_nms_pytorch(boxes[i], boxscores[i], cuda=cuda)
    if cuda:
        # CUDA kernels run asynchronously; wait for them before stopping the clock.
        torch.cuda.synchronize()
    elapsed = time.perf_counter() - start

    # seconds in total -> milliseconds per call
    print("Average run time: %f ms" % (elapsed / n_runs * 1000.0))


def test(method='standard'):
    """Visual smoke test: draw three boxes, run one NMS variant, draw the survivors.

    Args:
        method: which NMS implementation to run -- one of ``'standard'``,
            ``'soft'``, ``'cluster'``, ``'cluster_SPM'``, ``'cluster_diou'``
            or ``'cluster_SPM_dist'``.

    Raises:
        FileNotFoundError: if the demo image cannot be read.
        ValueError: if ``method`` is not one of the names above.
    """
    # boxes and boxscores
    boxes = torch.tensor([
        [683, 33, 1188, 715],
        [187, 198, 753, 720],
        [187, 198, 1188, 720],
    ], dtype=torch.float)
    image_path = 'data/images/zidane.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % image_path)

    # Show every candidate box; the second one is drawn in a different colour.
    image2 = image.copy()
    for index, box in enumerate(boxes):
        color = (0, 127, 0) if index == 1 else (0, 255, 255)
        cv2.rectangle(image2, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
    cv2.imshow("image_0", image2)
    cv2.waitKey(1)

    boxscores = torch.tensor([0.8, 0.7, 0.7], dtype=torch.float)
    nms_thres = 0.5
    # cuda flag
    cuda = 1 if torch.cuda.is_available() else 0
    if cuda:
        boxes = boxes.cuda()
        boxscores = boxscores.cuda()

    if method == 'standard':
        nms_indices = torchvision.ops.nms(boxes, boxscores, nms_thres)
    elif method == 'soft':
        nms_indices = soft_nms_pytorch(boxes, boxscores, sigma=0.5, thresh=0.2, cuda=cuda)
    elif method == "cluster":
        nms_indices = cluster_nms(boxes, boxscores, nms_thres)
    elif method == "cluster_SPM":
        nms_indices = cluster_SPM_nms(boxes, boxscores, nms_thres)
    elif method == "cluster_diou":
        nms_indices = cluster_diounms(boxes, boxscores, nms_thres)
    elif method == "cluster_SPM_dist":
        nms_indices = cluster_SPM_dist_nms(boxes, boxscores, nms_thres)
    else:
        raise ValueError('Invalid NMS type!')

    print(nms_indices)
    # Draw the surviving boxes in red on the untouched image.
    for index in nms_indices:
        box = boxes[index]
        cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
    cv2.imshow("result", image)
    cv2.waitKey()


# Script entry point: run the visual NMS demo when this file is executed directly.
if __name__ == '__main__':
    test()

diou测试：

import cv2
import torch


def diou_box_nms(boxes, scores, iou_thres):
    """Greedy non-maximum suppression using DIoU as the overlap measure.

    The highest-scoring remaining box is kept, then every remaining box whose
    DIoU with it exceeds ``iou_thres`` is discarded; this repeats until no
    boxes are left.  DIoU is IoU minus the squared centre distance divided by
    the squared diagonal of the enclosing box, clamped to ``[-1, 1]``.

    Box areas and intersections both use plain ``(x2 - x1) * (y2 - y1)``
    extents, so two identical boxes have an IoU of exactly 1.

    Args:
        boxes: tensor of shape ``[N, 4]`` as ``(x1, y1, x2, y2)``.
        scores: tensor of shape ``[N]``.
        iou_thres: boxes with DIoU ``<= iou_thres`` against the kept box
            survive the round.

    Returns:
        Long tensor with the indices of the kept boxes in descending score
        order, on the same device as ``boxes``.
    """
    if boxes.shape[0] == 0:
        return torch.zeros(0, device=boxes.device).long()
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = torch.sort(scores, descending=True)[1]  # (?,)
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]

        # Intersection of the kept box with every remaining candidate.
        inter_w = torch.clamp(torch.min(x2[rest], x2[i]) - torch.max(x1[rest], x1[i]), min=0.0)
        inter_h = torch.clamp(torch.min(y2[rest], y2[i]) - torch.max(y1[rest], y1[i]), min=0.0)
        inter_area = inter_w * inter_h
        iou = inter_area / (areas[i] + areas[rest] - inter_area + 1e-16)

        # Squared distance between box centres.
        center_dx = (x1[rest] + x2[rest]) / 2 - (x1[i] + x2[i]) / 2
        center_dy = (y1[rest] + y2[rest]) / 2 - (y1[i] + y2[i]) / 2
        inter_diag = center_dx ** 2 + center_dy ** 2

        # Squared diagonal of the smallest box enclosing both.
        outer_w = torch.max(x2[rest], x2[i]) - torch.min(x1[rest], x1[i])
        outer_h = torch.max(y2[rest], y2[i]) - torch.min(y1[rest], y1[i])
        outer_diag = outer_w ** 2 + outer_h ** 2

        # The epsilon guards against 0/0 when both boxes are the same point.
        diou_vals = torch.clamp(iou - inter_diag / (outer_diag + 1e-16), min=-1.0, max=1.0)

        # A boolean mask keeps `order` one-dimensional however many survive.
        order = rest[diou_vals <= iou_thres]
    return torch.tensor(keep, dtype=torch.long, device=boxes.device)

# Demo: draw three candidate boxes, run DIoU-NMS and draw the survivors.
if __name__ == '__main__':

    boxes = torch.tensor([
        [683, 33, 1188, 715],
        [187, 198, 753, 720],
        [187, 198, 1188, 720],
    ], dtype=torch.float)
    image_path = '../inference/images/zidane.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %s' % image_path)

    # Show every candidate box; the second one is drawn in a different colour.
    image2 = image.copy()
    for index, box in enumerate(boxes):
        color = (0, 127, 0) if index == 1 else (0, 255, 255)
        cv2.rectangle(image2, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
    cv2.imshow("image_0", image2)
    cv2.waitKey(1)

    scores = torch.tensor([0.8, 0.7, 0.7], dtype=torch.float)

    keep = diou_box_nms(boxes, scores, 0.6)

    print(keep)
    # Draw the surviving boxes in red on the untouched image.
    for index in keep:
        box = boxes[index]
        cv2.rectangle(image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
    cv2.imshow("result", image)
    cv2.waitKey()

iou-giou-diou-ciou-nms python numpy 代码例子

转自：

iou-giou-diou-ciou-nms相关code_小猪猪爱吃饭的博客-CSDN博客

import math

import numpy


def iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Both boxes are ``(x1, y1, x2, y2)`` with inclusive pixel coordinates, so
    every width and height is measured as ``max - min + 1``.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    area_a = max(0, ax2 - ax1 + 1) * max(0, ay2 - ay1 + 1)
    area_b = max(0, bx2 - bx1 + 1) * max(0, by2 - by1 + 1)

    # Extent of the overlap rectangle along each axis (0 when disjoint).
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1) + 1)
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1) + 1)
    overlap = overlap_w * overlap_h

    return overlap / (area_a + area_b - overlap)

def giou(box1, box2):
    """Return the generalised IoU of two boxes.

    Both boxes are ``(x1, y1, x2, y2)`` with inclusive pixel coordinates
    (extents are ``max - min + 1``).  The result is
    ``IoU - (enclosing_area - union) / enclosing_area``.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2
    area1 = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area2 = max(0, x4 - x3 + 1) * max(0, y4 - y3 + 1)

    # Overlap rectangle.
    xx1 = max(x1, x3)
    yy1 = max(y1, y3)
    xx2 = min(x2, x4)
    yy2 = min(y2, y4)
    # Smallest rectangle enclosing both boxes.
    ae_x1 = min(x1, x3)
    ae_y1 = min(y1, y3)
    ae_x2 = max(x2, x4)
    ae_y2 = max(y2, y4)

    inter = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    u = area1 + area2 - inter
    # Both factors are clamped at 0; a one-argument max() on a number raises.
    ae = max(0, ae_x2 - ae_x1 + 1) * max(0, ae_y2 - ae_y1 + 1)

    return inter / u - (ae - u) / ae

def diou(box1, box2):
    """Return the distance-IoU of two boxes.

    Both boxes are ``(x1, y1, x2, y2)``; areas use inclusive pixel extents
    (``max - min + 1``).  The result is ``IoU - p2 / c2`` where ``p2`` is the
    squared distance between the box centres and ``c2`` the squared diagonal
    of the smallest enclosing box.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2
    area1 = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area2 = max(0, x4 - x3 + 1) * max(0, y4 - y3 + 1)

    # Squared distance between the two centres.
    c1_x, c1_y = (x2 + x1) / 2, (y2 + y1) / 2
    c2_x, c2_y = (x4 + x3) / 2, (y4 + y3) / 2
    p2 = (c2_x - c1_x) * (c2_x - c1_x) + (c2_y - c1_y) * (c2_y - c1_y)

    # Overlap rectangle.
    xx1 = max(x1, x3)
    yy1 = max(y1, y3)
    xx2 = min(x2, x4)
    yy2 = min(y2, y4)
    # Smallest rectangle enclosing both boxes.
    ae_x1 = min(x1, x3)
    ae_y1 = min(y1, y3)
    ae_x2 = max(x2, x4)
    ae_y2 = max(y2, y4)
    inter = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    u = area1 + area2 - inter
    c2 = (ae_x2 - ae_x1) * (ae_x2 - ae_x1) + (ae_y2 - ae_y1) * (ae_y2 - ae_y1)

    # c2 is 0 only when both boxes are the same single point; the centres
    # then coincide, so the penalty is 0 rather than a division by zero.
    penalty = p2 / c2 if c2 > 0 else 0.0

    return inter / u - penalty


def ciou(box1, box2, alpha):
    """Return the complete-IoU of two boxes.

    Both boxes are ``(x1, y1, x2, y2)``; areas use inclusive pixel extents
    (``max - min + 1``).  The result is ``IoU - p2 / c2 - alpha * v`` where
    ``p2`` is the squared centre distance, ``c2`` the squared diagonal of the
    smallest enclosing box and ``v`` the aspect-ratio consistency term
    ``4 / pi**2 * (arctan(w1 / h1) - arctan(w2 / h2)) ** 2``.

    Args:
        box1: first box.
        box2: second box.
        alpha: weight of the aspect-ratio term, supplied by the caller.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2
    # Heights are floored at a tiny positive value so w / h never divides by 0.
    w1, h1 = max(x2 - x1 + 1, 0), max(y2 - y1 + 1, 1e-15)
    w2, h2 = max(x4 - x3 + 1, 0), max(y4 - y3 + 1, 1e-15)
    area1 = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    area2 = max(0, x4 - x3 + 1) * max(0, y4 - y3 + 1)

    # Squared distance between the two centres.
    c1_x, c1_y = (x2 + x1) / 2, (y2 + y1) / 2
    c2_x, c2_y = (x4 + x3) / 2, (y4 + y3) / 2
    p2 = (c2_x - c1_x) * (c2_x - c1_x) + (c2_y - c1_y) * (c2_y - c1_y)

    # Overlap rectangle.
    xx1 = max(x1, x3)
    yy1 = max(y1, y3)
    xx2 = min(x2, x4)
    yy2 = min(y2, y4)
    # Smallest rectangle enclosing both boxes.
    ae_x1 = min(x1, x3)
    ae_y1 = min(y1, y3)
    ae_x2 = max(x2, x4)
    ae_y2 = max(y2, y4)
    inter = max(0, xx2 - xx1 + 1) * max(0, yy2 - yy1 + 1)
    u = area1 + area2 - inter
    c2 = (ae_x2 - ae_x1) * (ae_x2 - ae_x1) + (ae_y2 - ae_y1) * (ae_y2 - ae_y1)

    v = 4 * ((numpy.arctan(w1 / h1) - numpy.arctan(w2 / h2)) ** 2) / (numpy.pi ** 2)

    # c2 is 0 only when both boxes are the same single point; the centres
    # then coincide, so the penalty is 0 rather than a division by zero.
    penalty = p2 / c2 if c2 > 0 else 0.0

    return inter / u - penalty - alpha * v

def nms(dets, thre):
    """Greedy IoU-based non-maximum suppression on a NumPy detection array.

    Args:
        dets: 2-D array-like whose first five columns are
            ``x1, y1, x2, y2, score``; coordinates are inclusive pixels, so
            extents are ``max - min + 1``.
        thre: candidates whose IoU with the kept box is ``<= thre`` survive
            the round.

    Returns:
        List of row indices into ``dets`` for the kept detections, in
        descending score order.
    """
    dets = numpy.asarray(dets)
    if dets.size == 0:
        return []
    # Columns, not row slices: dets[:0] would be an empty array.
    x1, y1, x2, y2, s = (dets[:, k] for k in range(5))
    order = numpy.argsort(s)[::-1]
    keep = []
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # The truth value of a multi-element array is ambiguous; test the size.
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        rest = order[1:]

        xx1 = numpy.maximum(x1[i], x1[rest])
        yy1 = numpy.maximum(y1[i], y1[rest])
        xx2 = numpy.minimum(x2[i], x2[rest])
        yy2 = numpy.minimum(y2[i], y2[rest])

        w = numpy.maximum(xx2 - xx1 + 1, 0)
        h = numpy.maximum(yy2 - yy1 + 1, 0)
        inter = w * h
        overlap = inter / (area[i] + area[rest] - inter)
        order = rest[overlap <= thre]
    return keep

AI算法网奇

关注

1
点赞
踩
19

收藏

觉得还不错? 一键收藏
打赏
1
评论
yolov5 soft_nms cluster_nms,cluster_SPM_nms,cluster_diounms,cluster_SPM_dist_nms,diou_nms

调用： # Non-maximum suppression if method == 'standard': nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres) elif method == 'soft': nms_indices = soft_nms_pytorch(pred[:, :4], pred[:, 4], sigma=0.5, thres..
复制链接

扫一扫