YOLO inference and NMS

Test code

One additional note: cv2 cannot render Chinese text, but this can be done with other packages (Pillow, for example).
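A minimal sketch using Pillow, assuming a TTF font with CJK glyphs is available (the font path simhei.ttf and the helper name below are assumptions for illustration, not part of the original code):

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def put_chinese_text(img_bgr, text, xy, font_path='simhei.ttf', size=24, color=(255, 0, 0)):
    # convert the OpenCV BGR array to a PIL image, draw the text, then convert back
    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    ImageDraw.Draw(img_pil).text(xy, text, font=ImageFont.truetype(font_path, size), fill=color)
    return cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)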

from pathlib import Path

import cv2
import torch

from models.common import DetectMultiBackend
from utils.dataloaders import LoadImages
from utils.general import Profile, non_max_suppression, scale_boxes
from utils.torch_utils import select_device

device = select_device('cpu')
weights = r'D:\PycharmProjects\swallow\wights\yolov5s.pt'  # raw string so backslashes are not treated as escapes
dnn = False  # use OpenCV DNN for ONNX inference
half = False  # use FP16 half-precision inference
data = r'D:\PycharmProjects\swallow\config\coco128.yaml'

model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)

source = r'D:\PycharmProjects\swallow\data\images'
imgsz = (640, 640)  # inference size (height, width)
stride = 32
pt = True  # PyTorch backend
vid_stride = 1  # video frame-rate stride
bs = 1  # batch_size
conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IoU threshold
classes = [0, 1, 2, 3, 4]  # keep only these class indices
agnostic_nms = False  # class-agnostic NMS
max_det = 1000  # maximum detections per image
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
for path, im, im0s, vid_cap, s in dataset:
    with dt[0]:
        im = torch.from_numpy(im).to(model.device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
    with dt[1]:
        pred = model(im, augment=True, visualize=False)
    with dt[2]:
        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
    print(f'predictions: {pred}')
    for det in pred:  # per image
        p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
        p = Path(p)  # to Path
        if len(det):
            # rescale boxes from the letterboxed inference size back to the original image size
            det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
            for d in det:
                cv2.rectangle(im0, (int(d[0]), int(d[1])), (int(d[2]), int(d[3])), (0, 0, 255), 2)
    cv2.imshow('name', im0)
    cv2.waitKey(0)

DetectMultiBackend:

It supports inference with many different model formats:

# Usage:
#   PyTorch:              weights = *.pt
#   TorchScript:                    *.torchscript
#   ONNX Runtime:                   *.onnx
#   ONNX OpenCV DNN:                *.onnx --dnn
#   OpenVINO:                       *_openvino_model
#   CoreML:                         *.mlmodel
#   TensorRT:                       *.engine
#   TensorFlow SavedModel:          *_saved_model
#   TensorFlow GraphDef:            *.pb
#   TensorFlow Lite:                *.tflite
#   TensorFlow Edge TPU:            *_edgetpu.tflite
#   PaddlePaddle:                   *_paddle_model

1. First, determine the model type from the weights file suffix.

pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
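A minimal sketch of the idea (the real _model_type builds its suffix list from export_formats() and also handles URLs and Triton endpoints; the helper below is only an illustration):

from pathlib import Path

def model_type_sketch(w):
    # map a weights path to one boolean flag per format, purely by suffix / directory name
    suffixes = ['.pt', '.torchscript', '.onnx', '_openvino_model', '.mlmodel', '.engine',
                '_saved_model', '.pb', '.tflite', '_edgetpu.tflite', '_paddle_model']
    name = Path(w).name.lower()
    return [name.endswith(s) for s in suffixes]

# model_type_sketch('yolov5s.onnx') -> only the ONNX flag is True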

2. Initialize the model (the TorchScript branch is shown here):

        elif jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files['config.txt']:  # load metadata dict
                d = json.loads(extra_files['config.txt'],
                               object_hook=lambda d: {int(k) if k.isdigit() else k: v
                                                      for k, v in d.items()})
                stride, names = int(d['stride']), d['names']

3. forward dispatches to the loaded model:

        elif self.jit:  # TorchScript
            y = self.model(im)

Combined with the export.py tool, you can export the weights to these different formats and run whichever form of the model you need.
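For example, the standard export.py usage for producing TorchScript and ONNX models from the .pt weights:

python export.py --weights yolov5s.pt --include torchscript onnx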

Detect:

The box regression used in the training-time loss function:

https://mp.csdn.net/mp_blog/creation/editor/128985650

                pxy = pxy.sigmoid() * 2 - 0.5
                pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]

The corresponding decode at inference time:

self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
y = torch.cat((xy, wh, conf), 4)

Explanation:

The YOLO model is built on a feature pyramid. For an input image of size (640, 480), downsampling by strides (8, 16, 32) produces three feature maps of sizes (80, 60), (40, 30) and (20, 15). To map a prediction back to input-image coordinates, the grid position has to be multiplied by the stride. As the training loss above shows, the model only predicts an offset relative to the grid cell and the anchor, so at inference we simply invert that same transform.
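A small worked example of the decode for a single prediction, using made-up sigmoid outputs, a hypothetical grid cell (10, 20) at stride 8, and an anchor of (10, 13) pixels:

import torch

stride = 8.0
grid_xy = torch.tensor([10.0, 20.0]) - 0.5   # _make_grid subtracts 0.5 from the cell indices
anchor_wh = torch.tensor([10.0, 13.0])       # anchor_grid entry, already in pixels
sig_xy = torch.tensor([0.6, 0.4])            # hypothetical sigmoid outputs for x, y
sig_wh = torch.tensor([0.7, 0.5])            # hypothetical sigmoid outputs for w, h

xy = (sig_xy * 2 + grid_xy) * stride         # tensor([85.6, 162.4]) -> box centre in pixels
wh = (sig_wh * 2) ** 2 * anchor_wh           # tensor([19.6, 13.0])  -> box size in pixels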

Model output:

z.append(y.view(bs, self.na * nx * ny, self.no))

The raw shape would be (1, 3, 80, 60, 85), meaning: one image, split into an (80, 60) grid, with 3 anchor boxes per grid cell. Each anchor predicts a box (x, y, w, h) = 4 values, an objectness confidence (1), and per-class scores (80).

At inference we only care about the predicted objects, so the tensor is reshaped with view. The resulting shape reads as: how many images were predicted, and how many candidate objects were predicted in total (most of them are background; with three feature maps the number of predictions is huge).
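For the (640, 480) example above, the per-image prediction count works out as follows:

na = 3  # anchors per grid cell
feature_maps = [(80, 60), (40, 30), (20, 15)]  # strides 8, 16, 32 for a 640x480 input
total = na * sum(nx * ny for nx, ny in feature_maps)
print(total)  # 18900 candidate predictions, each a vector of length 85, before NMS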

NMS:

1. Use the objectness confidence to filter out the large number of background and low-confidence predictions.

xc = prediction[..., 4] > conf_thres  # candidates

for xi, x in enumerate(prediction):  # image index, image inference
    x = x[xc[xi]]  # confidence

2. Convert box coordinates from (cx, cy, w, h) to (x1, y1, x2, y2).

box = xywh2xyxy(x[:, :4]) 
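xywh2xyxy turns centre/size boxes into corner coordinates; a minimal sketch of the idea (not the exact library implementation):

import torch

def xywh2xyxy_sketch(x):
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    y = x.clone()
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # x1 = cx - w/2
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # y1 = cy - h/2
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # x2 = cx + w/2
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # y2 = cy + h/2
    return y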

3. Compute class scores, take the highest-scoring class for each prediction, and filter out predictions whose best score is still below the threshold.

The class score is the combined value objectness confidence × class probability, but the filtering criterion is still the confidence threshold.

x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

conf, j = x[:, 5:mi].max(1, keepdim=True)
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

4. Sort by confidence.

x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

5. Run NMS.

boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
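Here c is the class index scaled by a large constant (c = x[:, 5:6] * max_wh in non_max_suppression), which shifts boxes of different classes far apart so that a single class-agnostic torchvision.ops.nms call never suppresses boxes across classes. For reference, a minimal greedy NMS sketch, equivalent in spirit to torchvision.ops.nms:

import torch

def nms_sketch(boxes, scores, iou_thres):
    # greedy NMS: keep the highest-scoring box, drop every remaining box whose IoU
    # with it exceeds iou_thres, repeat; boxes are (x1, y1, x2, y2)
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(i.item())
        if order.numel() == 1:
            break
        rest = order[1:]
        # intersection of the kept box with the remaining boxes
        x1 = torch.maximum(boxes[i, 0], boxes[rest, 0])
        y1 = torch.maximum(boxes[i, 1], boxes[rest, 1])
        x2 = torch.minimum(boxes[i, 2], boxes[rest, 2])
        y2 = torch.minimum(boxes[i, 3], boxes[rest, 3])
        inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_r - inter)
        order = rest[iou <= iou_thres]
    return torch.tensor(keep)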

