Yolact_minimal 代码修改及其他

lzfshub

已于 2022-11-12 22:58:18 修改

阅读量286

点赞数

分类专栏： ubuntu 文章标签：深度学习 python 计算机视觉

于 2022-11-12 15:49:01 首次发布

本文链接：https://blog.csdn.net/weixin_40511249/article/details/127821074

版权

ubuntu 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

0. 源码
Yolact_minimal

1. 导出推理文件
使用如下导出方式比官方的快一倍左右

python -m onnxsim ./onnx_files/res50_coco.onnx ./onnx_files/res50_coco_sim.onnx # 进行精简
trtexec --onnx=onnx_files/res50_coco_sim.onnx --workspace=10240 --int8 --saveEngine=trt_files/res50_coco.engine

2. webcam预测

参数解释添加：

#!/usr/bin/env python 
# -*- coding:utf-8 -*-
import argparse
import cv2
import time
import math
import time
import torch
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np

from config import get_config
from utils.box_utils import make_anchors
from utils.augmentations import val_aug
from utils.output_utils import nms, after_nms, draw_img

# Command-line interface for the TensorRT webcam demo.
parser = argparse.ArgumentParser(description='YOLACT Detection with TensorRT.')
parser.add_argument('--weight', type=str, default='trt_files/res101_coco.trt')
parser.add_argument('--img_size', type=int, default=544, help='The image size for validation.')

# Boolean switches, registered in the same order as before: (flag, default, help text).
# NOTE: '--real_time' defaults to True and is a store_true flag, so it cannot be
# switched off from the command line.
_BOOLEAN_SWITCHES = (
    ('--traditional_nms', False, 'Whether to use traditional nms.'),
    ('--hide_mask', False, 'Hide masks in results.'),
    ('--hide_bbox', False, 'Hide boxes in results.'),
    ('--hide_score', False, 'Hide scores in results.'),
    ('--cutout', False, 'Cut out each object and save.'),
    ('--save_lincomb', False, 'Show the generating process of masks.'),
    ('--no_crop', False, 'Do not crop the output masks with the predicted bounding box.'),
    ('--real_time', True, 'Show the detection results real-timely.'),
)
for _flag, _default, _help in _BOOLEAN_SWITCHES:
    parser.add_argument(_flag, action='store_true', default=_default, help=_help)

parser.add_argument('--visual_thre', type=float, default=0.5,
                    help='Detections with a score under this threshold will be removed.')

args = parser.parse_args()
# The config name is fixed here rather than derived from --weight, so the engine passed
# via --weight is expected to match it. NOTE(review): confirm the default weight path
# above (res101) is really meant to be used with this res50 config.
args.cfg = 'res50_coco'
cfg = get_config(args, mode='detect')


class HostDeviceMem:
    """Pair a host buffer with its device buffer (nicer to use than a 2-tuple).

    Attributes are plain and public:
      host   -- the host-side buffer object passed to the constructor
      device -- the device-side buffer object passed to the constructor
    """

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # !s forces str() on both members, exactly like the original concatenation did.
        return f"Host:\n{self.host!s}\nDevice:\n{self.device!s}"

    def __repr__(self):
        # repr intentionally mirrors the human-readable form.
        return self.__str__()

# Build the anchor list: one make_anchors() call per FPN level, using a square
# feature map whose side is ceil(img_size / stride) and the matching entry of cfg.scales.
fpn_strides = (8, 16, 32, 64, 128)
fpn_fm_shape = [math.ceil(cfg.img_size / stride) for stride in fpn_strides]

anchors = []
for level, fm_side in enumerate(fpn_fm_shape):
    anchors.extend(make_anchors(cfg, fm_side, fm_side, cfg.scales[level]))

# Prepare the engine: deserialize it from cfg.weight and allocate one host/device
# buffer pair per binding. `inputs`, `outputs`, `bindings` and `stream` are consumed
# by the inference loop further down.
with open(cfg.weight, 'rb') as engine_file, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
    engine = runtime.deserialize_cuda_engine(engine_file.read())

    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for binding_item in engine:
        # Number of elements: volume of the binding shape times the engine's max batch size.
        n_elems = trt.volume(engine.get_binding_shape(binding_item)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding_item))

        # Page-locked host buffer plus a device allocation of the same byte size.
        pinned_host = cuda.pagelocked_empty(n_elems, np_dtype)
        gpu_buffer = cuda.mem_alloc(pinned_host.nbytes)

        # The device address goes into `bindings` in engine iteration order.
        bindings.append(int(gpu_buffer))

        # File the pair under inputs or outputs depending on the binding direction.
        destination = inputs if engine.binding_is_input(binding_item) else outputs
        destination.append(HostDeviceMem(pinned_host, gpu_buffer))

# ------------------------------------------------------------------------------------------------------------
# Inference itself runs on the GPU, so any other CUDA-related operation (e.g. CUDA operations
# in PyTorch) should be avoided here; otherwise unexpected errors may occur.
# ------------------------------------------------------------------------------------------------------------

# `anchors` (and `fpn_fm_shape`) were already built earlier in this script, right after the
# HostDeviceMem class, from the same cfg, strides and scales. Nothing in between modifies
# them, so the identical computation is not repeated here.

# Webcam inference loop: grab a frame, run it through the TensorRT engine, post-process
# the raw outputs with the project's nms/after_nms helpers, then draw and show the result.
# Press 'q' or ESC in the preview window to stop.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Failed to open the webcam (device index 0).')

max_fps = 0
try:
    # Create the execution context once and reuse it for every frame; re-creating it
    # on each iteration only adds per-frame overhead.
    with engine.create_execution_context() as context:
        while True:
            grabbed, img_origin = cap.read()
            if not grabbed:
                # No frame available (camera unplugged / stream ended): stop cleanly
                # instead of passing None to cv2.flip.
                break

            img_origin = cv2.flip(img_origin, 1)  # flip code 1: mirror horizontally
            img_h, img_w = img_origin.shape[0:2]
            img = val_aug(img_origin, cfg.img_size)
            img = img[np.newaxis, :]  # add a leading batch axis

            start = time.perf_counter()
            inputs[0].host = img  # input dtype should be float32
            # Transfer input data to the GPU.
            for inp in inputs:
                cuda.memcpy_htod_async(inp.device, inp.host, stream)
            # Run inference.
            context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            for out in outputs:
                cuda.memcpy_dtoh_async(out.host, out.device, stream)
            # Wait until all queued copies and the inference have finished.
            stream.synchronize()
            # Only the host-side outputs are needed from here on.
            results = [out.host for out in outputs]

            # Output index -> tensor mapping (0: proto, 1: box, 2: coef, 3: class) is kept
            # from the original script. NOTE(review): presumably it follows the output order
            # of the exported engine - verify if the export changes.
            class_p = results[3].reshape(1, -1, cfg.num_classes)
            box_p = results[1].reshape(1, -1, 4)
            proto_side = int(cfg.img_size / 4)
            proto_p = results[0].reshape(1, proto_side, proto_side, 32)
            coef_p = results[2].reshape(1, -1, 32)

            class_p = torch.from_numpy(class_p)
            box_p = torch.from_numpy(box_p)
            proto_p = torch.from_numpy(proto_p)
            coef_p = torch.from_numpy(coef_p)

            ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, anchors, cfg)

            ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                         img_h, img_w, cfg, img_name="img_name")

            # FPS covers upload, inference, download and post-processing (not drawing).
            # The floor on the elapsed time guards against a zero division.
            elapsed = max(time.perf_counter() - start, 1e-9)
            fps = 1 / elapsed

            img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name="img_name", fps=fps)
            cv2.imshow("test", img_numpy)
            pressed = cv2.waitKey(1) & 0xFF

            max_fps = max(max_fps, fps)
            print(f"\rMax FPS: {int(max_fps)}, FPS: {int(fps)}", end="")

            if pressed in (ord('q'), 27):  # 27 == ESC
                break
finally:
    # Always give the camera and the preview window back, even on errors or Ctrl+C.
    cap.release()
    cv2.destroyAllWindows()
    print()  # finish the '\r' status line

3. 运行

python detect_with_trt.py --weight='trt_files/res50_coco.engine'

lzfshub

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Yolact_minimal 代码修改及其他

使用如下导出方式比官方的快一倍左右。
复制链接

扫一扫

专栏目录

Yolact_minimal 代码修改及其他

“相关推荐”对你有帮助么？