OpenMMlab导出MaskFormer/Mask2Former模型并用onnxruntime和tensorrt推理

给算法爸爸上香

已于 2024-12-12 09:28:18 修改

阅读量827

点赞数 8

分类专栏： # model deployment # OpenMMlab # segmentation 文章标签：深度学习 pytorch openmmlab maskformer mask2former

于 2024-12-11 23:22:22 首次发布

本文链接：https://blog.csdn.net/taifyang/article/details/142152722

版权

model deployment 同时被 3 个专栏收录

35 篇文章

订阅专栏

OpenMMlab

10 篇文章

订阅专栏

segmentation

3 篇文章

订阅专栏

onnxruntime推理

使用mmdeploy导出onnx模型：

from mmdeploy.apis import torch2onnx
from mmdeploy.backend.sdk.export_info import export2SDK

# Export settings for Mask2Former (ONNX Runtime backend, CPU).
#
# To export MaskFormer instead, swap in the following values:
# img = './bus.jpg'
# work_dir = './work_dir/onnx/maskformer'
# save_file = './end2end.onnx'
# deploy_cfg = './configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py'
# model_cfg = '../mmdetection-3.3.0/configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py'
# model_checkpoint = '../checkpoints/maskformer_r50_ms-16xb1-75e_coco_20230116_095226-baacd858.pth'
# device = 'cpu'

# Model definition and weights.
model_cfg = '../mmdetection-3.3.0/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'
model_checkpoint = '../checkpoints/mask2former_r50_8xb2-lsj-50e_coco_20220506_191028-41b088b6.pth'

# Deployment configuration and target device.
deploy_cfg = './configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py'
device = 'cpu'

# Sample image passed to the exporter, and where the results are written.
img = './bus.jpg'
work_dir = './work_dir/onnx/mask2former'
save_file = './end2end.onnx'

# Step 1: convert the PyTorch model to ONNX.
torch2onnx(
    img,
    work_dir,
    save_file,
    deploy_cfg,
    model_cfg,
    model_checkpoint,
    device,
)

# Step 2: dump the pipeline information used by the SDK (dump-info).
export2SDK(
    deploy_cfg,
    model_cfg,
    work_dir,
    pth=model_checkpoint,
    device=device,
)

由于 mmdeploy SDK 目前尚未支持该模型的推理，需要自行编写 Python 推理脚本：

import cv2
import numpy as np
import onnxruntime
# import torch
# import torch.nn.functional as F


# Post-processing settings used by the script below.
num_classes = 133            # label value that marks "no object" / unassigned pixels
num_things_classes = 80      # class ids below this value are treated as "thing" instances
object_mask_thr = iou_thr = 0.8   # score threshold / minimum kept-area ratio
INSTANCE_OFFSET = 1000       # panoptic id = class id + instance id * INSTANCE_OFFSET
resize_shape = (1333, 800)   # (long edge, short edge) limits for the keep-ratio resize

# One random colour triple per class id, used for the mask overlay.
palette = [
    tuple(np.random.randint(0, 256) for _ in range(3))
    for _ in range(num_classes)
]


def resize_keep_ratio(image, img_scale):
    """Resize ``image`` to fit inside ``img_scale`` while keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (height, width).
        img_scale: Pair of size limits; the larger value bounds the long edge
            of the image and the smaller value bounds the short edge.

    Returns:
        The image resized with ``cv2.resize`` (default interpolation).
    """
    src_h, src_w = image.shape[:2]
    long_limit, short_limit = max(img_scale), min(img_scale)
    # The tighter of the two constraints decides the final scale.
    ratio = min(long_limit / max(src_h, src_w), short_limit / min(src_h, src_w))
    # int(x + 0.5) rounds the (positive) target size to the nearest pixel.
    dst_size = (int(src_w * ratio + 0.5), int(src_h * ratio + 0.5))
    return cv2.resize(image, dst_size)

def draw_binary_masks(img, binary_masks, colors, alphas=0.8):
    """Blend coloured masks over ``img`` and write the result to ``output.jpg``.

    Args:
        img: Image array the masks are drawn on; the caller's array is not modified.
        binary_masks: Stack of boolean masks, one per first-axis entry, each
            matching the image's height and width.
        colors: One colour per mask, broadcast over every pixel of the overlay.
        alphas: Blend weight of the coloured overlay, applied to every mask.
    """
    masks_u8 = binary_masks.astype('uint8') * 255
    blended = img
    for mask_u8, color in zip(masks_u8, colors):
        # Solid colour inside the mask, zero elsewhere.
        overlay = np.zeros_like(blended)
        overlay[...] = color
        overlay = cv2.bitwise_and(overlay, overlay, mask=mask_u8)
        # Current image outside the mask, zero inside it.
        outside = cv2.bitwise_and(blended, blended, mask=cv2.bitwise_not(mask_u8))
        # The two parts are disjoint, so their sum is "colour in mask, image elsewhere".
        blended = cv2.addWeighted(blended, 1 - alphas, overlay + outside, alphas, 0)
    cv2.imwrite("output.jpg", blended)


if __name__=="__main__":
    # Run the exported ONNX model on a single image with onnxruntime, fuse the
    # per-query class/mask logits into a panoptic map and save the overlay.
    image_path = 'E:/vscode_workspace/mmdeploy-1.3.1/bus.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError('Failed to read image: ' + image_path)
    image_resize = resize_keep_ratio(image, resize_shape)
    ori_h, ori_w = image.shape[:2]
    resized_h, resized_w = image_resize.shape[:2]

    # BGR -> RGB and HWC -> CHW, then per-channel (x - mean) / std.
    input_tensor = image_resize[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)
    input_tensor[0,:] = (input_tensor[0,:] - 123.675) / 58.395
    input_tensor[1,:] = (input_tensor[1,:] - 116.28) / 57.12
    input_tensor[2,:] = (input_tensor[2,:] - 103.53) / 57.375
    input_tensor = np.expand_dims(input_tensor, axis=0)  # add the batch dimension

    # Load the onnxruntime DLL first, then register the mmdeploy custom-op library.
    import ctypes
    ctypes.CDLL('E:/vscode_workspace/mmdeploy-1.3.1/mmdeploy/lib/onnxruntime.dll')
    session_options = onnxruntime.SessionOptions()
    session_options.register_custom_ops_library('E:/vscode_workspace/mmdeploy-1.3.1/mmdeploy/lib/mmdeploy_onnxruntime_ops.dll')
    onnx_session = onnxruntime.InferenceSession('E:/vscode_workspace/mmdeploy-1.3.1/work_dir/onnx/mask2former/end2end.onnx', session_options, providers=['CPUExecutionProvider'])

    # Every model input receives the same preprocessed tensor.
    inputs = {node.name: input_tensor for node in onnx_session.get_inputs()}
    outputs = onnx_session.run(None, inputs)

    batch_cls_logits = outputs[0]
    batch_mask_logits = outputs[1]

    # Softmax over the class axis, computed once; subtracting the row maximum
    # keeps np.exp from overflowing without changing the result.
    mask_cls = batch_cls_logits[0]
    shifted = mask_cls - mask_cls.max(axis=-1, keepdims=True)
    probs = np.exp(shifted)
    probs = probs / probs.sum(axis=-1, keepdims=True)
    scores = probs.max(-1)
    labels = probs.argmax(-1)

    # Keep queries that predict a real class with a high enough score.
    keep = np.not_equal(labels, num_classes) & (scores > object_mask_thr)
    cur_scores = scores[keep]
    cur_classes = labels[keep]

    # Crop to the resized-image extent (NOT the original size), then scale the
    # kept masks back to the original resolution. Only kept queries are
    # resized, which gives the same result with far less memory.
    kept_logits = batch_mask_logits[0][keep][:, :resized_h, :resized_w]
    cur_masks = np.zeros((kept_logits.shape[0], ori_h, ori_w))
    for i, logit in enumerate(kept_logits):
        cur_masks[i] = cv2.resize(logit, dsize=(ori_w, ori_h), interpolation=cv2.INTER_LINEAR)
    cur_masks = 1 / (1 + np.exp(-cur_masks))  # sigmoid

    panoptic_seg = np.full((ori_h, ori_w), num_classes, dtype=np.int32)
    if cur_masks.shape[0] > 0:  # argmax over an empty axis would raise
        cur_prob_masks = cur_scores.reshape(-1, 1, 1) * cur_masks
        cur_mask_ids = cur_prob_masks.argmax(0)  # winning query per pixel
        instance_id = 1
        for k in range(cur_classes.shape[0]):
            pred_class = int(cur_classes[k].item())
            isthing = pred_class < num_things_classes
            mask = cur_mask_ids == k
            mask_area = mask.sum().item()
            original_area = (cur_masks[k] >= 0.5).sum().item()
            if mask_area > 0 and original_area > 0:
                # Drop segments that lost most of their area to other queries.
                if mask_area / original_area < iou_thr:
                    continue
                if not isthing:
                    panoptic_seg[mask] = pred_class
                else:
                    panoptic_seg[mask] = (pred_class + instance_id * INSTANCE_OFFSET)
                    instance_id += 1

    # One boolean mask per segment id; the class id selects the colour.
    ids = np.unique(panoptic_seg)[::-1]
    ids = ids[ids != num_classes]
    labels = (ids % INSTANCE_OFFSET).astype(np.int64)
    segms = (panoptic_seg[None] == ids[:, None, None])
    colors = [palette[label] for label in labels]
    draw_binary_masks(image, segms, colors)

tensorrt推理

使用mmdeploy导出engine模型：

from mmdeploy.apis import torch2onnx
from mmdeploy.backend.tensorrt.onnx2tensorrt import onnx2tensorrt
from mmdeploy.backend.sdk.export_info import export2SDK
import os

# Export settings for Mask2Former (TensorRT backend, CUDA).
#
# To export MaskFormer instead, swap in the following values:
# img = 'bus.jpg'
# work_dir = './work_dir/trt/maskformer'
# save_file = './end2end.onnx'
# deploy_cfg = './configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-1067x800.py'
# model_cfg = '../mmdetection-3.3.0/configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py'
# model_checkpoint = '../checkpoints/maskformer_r50_ms-16xb1-75e_coco_20230116_095226-baacd858.pth'
# device = 'cuda'

# Model definition and weights.
model_cfg = '../mmdetection-3.3.0/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'
model_checkpoint = '../checkpoints/mask2former_r50_8xb2-lsj-50e_coco_20220506_191028-41b088b6.pth'

# Deployment configuration and target device.
deploy_cfg = './configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-1088x800.py'
device = 'cuda'

# Sample image passed to the exporter, and where the results are written.
img = 'bus.jpg'
work_dir = './work_dir/trt/mask2former'
save_file = './end2end.onnx'

# Step 1: convert the PyTorch model to the intermediate representation (ONNX).
torch2onnx(
    img,
    work_dir,
    save_file,
    deploy_cfg,
    model_cfg,
    model_checkpoint,
    device,
)

# Step 2: build the TensorRT engine from the ONNX file written in step 1.
onnx_model = os.path.join(work_dir, save_file)
save_file = 'end2end.engine'
model_id = 0
onnx2tensorrt(
    work_dir,
    save_file,
    model_id,
    deploy_cfg,
    onnx_model,
    device,
)

# Step 3: dump the pipeline information used by the SDK (dump-info).
export2SDK(
    deploy_cfg,
    model_cfg,
    work_dir,
    pth=model_checkpoint,
    device=device,
)

由于 mmdeploy SDK 目前尚未支持该模型的推理，需要自行编写 Python 推理脚本：
maskformer

import cv2
import ctypes
import numpy as np
import tensorrt as trt
import pycuda.autoinit 
import pycuda.driver as cuda  


# Post-processing settings used by the script below.
num_classes = 133            # label value that marks "no object" / unassigned pixels
num_things_classes = 80      # class ids below this value are treated as "thing" instances
object_mask_thr = iou_thr = 0.8   # score threshold / minimum kept-area ratio
INSTANCE_OFFSET = 1000       # panoptic id = class id + instance id * INSTANCE_OFFSET
resize_shape = (1333, 800)   # (long edge, short edge) limits for the keep-ratio resize

# One random colour triple per class id, used for the mask overlay.
palette = [
    tuple(np.random.randint(0, 256) for _ in range(3))
    for _ in range(num_classes)
]


def resize_keep_ratio(image, img_scale):
    """Resize ``image`` to fit inside ``img_scale`` while keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (height, width).
        img_scale: Pair of size limits; the larger value bounds the long edge
            of the image and the smaller value bounds the short edge.

    Returns:
        The image resized with ``cv2.resize`` (default interpolation).
    """
    src_h, src_w = image.shape[:2]
    long_limit, short_limit = max(img_scale), min(img_scale)
    # The tighter of the two constraints decides the final scale.
    ratio = min(long_limit / max(src_h, src_w), short_limit / min(src_h, src_w))
    # int(x + 0.5) rounds the (positive) target size to the nearest pixel.
    dst_size = (int(src_w * ratio + 0.5), int(src_h * ratio + 0.5))
    return cv2.resize(image, dst_size)

def draw_binary_masks(img, binary_masks, colors, alphas=0.8):
    """Blend coloured masks over ``img`` and write the result to ``output.jpg``.

    Args:
        img: Image array the masks are drawn on; the caller's array is not modified.
        binary_masks: Stack of boolean masks, one per first-axis entry, each
            matching the image's height and width.
        colors: One colour per mask, broadcast over every pixel of the overlay.
        alphas: Blend weight of the coloured overlay, applied to every mask.
    """
    masks_u8 = binary_masks.astype('uint8') * 255
    blended = img
    for mask_u8, color in zip(masks_u8, colors):
        # Solid colour inside the mask, zero elsewhere.
        overlay = np.zeros_like(blended)
        overlay[...] = color
        overlay = cv2.bitwise_and(overlay, overlay, mask=mask_u8)
        # Current image outside the mask, zero inside it.
        outside = cv2.bitwise_and(blended, blended, mask=cv2.bitwise_not(mask_u8))
        # The two parts are disjoint, so their sum is "colour in mask, image elsewhere".
        blended = cv2.addWeighted(blended, 1 - alphas, overlay + outside, alphas, 0)
    cv2.imwrite("output.jpg", blended)


if __name__=="__main__":
    # Run the MaskFormer TensorRT engine on a single image, fuse the per-query
    # class/mask logits into a panoptic map and save the overlay.
    logger = trt.Logger(trt.Logger.WARNING)
    # Load the mmdeploy TensorRT custom-op library before deserializing the engine.
    ctypes.CDLL('E:/vscode_workspace/mmdeploy-1.3.1/mmdeploy/lib/mmdeploy_tensorrt_ops.dll')

    image_path = 'E:/vscode_workspace/mmdeploy-1.3.1/bus.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError('Failed to read image: ' + image_path)
    image_resize = resize_keep_ratio(image, resize_shape)
    ori_h, ori_w = image.shape[:2]
    resized_h, resized_w = image_resize.shape[:2]

    # BGR -> RGB and HWC -> CHW, then per-channel (x - mean) / std.
    input_tensor = image_resize[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)
    input_tensor[0,:] = (input_tensor[0,:] - 123.675) / 58.395
    input_tensor[1,:] = (input_tensor[1,:] - 116.28) / 57.12
    input_tensor[2,:] = (input_tensor[2,:] - 103.53) / 57.375

    with open("E:/vscode_workspace/mmdeploy-1.3.1/work_dir/trt/maskformer/end2end.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    if engine is None:
        raise RuntimeError('Failed to deserialize the TensorRT engine')

    # A single execution context is used for both shape queries and inference.
    with engine.create_execution_context() as context:
        # Binding 0 is the input, bindings 1 and 2 are the two outputs.
        h_input = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
        h_output0 = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
        h_output1 = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(2)), dtype=np.float32)
        if input_tensor.size != h_input.size:
            # Copying a buffer of the wrong size would read/write out of bounds.
            raise ValueError('Preprocessed input has {} elements but the engine expects {}'.format(input_tensor.size, h_input.size))
        # Fill the page-locked buffer in place so the async copy uses pinned memory.
        h_input[:] = input_tensor.ravel()

        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output0 = cuda.mem_alloc(h_output0.nbytes)
        d_output1 = cuda.mem_alloc(h_output1.nbytes)
        stream = cuda.Stream()

        cuda.memcpy_htod_async(d_input, h_input, stream)
        context.execute_async_v2(bindings=[int(d_input), int(d_output0), int(d_output1)], stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(h_output0, d_output0, stream)
        cuda.memcpy_dtoh_async(h_output1, d_output1, stream)
        stream.synchronize()

        batch_cls_logits = h_output0.reshape(context.get_binding_shape(1))
        batch_mask_logits = h_output1.reshape(context.get_binding_shape(2))

    # Softmax over the class axis, computed once; subtracting the row maximum
    # keeps np.exp from overflowing without changing the result.
    mask_cls = batch_cls_logits[0]
    shifted = mask_cls - mask_cls.max(axis=-1, keepdims=True)
    probs = np.exp(shifted)
    probs = probs / probs.sum(axis=-1, keepdims=True)
    scores = probs.max(-1)
    labels = probs.argmax(-1)

    # Keep queries that predict a real class with a high enough score.
    keep = np.not_equal(labels, num_classes) & (scores > object_mask_thr)
    cur_scores = scores[keep]
    cur_classes = labels[keep]

    # Crop to the resized-image extent (NOT the original size), then scale the
    # kept masks back to the original resolution. Only kept queries are
    # resized, which gives the same result with far less memory.
    kept_logits = batch_mask_logits[0][keep][:, :resized_h, :resized_w]
    cur_masks = np.zeros((kept_logits.shape[0], ori_h, ori_w))
    for i, logit in enumerate(kept_logits):
        cur_masks[i] = cv2.resize(logit, dsize=(ori_w, ori_h), interpolation=cv2.INTER_LINEAR)
    cur_masks = 1 / (1 + np.exp(-cur_masks))  # sigmoid

    panoptic_seg = np.full((ori_h, ori_w), num_classes, dtype=np.int32)
    if cur_masks.shape[0] > 0:  # argmax over an empty axis would raise
        cur_prob_masks = cur_scores.reshape(-1, 1, 1) * cur_masks
        cur_mask_ids = cur_prob_masks.argmax(0)  # winning query per pixel
        instance_id = 1
        for k in range(cur_classes.shape[0]):
            pred_class = int(cur_classes[k].item())
            isthing = pred_class < num_things_classes
            mask = cur_mask_ids == k
            mask_area = mask.sum().item()
            original_area = (cur_masks[k] >= 0.5).sum().item()
            if mask_area > 0 and original_area > 0:
                # Drop segments that lost most of their area to other queries.
                if mask_area / original_area < iou_thr:
                    continue
                if not isthing:
                    panoptic_seg[mask] = pred_class
                else:
                    panoptic_seg[mask] = (pred_class + instance_id * INSTANCE_OFFSET)
                    instance_id += 1

    # One boolean mask per segment id; the class id selects the colour.
    ids = np.unique(panoptic_seg)[::-1]
    ids = ids[ids != num_classes]
    labels = (ids % INSTANCE_OFFSET).astype(np.int64)
    segms = (panoptic_seg[None] == ids[:, None, None])
    colors = [palette[label] for label in labels]
    draw_binary_masks(image, segms, colors)

mask2former

import cv2
import ctypes
import numpy as np
import tensorrt as trt
import pycuda.autoinit 
import pycuda.driver as cuda  


# Post-processing settings used by the script below.
num_classes = 133            # label value that marks "no object" / unassigned pixels
num_things_classes = 80      # class ids below this value are treated as "thing" instances
object_mask_thr = iou_thr = 0.8   # score threshold / minimum kept-area ratio
INSTANCE_OFFSET = 1000       # panoptic id = class id + instance id * INSTANCE_OFFSET
resize_shape = (1333, 800)   # (long edge, short edge) limits for the keep-ratio resize

# One random colour triple per class id, used for the mask overlay.
palette = [
    tuple(np.random.randint(0, 256) for _ in range(3))
    for _ in range(num_classes)
]


def resize_keep_ratio(image, img_scale):
    """Resize ``image`` to fit inside ``img_scale`` while keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (height, width).
        img_scale: Pair of size limits; the larger value bounds the long edge
            of the image and the smaller value bounds the short edge.

    Returns:
        The image resized with ``cv2.resize`` (default interpolation).
    """
    src_h, src_w = image.shape[:2]
    long_limit, short_limit = max(img_scale), min(img_scale)
    # The tighter of the two constraints decides the final scale.
    ratio = min(long_limit / max(src_h, src_w), short_limit / min(src_h, src_w))
    # int(x + 0.5) rounds the (positive) target size to the nearest pixel.
    dst_size = (int(src_w * ratio + 0.5), int(src_h * ratio + 0.5))
    return cv2.resize(image, dst_size)

def draw_binary_masks(img, binary_masks, colors, alphas=0.8):
    """Blend coloured masks over ``img`` and write the result to ``output.jpg``.

    Args:
        img: Image array the masks are drawn on; the caller's array is not modified.
        binary_masks: Stack of boolean masks, one per first-axis entry, each
            matching the image's height and width.
        colors: One colour per mask, broadcast over every pixel of the overlay.
        alphas: Blend weight of the coloured overlay, applied to every mask.
    """
    masks_u8 = binary_masks.astype('uint8') * 255
    blended = img
    for mask_u8, color in zip(masks_u8, colors):
        # Solid colour inside the mask, zero elsewhere.
        overlay = np.zeros_like(blended)
        overlay[...] = color
        overlay = cv2.bitwise_and(overlay, overlay, mask=mask_u8)
        # Current image outside the mask, zero inside it.
        outside = cv2.bitwise_and(blended, blended, mask=cv2.bitwise_not(mask_u8))
        # The two parts are disjoint, so their sum is "colour in mask, image elsewhere".
        blended = cv2.addWeighted(blended, 1 - alphas, overlay + outside, alphas, 0)
    cv2.imwrite("output.jpg", blended)


if __name__=="__main__":
    # Run the Mask2Former TensorRT engine on a single image, fuse the per-query
    # class/mask logits into a panoptic map and save the overlay.
    logger = trt.Logger(trt.Logger.WARNING)
    # Load the mmdeploy TensorRT custom-op library before deserializing the engine.
    ctypes.CDLL('E:/vscode_workspace/mmdeploy-1.3.1/mmdeploy/lib/mmdeploy_tensorrt_ops.dll')

    image_path = 'E:/vscode_workspace/mmdeploy-1.3.1/bus.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError('Failed to read image: ' + image_path)
    image_resize = resize_keep_ratio(image, resize_shape)
    ori_h, ori_w = image.shape[:2]
    resized_h, resized_w = image_resize.shape[:2]

    # Zero-pad the bottom/right so both sides become multiples of 32; the image
    # content stays anchored at the top-left corner.
    padded_h = (resized_h + 31) // 32 * 32
    padded_w = (resized_w + 31) // 32 * 32
    image_pad = cv2.copyMakeBorder(image_resize, 0, padded_h - resized_h, 0, padded_w - resized_w, cv2.BORDER_CONSTANT, value=0)

    # BGR -> RGB and HWC -> CHW, then per-channel (x - mean) / std.
    input_tensor = image_pad[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)
    input_tensor[0,:] = (input_tensor[0,:] - 123.675) / 58.395
    input_tensor[1,:] = (input_tensor[1,:] - 116.28) / 57.12
    input_tensor[2,:] = (input_tensor[2,:] - 103.53) / 57.375

    with open("E:/vscode_workspace/mmdeploy-1.3.1/work_dir/trt/mask2former/end2end.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    if engine is None:
        raise RuntimeError('Failed to deserialize the TensorRT engine')

    # A single execution context is used for both shape queries and inference.
    with engine.create_execution_context() as context:
        # Binding 0 is the input, bindings 1 and 2 are the two outputs.
        h_input = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
        h_output0 = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
        h_output1 = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(2)), dtype=np.float32)
        if input_tensor.size != h_input.size:
            # Copying a buffer of the wrong size would read/write out of bounds.
            raise ValueError('Preprocessed input has {} elements but the engine expects {}'.format(input_tensor.size, h_input.size))
        # Fill the page-locked buffer in place so the async copy uses pinned memory.
        h_input[:] = input_tensor.ravel()

        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output0 = cuda.mem_alloc(h_output0.nbytes)
        d_output1 = cuda.mem_alloc(h_output1.nbytes)
        stream = cuda.Stream()

        cuda.memcpy_htod_async(d_input, h_input, stream)
        context.execute_async_v2(bindings=[int(d_input), int(d_output0), int(d_output1)], stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(h_output0, d_output0, stream)
        cuda.memcpy_dtoh_async(h_output1, d_output1, stream)
        stream.synchronize()

        batch_cls_logits = h_output0.reshape(context.get_binding_shape(1))
        batch_mask_logits = h_output1.reshape(context.get_binding_shape(2))

    # Softmax over the class axis, computed once; subtracting the row maximum
    # keeps np.exp from overflowing without changing the result.
    mask_cls = batch_cls_logits[0]
    shifted = mask_cls - mask_cls.max(axis=-1, keepdims=True)
    probs = np.exp(shifted)
    probs = probs / probs.sum(axis=-1, keepdims=True)
    scores = probs.max(-1)
    labels = probs.argmax(-1)

    # Keep queries that predict a real class with a high enough score.
    keep = np.not_equal(labels, num_classes) & (scores > object_mask_thr)
    cur_scores = scores[keep]
    cur_classes = labels[keep]

    # Crop away the padded border using the resized-image extent (NOT the
    # original size), then scale the kept masks back to the original
    # resolution. Only kept queries are resized to save memory.
    kept_logits = batch_mask_logits[0][keep][:, :resized_h, :resized_w]
    cur_masks = np.zeros((kept_logits.shape[0], ori_h, ori_w))
    for i, logit in enumerate(kept_logits):
        cur_masks[i] = cv2.resize(logit, dsize=(ori_w, ori_h), interpolation=cv2.INTER_LINEAR)
    cur_masks = 1 / (1 + np.exp(-cur_masks))  # sigmoid

    panoptic_seg = np.full((ori_h, ori_w), num_classes, dtype=np.int32)
    if cur_masks.shape[0] > 0:  # argmax over an empty axis would raise
        cur_prob_masks = cur_scores.reshape(-1, 1, 1) * cur_masks
        cur_mask_ids = cur_prob_masks.argmax(0)  # winning query per pixel
        instance_id = 1
        for k in range(cur_classes.shape[0]):
            pred_class = int(cur_classes[k].item())
            isthing = pred_class < num_things_classes
            mask = cur_mask_ids == k
            mask_area = mask.sum().item()
            original_area = (cur_masks[k] >= 0.5).sum().item()
            if mask_area > 0 and original_area > 0:
                # Drop segments that lost most of their area to other queries.
                if mask_area / original_area < iou_thr:
                    continue
                if not isthing:
                    panoptic_seg[mask] = pred_class
                else:
                    panoptic_seg[mask] = (pred_class + instance_id * INSTANCE_OFFSET)
                    instance_id += 1

    # One boolean mask per segment id; the class id selects the colour.
    ids = np.unique(panoptic_seg)[::-1]
    ids = ids[ids != num_classes]
    labels = (ids % INSTANCE_OFFSET).astype(np.int64)
    segms = (panoptic_seg[None] == ids[:, None, None])
    colors = [palette[label] for label in labels]
    draw_binary_masks(image, segms, colors)