将官方 Keras 版 Mask R-CNN 转换为 ONNX,并使用 ONNX Runtime(GPU)在 C++ 中进行推理

该博客详细介绍了如何将Keras实现的Mask R-CNN模型转换为ONNX格式,并在C++中使用ONNX Runtime进行推理。首先,通过keras2onnx库将模型转换为ONNX,然后针对特定操作(如CropAndResize和DetectionLayer)定义了自定义转换函数。接着,展示了如何在C++中设置输入张量并运行推理,包括预处理和后处理步骤,用于从ONNX模型生成实例分割结果。
摘要由CSDN通过智能技术生成

https://github.com/dulvqingyun/onnxruntime-maskrcnn/blob/master/newC%2B%2B

1. 在 Keras 下将模型转换为 ONNX

import os
import sys
import numpy as np
import skimage
import onnx
import keras2onnx

from mrcnn.config import Config
from mrcnn.model import BatchNorm, DetectionLayer
from mrcnn import model as modellib
from mrcnn import visualize

from keras2onnx import set_converter
from keras2onnx.ke2onnx.batch_norm import convert_keras_batch_normalization
from keras2onnx.proto import onnx_proto
from keras2onnx.common.onnx_ops import apply_transpose, apply_identity
from keras2onnx.common.onnx_ops import OnnxOperatorBuilder
from os.path import dirname, abspath
sys.path.insert(0, os.path.join(dirname(abspath(__file__)), '../../tests/'))
# from test_utils import convert_tf_crop_and_resize


# Absolute path of the current working directory; the paths below are
# resolved relative to it.
ROOT_DIR = os.path.abspath("./")

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Path to trained weights file
# NOTE(review): this constant is not referenced anywhere else in the script as
# shown; the weights actually loaded come from a literal path further down.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")


class CocoConfig(Config):
    """Base configuration shared by the conversion/inference script.

    Extends the library ``Config`` and overrides only the values below.
    Despite the "coco" name, ``NUM_CLASSES`` is set to two (background plus a
    single foreground class), not the 81 classes of the full COCO label set.
    """

    # Recognizable name for this configuration.
    NAME = "coco"

    # Images processed per GPU. Lower this value on a GPU with less memory.
    # (To train on several GPUs, additionally override GPU_COUNT; the
    # library default is 1.)
    IMAGES_PER_GPU = 2

    # Number of classes, background included: 1 background + 1 foreground.
    NUM_CLASSES = 2


class InferenceConfig(CocoConfig):
    """Single-image inference variant of ``CocoConfig``.

    Batch size = GPU_COUNT * IMAGES_PER_GPU, so both are pinned to 1 to run
    inference on one image at a time.
    """

    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


# Instantiate the inference configuration and print its settings.
config = InferenceConfig()
config.display()

# Build the Mask R-CNN model in inference mode; MODEL_DIR is passed as the
# model directory.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load trained weights, matching layers by name. The path points at the
# balloon sample checkpoint, not the MS-COCO weights file.
model.load_weights('samples/balloon/mask_rcnn_balloon.h5', by_name=True)


def convert_tf_crop_and_resize(scope, operator, container):
    """Convert a ``CropAndResize`` operator into the ``com.microsoft`` contrib op.

    The first input is transposed with perm ``[0, 3, 1, 2]`` before the
    ``CropAndResize`` node and the result is transposed back with
    ``[0, 2, 3, 1]`` (presumably NHWC <-> NCHW; confirm against the contrib
    op's expected layout). The remaining inputs are forwarded unchanged and
    the resize ``method`` attribute of the source node becomes the ``mode``
    attribute of the emitted node.

    Args:
        scope: keras2onnx scope used to generate unique names.
        operator: keras2onnx operator wrapping the source node.
        container: keras2onnx container receiving the emitted ONNX nodes.

    Raises:
        ValueError: if the target opset is lower than 11.
    """
    if operator.target_opset < 11:
        raise ValueError("CropAndResize op is not supported for opset < 11")

    builder = OnnxOperatorBuilder(container, scope)
    resize_method = operator.raw_operator.get_attr('method')
    base_name = operator.full_name

    # apply_transpose returns a list of output names, so it can be
    # concatenated directly with the remaining input names.
    transposed_image = builder.apply_transpose(operator.inputs[0].full_name,
                                               name=base_name + '_transpose_1',
                                               perm=[0, 3, 1, 2])
    crop_inputs = transposed_image + operator.input_full_names[1:]

    cropped = builder.add_node('CropAndResize',
                               crop_inputs,
                               base_name + '_crop_and_resize',
                               op_domain='com.microsoft',
                               op_version=1,
                               mode=resize_method)

    # Undo the layout change and write straight into the operator's outputs.
    builder.apply_op_with_output("apply_transpose",
                                 cropped,
                                 operator.output_full_names,
                                 name=base_name + '_transpose_final',
                                 perm=[0, 2, 3, 1])


def convert_BatchNorm(scope, operator, container):
    """Convert a ``BatchNorm`` layer by delegating to keras2onnx's stock
    Keras batch-normalization converter with the same arguments."""
    convert_keras_batch_normalization(scope, operator, container)


def convert_apply_box_deltas_graph(scope, operator, container, oopb, box_transpose, score_identity, deltas_transpose, windows_transpose):
    """Emit ONNX nodes that refine each box with its top-class deltas and clip it.

    Steps, in emission order:
      1. Squeeze the batch axis (axis 0) off boxes, deltas and scores.
      2. ArgMax over the class axis selects one class per box; the matching
         delta row is picked with GatherND and scaled by
         ``[0.1, 0.1, 0.2, 0.2]``.
      3. Boxes ``(y1, x1, y2, x2)`` are shifted/scaled by the deltas
         (centre shift, exponential size change).
      4. The result is clipped to the window ``(wy1, wx1, wy2, wx2)`` with
         Min followed by Max, concatenated along axis 1 and unsqueezed back
         to a leading batch axis.

    Args:
        scope: keras2onnx scope used to generate unique names.
        operator: keras2onnx operator; its input/output names seed node names.
        container: keras2onnx container receiving the emitted ONNX nodes.
        oopb: accepted for interface compatibility; a fresh builder is
            created from ``container`` and ``scope`` and used instead.
        box_transpose: tensor name of the boxes (batch axis first).
        score_identity: tensor name of the per-class scores.
        deltas_transpose: tensor name of the per-class box deltas.
        windows_transpose: tensor name of the clipping window.

    Returns:
        Name of the tensor holding the refined, clipped boxes with the batch
        axis restored.
    """
    oopb = OnnxOperatorBuilder(container, scope)

    layer_name = operator.full_name
    roi_name = operator.inputs[0].full_name
    prob_name = operator.inputs[1].full_name
    delta_name = operator.inputs[2].full_name
    meta_name = operator.inputs[3].full_name

    def _int64(value):
        # One-element int64 array, as used for Slice/Range parameters.
        return np.array([value], dtype='int64')

    def _slice(src, begin, end, axis, node_name):
        # Emit Slice(src)[begin:end] along `axis`; returns the output name.
        return oopb.add_node('Slice',
                             [src,
                              ('_start', oopb.int64, _int64(begin)),
                              ('_end', oopb.int64, _int64(end)),
                              ('_axes', oopb.int64, _int64(axis))],
                             node_name)

    def _half(src, node_name):
        # Emit src * 0.5; returns the output name.
        return oopb.add_node('Mul',
                             [src,
                              ('_mul_constant', oopb.float, np.array([0.5], dtype='float32'))],
                             node_name)

    def _drop_batch_axis(src, suffix):
        # Emit Squeeze(axes=[0]); returns the output name.
        return oopb.apply_squeeze(src, name=layer_name + suffix, axes=[0])[0]

    # --- 1. remove the batch axis -------------------------------------------
    rois = _drop_batch_axis(box_transpose, '_box_squeeze')
    # output shape: [spatial_dimension, 4]
    class_deltas = _drop_batch_axis(deltas_transpose, '_deltas_squeeze')
    # output shape: [spatial_dimension, num_classes, 4]
    probs = _drop_batch_axis(score_identity, '_score_squeeze')
    # output shape: [spatial_dimension, num_classes]

    # --- 2. pick the deltas of the highest-scoring class --------------------
    class_ids = scope.get_unique_variable_name('class_ids')
    container.add_node('ArgMax', probs, class_ids, op_version=operator.target_opset,
                       axis=1)
    # output shape: [spatial_dimension, 1]

    prob_shape = oopb.add_node('Shape', [probs], prob_name + '_prob_shape')
    roi_count = _slice(prob_shape, 0, 1, 0, prob_name + '_prob_shape_0')
    # Row indices 0..roi_count-1, produced by the com.microsoft Range op.
    row_ids = oopb.add_node('Range',
                            [('_start', oopb.int64, _int64(0)),
                             roi_count,
                             ('_delta', oopb.int64, _int64(1))],
                            prob_name + '_prob_range',
                            op_domain='com.microsoft',
                            op_version=1)
    row_ids_column = oopb.apply_unsqueeze([row_ids],
                                          prob_name + '_prob_range_unsqueeze',
                                          axes=[1])[0]
    # output shape: [spatial_dimension, 1]

    gather_indices = oopb.add_node('Concat',
                                   [row_ids_column, class_ids],
                                   prob_name + '_indices',
                                   axis=1)
    # output shape: [spatial_dimension, 2]

    best_deltas = oopb.add_node('GatherND',
                                [class_deltas, gather_indices],
                                delta_name + '_deltas_specific')
    # output shape: [spatial_dimension, 4]

    bbox_std_dev = np.array([0.1, 0.1, 0.2, 0.2], dtype='float32')
    scaled_deltas = oopb.add_node('Mul',
                                  [best_deltas,
                                   ('_mul_constant', oopb.float, bbox_std_dev)],
                                  delta_name + '_mul')
    # output shape: [spatial_dimension, 4]

    # --- 3. apply the deltas ------------------------------------------------
    box_y1, box_x1, box_y2, box_x2 = [
        _slice(rois, k, k + 1, 1, roi_name + '_sliced_%d' % k) for k in range(4)
    ]
    d_y, d_x, d_h, d_w = [
        _slice(scaled_deltas, k, k + 1, 1, meta_name + '_sliced_%d' % k) for k in range(4)
    ]

    height = oopb.add_node('Sub', [box_y2, box_y1], roi_name + '_height')
    width = oopb.add_node('Sub', [box_x2, box_x1], roi_name + '_width')

    half_height = _half(height, roi_name + '_half_height_0')
    half_width = _half(width, roi_name + '_half_width_0')
    center_y = oopb.add_node('Add', [box_y1, half_height], roi_name + '_center_y_0')
    center_x = oopb.add_node('Add', [box_x1, half_width], roi_name + '_center_x_0')

    shift_y = oopb.add_node('Mul', [d_y, height], roi_name + '_delta_height')
    shift_x = oopb.add_node('Mul', [d_x, width], roi_name + '_delta_width')
    moved_center_y = oopb.add_node('Add', [center_y, shift_y], roi_name + '_center_y_1')
    moved_center_x = oopb.add_node('Add', [center_x, shift_x], roi_name + '_center_x_1')

    height_factor = oopb.add_node('Exp', [d_h], roi_name + '_delta_2_exp')
    width_factor = oopb.add_node('Exp', [d_w], roi_name + '_delta_3_exp')
    new_height = oopb.add_node('Mul', [height, height_factor], roi_name + '_height_exp')
    new_width = oopb.add_node('Mul', [width, width_factor], roi_name + '_width_exp')

    new_half_height = _half(new_height, roi_name + '_half_height_1')
    new_half_width = _half(new_width, roi_name + '_half_width_1')
    y1 = oopb.add_node('Sub', [moved_center_y, new_half_height], roi_name + '_y1')
    x1 = oopb.add_node('Sub', [moved_center_x, new_half_width], roi_name + '_x1')
    y2 = oopb.add_node('Add', [y1, new_height], roi_name + '_y2')
    x2 = oopb.add_node('Add', [x1, new_width], roi_name + '_x2')

    # --- 4. clip to the window ----------------------------------------------
    window = _drop_batch_axis(windows_transpose, '_windows_squeeze')
    wy1, wx1, wy2, wx2 = [
        _slice(window, k, k + 1, 0, roi_name + '_windows_%d' % k) for k in range(4)
    ]

    coord_tags = ('y1', 'x1', 'y2', 'x2')
    upper_bounds = (wy2, wx2, wy2, wx2)
    lower_bounds = (wy1, wx1, wy1, wx1)

    # First cap every coordinate at the window's far edge ...
    capped = [
        oopb.add_node('Min', [coord, bound], roi_name + '_' + tag + '_min')
        for tag, coord, bound in zip(coord_tags, (y1, x1, y2, x2), upper_bounds)
    ]
    # ... then raise it to at least the window's near edge.
    clipped = [
        oopb.add_node('Max', [coord, bound], roi_name + '_' + tag + '_max')
        for tag, coord, bound in zip(coord_tags, capped, lower_bounds)
    ]

    concat_result = scope.get_unique_variable_name(operator.output_full_names[0] + '_concat_result')
    container.add_node("Concat",
                       clipped,
                       concat_result,
                       op_version=operator.target_opset,
                       name=operator.outputs[0].full_name + '_concat_result',
                       axis=1)

    # Restore the leading batch axis for the caller.
    return oopb.apply_unsqueeze(concat_result,
                                name=layer_name + '_concat_unsqueeze',
                                axes=[0])[0]


def norm_boxes_graph(scope, operator, container, oopb, image_meta):
    """Emit ONNX nodes that normalize the window stored in ``image_meta``.

    From ``image_meta`` the graph takes columns 4..6 of the first row (used
    as height and width) and columns 7..10 (the window). It then computes
    ``(window - [0, 0, 1, 1]) / ([h, w, h, w] - 1)``.

    Args:
        scope: keras2onnx scope used to generate unique names.
        operator: keras2onnx operator; ``inputs[0].full_name`` seeds node names.
        container: keras2onnx container receiving the emitted ONNX nodes.
        oopb: ``OnnxOperatorBuilder`` used to add nodes.
        image_meta: tensor name of the image meta input.

    Returns:
        Name of the tensor holding the normalized window, shape [batch, 4].
    """
    seed = operator.inputs[0].full_name

    def _slice(src, begin, end, axis, suffix):
        # Emit Slice(src)[begin:end] along `axis`; returns the output name.
        return oopb.add_node('Slice',
                             [src,
                              ('_start', oopb.int64, np.array([begin], dtype='int64')),
                              ('_end', oopb.int64, np.array([end], dtype='int64')),
                              ('_axes', oopb.int64, np.array([axis], dtype='int64'))],
                             seed + suffix)

    def _cast_to_float(src, name_seed):
        # Emit Cast(to=1), i.e. ONNX FLOAT; returns the output name.
        dst = scope.get_unique_variable_name(name_seed)
        container.add_node('Cast', src, dst, op_version=operator.target_opset,
                           to=1)
        return dst

    # Columns 4..6 of every row, then only the first row, then drop that axis.
    all_shapes = _slice(image_meta, 4, 7, 1, '_image_shapes')
    first_shape = _slice(all_shapes, 0, 1, 0, '_image_shape')
    shape_vector = oopb.apply_squeeze(first_shape,
                                      name=operator.full_name + '_image_shape_squeeze',
                                      axes=[0])[0]

    # Columns 7..10 of every row hold the window.
    window = _slice(image_meta, 7, 11, 1, '_window')

    height = _slice(shape_vector, 0, 1, 0, '_h_norm')
    width = _slice(shape_vector, 1, 2, 0, '_w_norm')
    height_f = _cast_to_float(height, 'h_norm_float')
    width_f = _cast_to_float(width, 'w_norm_float')

    hw_concat = scope.get_unique_variable_name(seed + '_hw_concat')
    container.add_node("Concat",
                       [height_f, width_f, height_f, width_f],
                       hw_concat,
                       op_version=operator.target_opset,
                       name=seed + '_hw_concat',
                       axis=-1)

    scale = oopb.add_node('Sub',
                          [hw_concat,
                           ('_sub', oopb.float, np.array([1.0], dtype='float32'))],
                          seed + '_scale')
    shifted_window = oopb.add_node('Sub',
                                   [window,
                                    ('_sub', oopb.float,
                                     np.array([0.0, 0.0, 1.0, 1.0], dtype='float32'))],
                                   seed + '_boxes_shift')
    # output shape: [batch, 4]
    return oopb.add_node('Div', [shifted_window, scale], seed + '_divide')


def convert_DetectionLayer(scope, operator, container):
    # type: (keras2onnx.common.InterimContext, keras2onnx.common.Operator, keras2onnx.common.OnnxObjectContainer) -> None
    """Emit the ONNX sub-graph that stands in for Mask R-CNN's ``DetectionLayer``.

    Pipeline, in emission order:
      1. Pass the four operator inputs through Identity nodes (boxes, class
         scores, box deltas, image meta).
      2. Normalize the window (``norm_boxes_graph``) and refine/clip the
         boxes (``convert_apply_box_deltas_graph``).
      3. Drop the background column from the scores, transpose to
         ``[batch, classes, boxes]`` and run ``NonMaxSuppression``.
      4. Add 1 to the class index returned by NMS (to undo the dropped
         background column), then gather the selected boxes and scores.
      5. Keep at most ``DETECTION_MAX_INSTANCES`` rows by score with TopK,
         zero-pad to that many rows and add a leading batch axis.

    The three ``DETECTION_*`` values are constants local to this converter;
    they are not read from the model configuration.

    Args:
        scope: keras2onnx scope used to generate unique names.
        operator: keras2onnx operator wrapping the detection layer.
        container: keras2onnx container receiving nodes and initializers.
    """
    DETECTION_MAX_INSTANCES = 100
    DETECTION_NMS_THRESHOLD = 0.3
    DETECTION_MIN_CONFIDENCE = 0.7

    oopb = OnnxOperatorBuilder(container, scope)
    box_transpose = scope.get_unique_variable_name(operator.inputs[0].full_name + '_tx')
    score_transpose = scope.get_unique_variable_name(operator.inputs[1].full_name + '_tx')

    apply_identity(scope, operator.inputs[0].full_name, box_transpose, container)
    # output shape: [num_batches, spatial_dimension, 4]
    score_identity = scope.get_unique_variable_name(operator.inputs[1].full_name + '_id')
    apply_identity(scope, operator.inputs[1].full_name, score_identity, container)
    # output shape: [num_batches, spatial_dimension, num_classes]

    deltas_transpose = scope.get_unique_variable_name(operator.inputs[2].full_name + '_tx')
    apply_identity(scope, operator.inputs[2].full_name, deltas_transpose, container)
    image_meta = scope.get_unique_variable_name(operator.inputs[3].full_name + '_tx')
    apply_identity(scope, operator.inputs[3].full_name, image_meta, container)
    windows_transpose = norm_boxes_graph(scope, operator, container, oopb, image_meta)
    delta_mul_output = convert_apply_box_deltas_graph(scope, operator, container, oopb, box_transpose, score_identity, deltas_transpose, windows_transpose)

    # Drop the background column (index 0) and keep every remaining class.
    # The end index is INT64 max, which ONNX Slice clamps to the axis length,
    # so the slice does not depend on a hard-coded class count.
    sliced_score = oopb.add_node('Slice',
                                 [score_identity,
                                  ('_start', oopb.int64, np.array([1], dtype='int64')),
                                  ('_end', oopb.int64, np.array([np.iinfo(np.int64).max], dtype='int64')),
                                  ('_axes', oopb.int64, np.array([2], dtype='int64'))
                                  ],
                                 operator.inputs[1].full_name + '_sliced')
    apply_transpose(scope, sliced_score, score_transpose, container, perm=[0, 2, 1])
    # output shape: [num_batches, num_classes, spatial_dimension]

    max_output_size = scope.get_unique_variable_name('max_output_size')
    iou_threshold = scope.get_unique_variable_name('iou_threshold')
    score_threshold = scope.get_unique_variable_name('layer.score_threshold')

    container.add_initializer(max_output_size, onnx_proto.TensorProto.INT64,
                              [], [DETECTION_MAX_INSTANCES])
    container.add_initializer(iou_threshold, onnx_proto.TensorProto.FLOAT,
                              [], [DETECTION_NMS_THRESHOLD])
    container.add_initializer(score_threshold, onnx_proto.TensorProto.FLOAT,
                              [], [DETECTION_MIN_CONFIDENCE])

    # Reuse the name of the original NMS node when present; otherwise fall
    # back to the first node of the operator.
    nms_node = next((nd_ for nd_ in operator.nodelist if nd_.type == 'NonMaxSuppressionV3'), operator.nodelist[0])
    nms_output = scope.get_unique_variable_name(operator.output_full_names[0] + '_nms')
    container.add_node("NonMaxSuppression",
                       [delta_mul_output, score_transpose, max_output_size, iou_threshold, score_threshold],
                       nms_output,
                       op_version=operator.target_opset,
                       name=nms_node.name)
    # NMS rows are [batch_index, class_index, box_index].

    # Add 1 to the class column: the background column was sliced off before
    # NMS, so NMS class indices are shifted down by one.
    add_init = scope.get_unique_variable_name('add')
    container.add_initializer(add_init, onnx_proto.TensorProto.INT64,
                              [1, 3], [0, 1, 0])
    nms_output_add = scope.get_unique_variable_name(operator.output_full_names[0] + '_class_add')
    container.add_node("Add",
                       [nms_output, add_init],
                       nms_output_add,
                       op_version=operator.target_opset,
                       name=nms_node.name + '_class_idx_add')

    # Column 1 -> class index.
    starts_init = scope.get_unique_variable_name('starts')
    ends_init = scope.get_unique_variable_name('ends')
    axes_init = scope.get_unique_variable_name('axes')

    container.add_initializer(starts_init, onnx_proto.TensorProto.INT32,
                              [1], [1])
    container.add_initializer(ends_init, onnx_proto.TensorProto.INT32,
                              [1], [2])
    container.add_initializer(axes_init, onnx_proto.TensorProto.INT32,
                              [1], [1])

    class_idx_output = scope.get_unique_variable_name(operator.output_full_names[0] + '_class_idx')
    container.add_node("Slice",
                       [nms_output_add, starts_init, ends_init, axes_init],
                       class_idx_output,
                       op_version=operator.target_opset,
                       name=nms_node.name+'_class_idx')
    # output shape: [num_selected_indices, 1]

    # Column 2 -> box index.
    starts_init_2 = scope.get_unique_variable_name('starts')
    ends_init_2 = scope.get_unique_variable_name('ends')
    axes_init_2 = scope.get_unique_variable_name('axes')

    container.add_initializer(starts_init_2, onnx_proto.TensorProto.INT32,
                              [1], [2])
    container.add_initializer(ends_init_2, onnx_proto.TensorProto.INT32,
                              [1], [3])
    container.add_initializer(axes_init_2, onnx_proto.TensorProto.INT32,
                              [1], [1])

    box_idx_output = scope.get_unique_variable_name(operator.output_full_names[0] + '_box_idx')
    container.add_node("Slice",
                       [nms_output_add, starts_init_2, ends_init_2, axes_init_2],
                       box_idx_output,
                       op_version=operator.target_opset,
                       name=nms_node.name + '_box_idx')
    # output shape: [num_selected_indices, 1]

    box_idx_squeeze = oopb.apply_squeeze(box_idx_output,
                                        name=nms_node.name + '_box_idx_squeeze', axes=[1])[0]
    # output shape: [num_selected_indices]

    # Columns 2 and 1 in reverse order (start 2, end 0, step -1), giving
    # [box_index, class_index] pairs for the GatherND on the scores below.
    starts_init_3 = scope.get_unique_variable_name('starts')
    ends_init_3 = scope.get_unique_variable_name('ends')
    axes_init_3 = scope.get_unique_variable_name('axes')
    step_init_3 = scope.get_unique_variable_name('steps')

    container.add_initializer(starts_init_3, onnx_proto.TensorProto.INT32,
                              [1], [2])
    container.add_initializer(ends_init_3, onnx_proto.TensorProto.INT32,
                              [1], [0])
    container.add_initializer(axes_init_3, onnx_proto.TensorProto.INT32,
                              [1], [1])
    container.add_initializer(step_init_3, onnx_proto.TensorProto.INT32,
                              [1], [-1])
    from keras2onnx.common.data_types import Int32TensorType
    class_box_idx_output = scope.get_local_variable_or_declare_one(operator.output_full_names[0] + '_class_box_idx',
                                                            type=Int32TensorType(shape=[None, 2]))
    container.add_node("Slice",
                       [nms_output_add, starts_init_3, ends_init_3, axes_init_3, step_init_3],
                       class_box_idx_output.full_name,
                       op_version=operator.target_opset,
                       name=nms_node.name + '_class_box_idx')
    # output shape: [num_selected_indices, 2]

    box_squeeze = oopb.apply_squeeze(delta_mul_output,
                                     name=nms_node.name + '_box_squeeze', axes=[0])[0]
    # output shape: [spatial_dimension, 4]

    score_squeeze = oopb.apply_squeeze(score_identity,
                                       name=nms_node.name + '_score_squeeze', axes=[0])[0]
    # output shape: [spatial_dimension, num_classes]

    box_gather = scope.get_unique_variable_name(operator.output_full_names[0] + '_box_gather')
    attrs = {'axis': 0}
    container.add_node("Gather",
                       [box_squeeze, box_idx_squeeze],
                       box_gather,
                       op_version=operator.target_opset,
                       name=nms_node.name + '_box_gather', **attrs)
    # output shape: [num_selected_indices, 4]

    score_gather = scope.get_unique_variable_name(operator.output_full_names[0] + '_score_gather')
    container.add_node("GatherND",
                       [score_squeeze, class_box_idx_output.full_name],
                       score_gather,
                       op_version=operator.target_opset,
                       name=nms_node.name + '_score_gather')
    # output shape: [num_selected_indices]

    score_gather_unsqueeze = oopb.apply_unsqueeze(score_gather,
                                                  name=nms_node.name + '_score_gather_unsqueeze', axes=[1])[0]
    # output shape: [num_selected_indices, 1]

    # K for TopK = min(number of selected detections, DETECTION_MAX_INSTANCES).
    # The Min is computed in float (Cast to=1, FLOAT) and cast back to int64
    # (to=7, INT64).
    top_k_var = scope.get_unique_variable_name('topK')
    container.add_initializer(top_k_var, onnx_proto.TensorProto.FLOAT,
                              [1], [float(DETECTION_MAX_INSTANCES)])

    score_gather_shape = oopb.add_node('Shape',
                                       [score_gather],
                                       operator.inputs[1].full_name + '_score_gather_shape')
    attrs = {'to': 1}
    scope_gather_float = oopb.add_node('Cast',
                                       [score_gather_shape],
                                       operator.inputs[1].full_name + '_scope_gather_float', **attrs)
    top_k_min = oopb.add_node('Min',
                              [scope_gather_float, top_k_var],
                              operator.inputs[1].full_name + '_top_k_min')
    attrs = {'to': 7}
    top_k_min_int = oopb.add_node('Cast',
                                   [top_k_min],
                                   operator.inputs[1].full_name + '_top_k_min_int', **attrs)

    score_top_k_output_val = scope.get_unique_variable_name(operator.output_full_names[0] + '_score_top_k_output_val')
    # output shape: [num_top_K]
    score_top_k_output_idx = scope.get_unique_variable_name(operator.output_full_names[0] + '_score_top_k_output_idx')
    # output shape: [num_top_K]
    attrs = {'axis': 0}
    container.add_node('TopK', [score_gather, top_k_min_int], [score_top_k_output_val, score_top_k_output_idx],
                       op_version=operator.target_opset,
                       name=nms_node.name + '_topK', **attrs)

    # Class indices become float so they can be concatenated with the boxes
    # and scores.
    class_idx_cast = scope.get_unique_variable_name(operator.output_full_names[0] + '_class_idx_cast')
    attrs = {'to': 1}
    container.add_node('Cast', class_idx_output, class_idx_cast, op_version=operator.target_opset,
                       name=nms_node.name+'_class_idx_cast', **attrs)
    # output shape: [num_selected_indices, 1]

    concat_var = scope.get_unique_variable_name(operator.output_full_names[0] + '_concat_var')
    concat_node = next((nd_ for nd_ in operator.nodelist if nd_.type == 'Concat'), operator.nodelist[0])
    attrs = {'axis': 1}
    container.add_node("Concat",
                       [box_gather, class_idx_cast, score_gather_unsqueeze],
                       concat_var,
                       op_version=operator.target_opset,
                       name=concat_node.name, **attrs)
    # output shape: [num_selected_indices, 6]

    all_gather = scope.get_unique_variable_name(operator.output_full_names[0] + '_all_gather')
    attrs = {'axis': 0}
    container.add_node("Gather",
                       [concat_var, score_top_k_output_idx],
                       all_gather,
                       op_version=operator.target_opset,
                       name=nms_node.name + '_all_gather', **attrs)
    # output shape: [num_top_K, 6]

    # Append DETECTION_MAX_INSTANCES rows at the end of axis 0, then keep the
    # first DETECTION_MAX_INSTANCES rows, so the row count is always fixed.
    padded_result = oopb.add_node('Pad',
                                  [all_gather,
                                   np.array([0, 0, DETECTION_MAX_INSTANCES, 0],
                                            dtype=np.int64)],
                                  nms_node.name + '_padded_result')
    detection_final = oopb.add_node('Slice',
                                 [padded_result,
                                  ('_start', oopb.int64, np.array([0], dtype='int64')),
                                  ('_end', oopb.int64, np.array([DETECTION_MAX_INSTANCES], dtype='int64')),
                                  ('_axes', oopb.int64, np.array([0], dtype='int64'))
                                  ],
                                 nms_node.name + '_detection_final'
                                 )

    oopb.apply_op_with_output('apply_unsqueeze',
                              detection_final,
                              operator.output_full_names[0],
                              name=nms_node.name + '_concat_unsqueeze', axes=[0])
    # output shape: [1, DETECTION_MAX_INSTANCES, 6]


# Register the custom converters with keras2onnx, one per Mask R-CNN layer
# class, in the same order as before.
for _layer_cls, _converter in ((DetectionLayer, convert_DetectionLayer),
                               (BatchNorm, convert_BatchNorm)):
    set_converter(_layer_cls, _converter)


# Run detection
# Display labels indexed by class id: index 0 is the background entry 'BG',
# followed by 80 object names (81 entries in total).
class_names = [
    'BG',
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe',
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
    'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush',
]


def generate_image(images, molded_images, windows, results):
    """Unmold the raw network outputs for each image and display them.

    Args:
        images: original input images, one array per image.
        molded_images: the molded (network-sized) images, indexed like
            ``images``.
        windows: per-image windows, indexed like ``images``.
        results: raw session outputs; ``results[0]`` is used as the
            detections and ``results[3]`` as the mrcnn_mask output.

    Returns:
        A list with one dict per image holding the keys ``"rois"``,
        ``"class_ids"``, ``"scores"`` and ``"masks"``.
    """
    per_image = []
    for idx, picture in enumerate(images):
        unmolded = model.unmold_detections(
            results[0][idx],  # detections for this image
            results[3][idx],  # mrcnn_mask for this image
            picture.shape,
            molded_images[idx].shape,
            windows[idx],
        )
        rois, class_ids, scores, masks = unmolded
        entry = {
            "rois": rois,
            "class_ids": class_ids,
            "scores": scores,
            "masks": masks,
        }
        per_image.append(entry)
        # Show the detections for this image straight away.
        visualize.display_instances(picture, entry['rois'], entry['masks'],
                                    entry['class_ids'], class_names,
                                    entry['scores'])
    return per_image


if __name__ == '__main__':
    # Script flow: export the Keras Mask R-CNN model to ONNX once, run a single
    # image through ONNX Runtime, display the detections, and dump the anchors
    # to 'anchors.txt' so that they can be loaded outside Python.

    # skimage.io is a submodule: import it explicitly instead of relying on
    # some other package having loaded it as a side effect of `import skimage`.
    import skimage.io

    model_file_name = 'mrcnn.onnx'
    if not os.path.exists(model_file_name):
        # use opset 11 or later
        set_converter('CropAndResize', convert_tf_crop_and_resize)
        oml = keras2onnx.convert_keras(model.keras_model, target_opset=11)
        onnx.save_model(oml, model_file_name)

    # run with ONNXRuntime
    import onnxruntime
    filename = 'demo3.jpg'
    image = skimage.io.imread(filename)
    images = [image]

    # Providers are listed in order of preference.
    sess = onnxruntime.InferenceSession(
        model_file_name,
        providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider',
                   'CPUExecutionProvider'])

    # preprocessing
    molded_images, image_metas, windows = model.mold_inputs(images)
    anchors = model.get_anchors(molded_images[0].shape)
    # Add the batch dimension; cast to float32 like the other two inputs
    # (astype also turns the read-only broadcast view into a real array).
    anchors = np.broadcast_to(
        anchors, (model.config.BATCH_SIZE,) + anchors.shape).astype(np.float32)

    results = \
        sess.run(None, {"input_image": molded_images.astype(np.float32),
                        "input_anchors": anchors,
                        "input_image_meta": image_metas.astype(np.float32)})

    # postprocessing
    results_final = generate_image(images, molded_images, windows, results)

    # Save the anchors as plain text, one row of four values per anchor.
    anchors = np.squeeze(anchors)
    np.savetxt('anchors.txt', anchors)

2. 使用转换好的 ONNX 模型,在 C++ 下通过 ONNX Runtime 进行推理

#include <windows.h>
#include <windowsx.h>
#include "opencv.hpp" //opencv 的头文件
#include <opencv2/core/core.hpp>  
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>  
#include <opencv2/imgproc/imgproc.hpp> 

#include <vector>
#include <stdlib.h> 
#include <iostream> 
#include <algorithm>
#include <fstream>
#include <assert.h>
#include <iomanip>
#include <onnxruntime_cxx_api.h>
#include <cuda_provider_factory.h>
#include <onnxruntime_c_api.h>
#include <math.h>

using namespace std;

// Side length, in pixels, of the square image fed to the network.
const int IMAGE_SHAPE = 512;
// Anchor-generation parameters (this one and BACKBONE_STRIDES, RPN_ANCHOR_SCALES,
// RPN_ANCHOR_RATIOS below). They are not read by the code in this file: main() loads
// the anchors from anchors.txt instead of generating them.
const int RPN_ANCHOR_STRIDE = 1;
// Mask values >= THRESHOLD count as foreground in unmold_mask.
const float THRESHOLD = 0.5;
// Input tensors handed to Session::Run, filled in main().
std::vector<Ort::Value> ort_inputs;
// Per-channel mean subtracted from the image in main().
// NOTE(review): presumably in R, G, B order (the Mask R-CNN default) -- confirm that the
// image fed in main() uses the same channel order.
std::array<float, 3> MEAN_PIXEL = { 123.7, 116.8, 103.9 };
std::array<int, 5> BACKBONE_STRIDES = { 4, 8, 16, 32, 64 };
std::array<float, 5> RPN_ANCHOR_SCALES = { 8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6 };
std::array<float, 3> RPN_ANCHOR_RATIOS = { 0.5, 1.0, 2.0 };
// The 14 values fed as "input_image_meta".
// Presumably Mask R-CNN's image-meta layout: image id, original shape (3), molded shape (3),
// window (4), scale, then one active flag per class (2 classes) -- TODO confirm.
std::array<float, 14> input_meta_ = { 0, 512, 512, 3, 512, 512, 3, 0, 0, 512, 512, 1, 0, 1 };
// Backing storage for the "input_image" tensor (batch 1, 512 x 512, 3 channels).
std::array<float, 1 * 512 * 512 * 3> input_images_{};
// Backing storage for the "input_anchors" tensor: 65472 anchors of 4 values.
// 65472 = 3 * (128^2 + 64^2 + 32^2 + 16^2 + 8^2), which matches three ratios per cell over
// the 512 / BACKBONE_STRIDES feature maps -- presumably how the count was derived.
std::array<float, 1 * 65472 * 4> input_anchors_{};
// Tensor shapes matching the three buffers above.
std::vector<int64_t> anchors_dim = { 1, 65472, 4 };
std::vector<int64_t> images_dim = { 1, 512, 512, 3 };
std::vector<int64_t> metas_dim = { 1, 14 };
// Created empty here; main() assigns the real tensors, which wrap the buffers above.
Ort::Value anchors_input_tensor_{ nullptr };
Ort::Value images_input_tensor_{ nullptr };
Ort::Value metas_input_tensor_{ nullptr };


// Draws every detection (box, "box" label and a tinted mask overlay) onto a copy of img.
//
// img           - source image; it is cloned and never modified.
// bboxes        - one {x1, y1, x2, y2} box per detection, in pixel coordinates of img.
// classIndices  - one class index per detection (only used for the size check; every
//                 detection is drawn with the same colour and label).
// masks         - one mask per detection; each is resized to the size of its box.
// maskThreshold - resized mask values above this are treated as foreground.
//
// Returns the annotated copy. Detections whose box lies entirely outside the image, or
// whose mask is missing/empty, get their rectangle and label drawn but no overlay.
inline cv::Mat visualizeOneImageWithMask(const cv::Mat& img, const std::vector<std::array<float, 4>>& bboxes,
	const std::vector<uint64_t>& classIndices, const std::vector<cv::Mat>& masks,

	const float maskThreshold = 0.5)
{
	assert(bboxes.size() == classIndices.size());
	assert(bboxes.size() == masks.size());

	cv::Mat result = img.clone();
	const cv::Rect imageRect(0, 0, result.cols, result.rows);

	// Red in OpenCV's BGR order. This must be an explicit cv::Scalar: a bare "(0, 0, 255)"
	// is a comma expression that evaluates to 255 and yields Scalar(255, 0, 0, 0).
	const cv::Scalar curColor(0, 0, 255);
	const std::string curLabel = "box";

	for (size_t i = 0; i < bboxes.size(); ++i) {
		const auto& curBbox = bboxes[i];
		const cv::Point topLeft(static_cast<int>(curBbox[0]), static_cast<int>(curBbox[1]));
		const cv::Point bottomRight(static_cast<int>(curBbox[2]), static_cast<int>(curBbox[3]));

		cv::rectangle(result, topLeft, bottomRight, curColor, 2);

		int baseLine = 0;
		cv::Size labelSize = cv::getTextSize(curLabel, cv::FONT_HERSHEY_COMPLEX, 0.35, 1, &baseLine);
		cv::rectangle(result, topLeft,
			cv::Point(topLeft.x + labelSize.width, topLeft.y + static_cast<int>(1.3 * labelSize.height)),
			curColor, -1);
		cv::putText(result, curLabel, cv::Point(topLeft.x, topLeft.y + labelSize.height), cv::FONT_HERSHEY_COMPLEX,
			0.35, cv::Scalar(255, 255, 255));

		// ---------------------------------------------------------------------//
		// Visualize masks

		// Guard against a short masks vector when assert() is compiled out.
		if (i >= masks.size() || masks[i].empty()) {
			continue;
		}

		const cv::Rect fullBoxRect(topLeft, bottomRight);
		// Only the part of the box inside the image can be used as a ROI.
		const cv::Rect curBoxRect = fullBoxRect & imageRect;
		if (curBoxRect.width <= 0 || curBoxRect.height <= 0) {
			continue;
		}

		// Resize to the full box first, then crop, so clipping does not distort the mask.
		cv::Mat curMask;
		cv::resize(masks[i], curMask, fullBoxRect.size());
		curMask = curMask(cv::Rect(curBoxRect.x - fullBoxRect.x, curBoxRect.y - fullBoxRect.y,
			curBoxRect.width, curBoxRect.height));

		cv::Mat finalMask = (curMask > maskThreshold);

		cv::Mat coloredRoi = (0.3 * curColor + 0.7 * result(curBoxRect));

		coloredRoi.convertTo(coloredRoi, CV_8UC3);

		std::vector<cv::Mat> contours;
		cv::Mat hierarchy;
		finalMask.convertTo(finalMask, CV_8U);

		cv::findContours(finalMask, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);
		cv::drawContours(coloredRoi, contours, -1, curColor, 5, cv::LINE_8, hierarchy, 100);
		// Copy the tinted pixels back only where the mask is set.
		coloredRoi.copyTo(result(curBoxRect), finalMask);
	}

	return result;
}

// Pretty-prints a shape vector as its dimensions joined by "x",
// e.g. {1, 512, 512, 3} -> "1x512x512x3". An empty vector yields an empty string.
std::string print_shape(const std::vector<int64_t>& v) {
	std::stringstream ss("");
	for (size_t i = 0; i < v.size(); i++) {
		// Separator goes before every element except the first; this also keeps the
		// empty case safe (no v.size() - 1 underflow, no out-of-range read).
		if (i != 0) {
			ss << "x";
		}
		ss << v[i];
	}
	return ss.str();
}
// Allocates a 2-D int array as a table of `colcount` separately allocated rows,
// each `rowcount` ints long (so `colcount` is the first index, `rowcount` the second).
// Elements are left uninitialised; every row and the table itself come from new[].
int** CreateINTgArray(int colcount, int rowcount) {
	int** table = new int* [colcount];
	int** const tableEnd = table + colcount;
	for (int** slot = table; slot != tableEnd; ++slot)
	{
		*slot = new int[rowcount];
	}
	return table;
}
// Allocates a 2-D float array as a table of `colcount` separately allocated rows,
// each `rowcount` floats long (first index < colcount, second index < rowcount).
// Elements are left uninitialised; every row and the table itself come from new[].
float** CreateFLOATgArray(int colcount, int rowcount) {
	float** table = new float* [colcount];
	int row = 0;
	while (row < colcount)
	{
		table[row] = new float[rowcount];
		++row;
	}
	return table;
}
// Allocates a 2-D bool array as a table of `colcount` separately allocated rows,
// each `rowcount` bools long (first index < colcount, second index < rowcount).
// Elements are left uninitialised; every row and the table itself come from new[].
bool** CreateBOOLgArray(int colcount, int rowcount) {
	bool** const table = new bool* [colcount];
	for (int row = 0; row != colcount && row < colcount; ++row)
	{
		bool* const line = new bool[rowcount];
		table[row] = line;
	}
	return table;
}
// Allocates a dim_1 x dim_2 x dim_3 bool volume as nested pointer tables:
// result[x] is a table of dim_2 rows and result[x][y] is a row of dim_3 bools.
// Elements are left uninitialised; every level is allocated with new[].
bool*** CreateBOOL3DMask(int dim_1, int dim_2, int dim_3) {
	bool*** volume = new bool** [dim_1];
	for (int plane = 0; plane < dim_1; ++plane)
	{
		bool** rows = new bool* [dim_2];
		int r = 0;
		while (r < dim_2)
		{
			rows[r] = new bool[dim_3];
			++r;
		}
		volume[plane] = rows;
	}
	return volume;
}
// Allocates a dim_1 x dim_2 x dim_3 float volume as nested pointer tables:
// result[x] is a table of dim_2 rows and result[x][y] is a row of dim_3 floats.
// Elements are left uninitialised; every level is allocated with new[].
float*** CreateFLOAT3DMask(int dim_1, int dim_2, int dim_3) {
	float*** const volume = new float** [dim_1];
	for (int plane = 0; plane < dim_1; ++plane)
	{
		float** const rows = new float* [dim_2];
		volume[plane] = rows;
		for (float** slot = rows; slot != rows + dim_2; ++slot)
		{
			*slot = new float[dim_3];
		}
	}
	return volume;
}
// Allocates a dim_1 x dim_2 x dim_3 int volume as nested pointer tables:
// result[x] is a table of dim_2 rows and result[x][y] is a row of dim_3 ints.
// Elements are left uninitialised; every level is allocated with new[].
int*** CreateINT3DMask(int dim_1, int dim_2, int dim_3) {
	int*** volume = new int** [dim_1];
	int plane = 0;
	while (plane < dim_1)
	{
		volume[plane] = new int* [dim_2];
		for (int row = 0; row < dim_2; ++row)
		{
			volume[plane][row] = new int[dim_3];
		}
		++plane;
	}
	return volume;
}
// Allocates a dim_1 x dim_2 x dim_3 x dim_4 float array as nested pointer tables, so that
// result[x][y][z] is a row of dim_4 floats.
// Elements are left uninitialised; every level is allocated with new[].
float**** CreateFLOAT4DMask(int dim_1, int dim_2, int dim_3, int dim_4) {
	float**** const cube = new float*** [dim_1];
	for (int a = 0; a < dim_1; ++a)
	{
		float*** const planes = new float** [dim_2];
		cube[a] = planes;
		for (int b = 0; b < dim_2; ++b)
		{
			float** const rows = new float* [dim_3];
			planes[b] = rows;
			int c = 0;
			while (c < dim_3) {
				rows[c] = new float[dim_4];
				++c;
			}
		}
	}
	return cube;
}
// Releases a 2-D int array laid out like the result of CreateINTgArray.
//
// size - number of rows, i.e. the first dimension used when the array was created.
// pstr - the array to release; a null pointer is ignored.
//
// Frees every row and then the outer pointer table (which used to be leaked). After the
// call pstr is dangling: the caller must neither use it nor delete[] it again.
void DeleteINTArray(int size, int** pstr)
{
	if (pstr == nullptr)
	{
		return;
	}
	for (int i = 0; i < size; i++)
	{
		delete[]  pstr[i];   // each row was allocated with new[], so it must be released with delete[]
	}
	delete[] pstr;
}
// Releases a 2-D float array laid out like the result of CreateFLOATgArray.
//
// size - number of rows, i.e. the first dimension used when the array was created.
// pstr - the array to release; a null pointer is ignored.
//
// Frees every row and then the outer pointer table (which used to be leaked). After the
// call pstr is dangling: the caller must neither use it nor delete[] it again.
void DeleteFLOATArray(int size, float** pstr)
{
	if (pstr == nullptr)
	{
		return;
	}
	for (int i = 0; i < size; i++)
	{
		delete[]  pstr[i];   // each row was allocated with new[], so it must be released with delete[]
	}
	delete[] pstr;
}
bool** unmold_mask(float** mask, int* bbox) {
	int y1 = bbox[0];
	int x1 = bbox[1];
	int y2 = bbox[2];
	int x2 = bbox[3];

	cv::Mat mask_(28, 28, CV_32FC1);
	for (int i = 0; i < 28; i++) {
		for (int j = 0; j < 28; j++) {
			mask_.at<float>(i, j) = mask[i][j];
		}
	}
	cv::Mat mask_re;
	cv::resize(mask_, mask_re, cv::Size(x2 - x1, y2 - y1));
	bool** mask_bool = CreateBOOLgArray(mask_re.size[0], mask_re.size[1]);
	for (int i = 0; i < mask_re.size[0]; i++) {
		for (int j = 0; j < mask_re.size[1]; j++) {
			if ((float)mask_re.at<float>(i, j) >= THRESHOLD) {
				mask_bool[i][j] = TRUE;
			}
			else { mask_bool[i][j] = FALSE; }
		}
	}

	bool** full_mask = CreateBOOLgArray(IMAGE_SHAPE, IMAGE_SHAPE);
	for (int i = 0; i < IMAGE_SHAPE; i++) {
		for (int j = 0; j < IMAGE_SHAPE; j++) {
			full_mask[i][j] = FALSE;
		}
	}
	for (int i = y1; i < y2; i++) {
		for (int j = x1; j < x2; j++) {
			full_mask[i][j] = mask_bool[i - y1][j - x1];
		}
	}
	return full_mask;
}
// Counts the real detections in the fixed 100-row detection table.
// Column 4 of a row holds the class id; the first row whose class id is 0 is treated as
// the start of the padding (as in Mask R-CNN's Python unmold_detections), so its index is
// the detection count. If no row has class id 0, all 100 rows are detections.
static int CountDetections(float detection[100][6]) {
	for (int i = 0; i < 100; i++) {
		if (detection[i][4] == 0) { return i; }
	}
	return 100;
}

// Builds one full-image binary mask per detection.
//
// detection  - 100 rows of {y1, x1, y2, x2, class_id, score}; box coordinates are
//              normalised and converted to pixels here.
// mrcnn_mask - indexed as [detection][row][col][class]; holds a 28x28 mask per class.
//              The class index is not range-checked because the class count is not passed in.
//
// Returns an array of N pointers, N being the same count GetObjectNumber reports. Entry i is
// an IMAGE_SHAPE x IMAGE_SHAPE bool array produced by unmold_mask. The caller owns all of it
// (delete[] every row, every per-detection table, then the outer array).
bool*** unmold_detections(float detection[100][6], float**** mrcnn_mask) {
	const int N = CountDetections(detection);

	// Scratch 28x28 mask on the stack, exposed as float** because unmold_mask expects that.
	float maskStorage[28][28];
	float* maskRows[28];
	for (int j = 0; j < 28; j++) {
		maskRows[j] = maskStorage[j];
	}

	bool*** full_masks = new bool** [N];
	for (int i = 0; i < N; ++i) {
		// Pick the mask channel that belongs to the predicted class.
		const int id = static_cast<int>(floor(detection[i][4]));
		for (int j = 0; j < 28; j++) {
			for (int k = 0; k < 28; k++) {
				maskStorage[j][k] = mrcnn_mask[i][j][k][id];
			}
		}

		// Normalised -> pixel coordinates: scale by (IMAGE_SHAPE - 1) and shift the
		// bottom-right corner (y2, x2) by one.
		int box[4];
		for (int j = 0; j < 4; j++) {
			const int scale = IMAGE_SHAPE - 1;
			const int shift = (j < 2) ? 0 : 1;
			box[j] = static_cast<int>(round(detection[i][j] * scale + shift));
		}

		// unmold_mask already returns a freshly allocated full-size mask, so it is stored
		// directly instead of being copied into a second allocation and then leaked.
		full_masks[i] = unmold_mask(maskRows, box);
	}
	return full_masks;
}

// Prints and returns the number of real detections in the table (see CountDetections).
int GetObjectNumber(float detection[100][6]) {
	const int N = CountDetections(detection);
	cout << N << " objects detected." << endl;
	return N;
}
int main() {

	Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
	Ort::SessionOptions session_options;
	session_options.SetIntraOpNumThreads(1);
	std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
	auto cudaAvailable = std::find(availableProviders.begin(), availableProviders.end(), "CUDAExecutionProvider");
	OrtCUDAProviderOptions cudaOption{0};
	    
	session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); /** 设置图像优化级别 **/

	session_options.AppendExecutionProvider_CUDA(cudaOption);

	const wchar_t* model_path = L"demo3.onnx";
	printf("Using Onnxruntime C++ API\n");
	Ort::Session session(env, model_path, session_options);
	auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

	///1.Prepare for the "input_image"//

	cv::Mat img = cv::imread("demo3.jpg", cv::IMREAD_COLOR);

	

	if (img.rows != IMAGE_SHAPE && img.cols != IMAGE_SHAPE)
	{
		cv::resize(img, img, cv::Size(IMAGE_SHAPE, IMAGE_SHAPE));
	}
	768->1024
	// cv::Mat img_pad;
	//cv::copyMakeBorder(img, img_pad, 128, 128, 0, 0, cv::BorderTypes::BORDER_CONSTANT, cv::Scalar(0, 0, 0));

	cv::Mat img_;
	img.convertTo(img_, CV_32FC3);
	for (int i = 0; i < IMAGE_SHAPE; i++) {
		for (int j = 0; j < IMAGE_SHAPE; j++) {
			img_.at<cv::Vec3f>(i, j)[0] -= MEAN_PIXEL[0];
			img_.at<cv::Vec3f>(i, j)[1] -= MEAN_PIXEL[1];
			img_.at<cv::Vec3f>(i, j)[2] -= MEAN_PIXEL[2];
		}
	}
	float* image_output = input_images_.data();
	fill(input_images_.begin(), input_images_.end(), 0.f);
	for (int c = 0; c < IMAGE_SHAPE; c++) {
		for (int i = 0; i < IMAGE_SHAPE; i++) {
			for (int j = 0; j < 3; j++) {
				image_output[c * IMAGE_SHAPE * 3 + i * 3 + j] = (img_.ptr<float>(c)[i * 3 + j]);//Transfer cv::Mat to std::array.
			}
		}
	}
	images_input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_images_.data(), input_images_.size(), images_dim.data(), images_dim.size());
	//float** my = generate_anchors(48.0, 128, 4);

	/2.Prepare for the "input_anchors"/

	std::ifstream data("anchors.txt");
	float* anchors_output = input_anchors_.data();
	fill(input_anchors_.begin(), input_anchors_.end(), 0.f);
	for (int c = 0; c < 1; c++) {
		for (int i = 0; i < 65472; i++) {
			for (int j = 0; j < 4; j++) {
				data >> setprecision(20) >> anchors_output[c * 65472 * 4 + i * 4 + j]; //We load the matrix from TXT for the calculation process of anchors
																				 //is too complex to be tranferred from Python.
			}
		}
	}
	anchors_input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_anchors_.data(), input_anchors_.size(), anchors_dim.data(), anchors_dim.size());

	/3.Prepare for the "input_image_meta"/

	metas_input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_meta_.data(), input_meta_.size(), metas_dim.data(), metas_dim.size());

	/4.Prepare for the model Inputs & Outputs indications/

	ort_inputs.push_back(std::move(images_input_tensor_));
	ort_inputs.push_back(std::move(metas_input_tensor_));
	ort_inputs.push_back(std::move(anchors_input_tensor_));

	std::vector<const char*> input_names = { "input_image", "input_image_meta", "input_anchors" };
	const char* const output_names[] = { "mrcnn_detection", "mrcnn_class", "mrcnn_bbox" , "mrcnn_mask", "ROI", "rpn_class", "rpn_bbox" };
	//for (size_t i = 0; i < 10; i++)
	//{
	double timeStart = (double)cv::getTickCount();
	std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{ nullptr }, input_names.data(),
		ort_inputs.data(), ort_inputs.size(),
		output_names, 7);
	double nTime = ((double)cv::getTickCount() - timeStart) / cv::getTickFrequency();
	cout << "running time :" << nTime << "sec\n" << endl;
	//}
	

	/5.Get Outputs and check them/
	auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
	std::vector<int64_t> shape = type_info.GetShape();
	float detection[100][6];
	float* detection_output_data = ort_outputs[0].GetTensorMutableData<float>();
	float* mask_output_data = ort_outputs[3].GetTensorMutableData<float>();

	for (int i = 0; i < 100; i++) {
		for (int j = 0; j < 6; j++) {
			detection[i][j] = detection_output_data[i * 6 + j];
		}
	}
	float**** mrcnn_mask = CreateFLOAT4DMask(100, 28, 28, 2);
	for (int i = 0; i < 100; i++) {
		for (int j = 0; j < 28; j++) {
			for (int k = 0; k < 28; k++) {
				for (int l = 0; l < 2; l++) {
					mrcnn_mask[i][j][k][l] = mask_output_data[i * 28 * 28 * 2 + j * 28 * 2 + k * 2 + l];
				}
			}
		}
	}
	int N = GetObjectNumber(detection);
	bool*** masks = unmold_detections(detection, mrcnn_mask);
	int*** masks_ = CreateINT3DMask(N, IMAGE_SHAPE, 512);
	for (int i = 0; i < N; i++) {
		for (int j = 0; j < 512; j++) {
			for (int k = 0; k < 512; k++) {
				masks_[i][j][k] = masks[i][j][k];
			}
		}
	}
	cv::Mat mask(512, 512, CV_32FC1);
	for (int i = 0; i < N; ++i) {

		for (int l = 0; l < 512; l++) {
			for (int c = 0; c < 512; c++) {
				if (masks[i][l][c] == TRUE) {
					cout << "oh" << endl;
					mask.at<float>(l, c) = 255.0;
				}

			}
		}
		cv::imwrite("my_mask.jpg", mask);
		//cv::Mat final_mask;
		//mask.convertTo(final_mask, CV_8UC1);
		/*vector<vector<cv::Point>> contours;
		vector<cv::Vec4i> hierarchy;
		cv::findContours(final_mask, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);
		drawContours(img, contours, -1,cv::Scalar(255,255,255));*/
	}
	/*cv::namedWindow("dst", 0);
	cv::imshow("dst", img);
	cv::waitKey();*/
	return 0;
}
  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 5
    评论
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值