TensorRT INT8 Quantization for a Two-Stage Mask R-CNN

1. Download and install tensorflow==1.13.1

2. Download uff==0.6.5

3. Download tensorrt==7.0.0.11

4. git clone https://github.com/matterport/Mask_RCNN.git and train Mask R-CNN

5. Convert the trained Mask R-CNN model to a TensorRT model. A demo model can be downloaded from: https://pan.baidu.com/s/1_hPmoE9dxV8gXZPLCACQWQ (extraction code: pggr)

When converting, note that the conversion script (file 1 below) must be given the graphsurgeon config (file 2) via the -p option; the -l flag additionally lists the nodes of the converted .pb:

python3 mrcnn_to_trt_single.py -w 33/mask_rcnn_coco.h5 -o mrcnn_nchw.uff -p ./config1.py

mrcnn_to_trt_single.py (file 1):
from keras.models import model_from_json, Model
from keras import backend as K
from keras.layers import Input, Lambda
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from mrcnn.model import *
import mrcnn.model as modellib
from mrcnn.config import Config
import argparse
import os
import sys

import uff

ROOT_DIR = os.path.abspath("./")
LOG_DIR = os.path.join(ROOT_DIR, "logs")


def parse_command_line_arguments(args=None):
    parser = argparse.ArgumentParser(prog='keras_to_trt', description='Convert trained keras .hdf5 model to trt .uff')

    parser.add_argument(
        '-w',
        '--weights',
        type=str,
        default=None,
        required=True,
        help="The checkpoint weights file of keras model."
    )

    parser.add_argument(
        '-o',
        '--output_file',
        type=str,
        default=None,
        required=True,
        help="The path to output .uff file."
    )

    parser.add_argument(
        '-l',
        '--list-nodes',
        action='store_true',
        help="show list of nodes contained in converted pb"
    )

    parser.add_argument(
        '-p',
        '--preprocessor',
        type=str,
        default=None,
        help="The preprocess function for converting tf node to trt plugin"
    )

    return parser.parse_args(args)


class CocoConfig(Config):
    """Configuration for training on MS COCO.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes

class InferenceConfig(CocoConfig):
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

def main(args=None):

    K.set_image_data_format('channels_first')
    K.set_learning_phase(0)

    args = parse_command_line_arguments(args)

    model_weights_path = args.weights
    output_file_path = args.output_file
    list_nodes = args.list_nodes

    config = InferenceConfig()
    config.display()

    model = modellib.MaskRCNN(mode="inference", model_dir=LOG_DIR, config=config).keras_model

    model.load_weights(model_weights_path, by_name=True)


    model_A = Model(inputs=model.input, outputs=model.get_layer('mrcnn_mask').output)
    model_A.summary()

    output_nodes = ['mrcnn_detection', "mrcnn_mask/Sigmoid"]
    convert_model(model_A, output_file_path, output_nodes, preprocessor=args.preprocessor,
                  text=True, list_nodes=list_nodes)


def convert_model(inference_model, output_path, output_nodes=[], preprocessor=None, text=False,
                  list_nodes=False):
    # convert the keras model to pb
    orig_output_node_names = [node.op.name for node in inference_model.outputs]
    print("The output names of tensorflow graph nodes: {}".format(str(orig_output_node_names)))

    sess = K.get_session()

    constant_graph = graph_util.convert_variables_to_constants(
        sess,
        sess.graph.as_graph_def(),
        orig_output_node_names)

    temp_pb_path = "../temp11.pb"
    graph_io.write_graph(constant_graph, os.path.dirname(temp_pb_path), os.path.basename(temp_pb_path),
                         as_text=False)

    predefined_output_nodes = output_nodes
    if predefined_output_nodes != []:
        trt_output_nodes = predefined_output_nodes
    else:
        trt_output_nodes = orig_output_node_names

    # convert .pb to .uff
    uff.from_tensorflow_frozen_model(
        temp_pb_path,
        output_nodes=trt_output_nodes,
        preprocessor=preprocessor,
        text=text,
        list_nodes=list_nodes,
        output_filename=output_path,
        debug_mode=False
    )

    os.remove(temp_pb_path)


if __name__ == "__main__":
    main()
config1.py (file 2):

#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import graphsurgeon as gs
import tensorflow as tf

fpn_p5upsampled = gs.create_plugin_node("fpn_p5upsampled", op="ResizeNearest_TRT", dtype=tf.float32, scale=2.0)
fpn_p4upsampled = gs.create_plugin_node("fpn_p4upsampled", op="ResizeNearest_TRT", dtype=tf.float32, scale=2.0)
fpn_p3upsampled = gs.create_plugin_node("fpn_p3upsampled", op="ResizeNearest_TRT", dtype=tf.float32, scale=2.0)

roi = gs.create_plugin_node("ROI", op="ProposalLayer_TRT", prenms_topk=1024, keep_topk=1000, iou_threshold=0.7)
roi_align_classifier = gs.create_plugin_node("roi_align_classifier", op="PyramidROIAlign_TRT", pooled_size=7)
mrcnn_detection = gs.create_plugin_node("mrcnn_detection", op="DetectionLayer_TRT", num_classes=81, keep_topk=100, score_threshold=0.7, iou_threshold=0.3)
roi_align_mask = gs.create_plugin_node("roi_align_mask_trt", op="PyramidROIAlign_TRT", pooled_size=14)
mrcnn_detection_bboxes = gs.create_plugin_node("mrcnn_detection_bboxes", op="SpecialSlice_TRT")

namespace_plugin_map = {
    "fpn_p5upsampled": fpn_p5upsampled,
    "fpn_p4upsampled": fpn_p4upsampled,
    "fpn_p3upsampled": fpn_p3upsampled,
    "roi_align_classifier": roi_align_classifier,
    "mrcnn_detection": mrcnn_detection,
    "ROI": roi,
    "roi_align_mask": roi_align_mask,
    "lambda_1": mrcnn_detection_bboxes,
}

timedistributed_remove_list = [
        "mrcnn_class_conv1/Reshape/shape", "mrcnn_class_conv1/Reshape", "mrcnn_class_conv1/Reshape_1/shape", "mrcnn_class_conv1/Reshape_1",
        "mrcnn_class_bn1/Reshape/shape", "mrcnn_class_bn1/Reshape", "mrcnn_class_bn1/Reshape_5/shape", "mrcnn_class_bn1/Reshape_5",
        "mrcnn_class_conv2/Reshape/shape", "mrcnn_class_conv2/Reshape", "mrcnn_class_conv2/Reshape_1/shape", "mrcnn_class_conv2/Reshape_1",
        "mrcnn_class_bn2/Reshape/shape", "mrcnn_class_bn2/Reshape", "mrcnn_class_bn2/Reshape_5/shape", "mrcnn_class_bn2/Reshape_5",
        "mrcnn_class_logits/Reshape/shape", "mrcnn_class_logits/Reshape","mrcnn_class_logits/Reshape_1/shape", "mrcnn_class_logits/Reshape_1",
        "mrcnn_class/Reshape/shape", "mrcnn_class/Reshape","mrcnn_class/Reshape_1/shape", "mrcnn_class/Reshape_1",
        "mrcnn_bbox_fc/Reshape/shape", "mrcnn_bbox_fc/Reshape","mrcnn_bbox_fc/Reshape_1/shape", "mrcnn_bbox_fc/Reshape_1",

        "mrcnn_mask_conv1/Reshape/shape", "mrcnn_mask_conv1/Reshape", "mrcnn_mask_conv1/Reshape_1/shape", "mrcnn_mask_conv1/Reshape_1",
        "mrcnn_mask_bn1/Reshape/shape", "mrcnn_mask_bn1/Reshape", "mrcnn_mask_bn1/Reshape_5/shape", "mrcnn_mask_bn1/Reshape_5",
        "mrcnn_mask_conv2/Reshape/shape", "mrcnn_mask_conv2/Reshape", "mrcnn_mask_conv2/Reshape_1/shape", "mrcnn_mask_conv2/Reshape_1",
        "mrcnn_mask_bn2/Reshape/shape", "mrcnn_mask_bn2/Reshape", "mrcnn_mask_bn2/Reshape_5/shape", "mrcnn_mask_bn2/Reshape_5",
        "mrcnn_mask_conv3/Reshape/shape", "mrcnn_mask_conv3/Reshape", "mrcnn_mask_conv3/Reshape_1/shape", "mrcnn_mask_conv3/Reshape_1",
        "mrcnn_mask_bn3/Reshape/shape", "mrcnn_mask_bn3/Reshape", "mrcnn_mask_bn3/Reshape_5/shape", "mrcnn_mask_bn3/Reshape_5",
        "mrcnn_mask_conv4/Reshape/shape", "mrcnn_mask_conv4/Reshape", "mrcnn_mask_conv4/Reshape_1/shape", "mrcnn_mask_conv4/Reshape_1",
        "mrcnn_mask_bn4/Reshape/shape", "mrcnn_mask_bn4/Reshape", "mrcnn_mask_bn4/Reshape_5/shape", "mrcnn_mask_bn4/Reshape_5",
        "mrcnn_mask_deconv/Reshape/shape", "mrcnn_mask_deconv/Reshape", "mrcnn_mask_deconv/Reshape_1/shape", "mrcnn_mask_deconv/Reshape_1",
        "mrcnn_mask/Reshape/shape", "mrcnn_mask/Reshape", "mrcnn_mask/Reshape_1/shape", "mrcnn_mask/Reshape_1",
        ]

timedistributed_connect_pairs = [
        ("mrcnn_mask_deconv/Relu", "mrcnn_mask/convolution"), # mrcnn_mask_deconv -> mrcnn_mask
        ("activation_74/Relu", "mrcnn_mask_deconv/conv2d_transpose"), #active74 -> mrcnn_mask_deconv
        ("mrcnn_mask_bn4/batchnorm/add_1","activation_74/Relu"),  # mrcnn_mask_bn4 -> active74
        ("mrcnn_mask_conv4/BiasAdd", "mrcnn_mask_bn4/batchnorm/mul_1"), #mrcnn_mask_conv4 -> mrcnn_mask_bn4
        ("activation_73/Relu", "mrcnn_mask_conv4/convolution"), #active73 -> mrcnn_mask_conv4
        ("mrcnn_mask_bn3/batchnorm/add_1","activation_73/Relu"), #mrcnn_mask_bn3 -> active73
        ("mrcnn_mask_conv3/BiasAdd", "mrcnn_mask_bn3/batchnorm/mul_1"), #mrcnn_mask_conv3 -> mrcnn_mask_bn3
        ("activation_72/Relu", "mrcnn_mask_conv3/convolution"), #active72 -> mrcnn_mask_conv3
        ("mrcnn_mask_bn2/batchnorm/add_1","activation_72/Relu"), #mrcnn_mask_bn2 -> active72
        ("mrcnn_mask_conv2/BiasAdd", "mrcnn_mask_bn2/batchnorm/mul_1"), #mrcnn_mask_conv2 -> mrcnn_mask_bn2
        ("activation_71/Relu", "mrcnn_mask_conv2/convolution"), #active71 -> mrcnn_mask_conv2
        ("mrcnn_mask_bn1/batchnorm/add_1","activation_71/Relu"), #mrcnn_mask_bn1 -> active71
        ("mrcnn_mask_conv1/BiasAdd", "mrcnn_mask_bn1/batchnorm/mul_1"), #mrcnn_mask_conv1 -> mrcnn_mask_bn1
        ("roi_align_mask_trt", "mrcnn_mask_conv1/convolution"), #roi_align_mask -> mrcnn_mask_conv1


        ("mrcnn_class_bn2/batchnorm/add_1","activation_69/Relu"), # mrcnn_class_bn2 -> active 69
        ("mrcnn_class_conv2/BiasAdd", "mrcnn_class_bn2/batchnorm/mul_1"), # mrcnn_class_conv2 -> mrcnn_class_bn2
        ("activation_68/Relu", "mrcnn_class_conv2/convolution"), # active 68 -> mrcnn_class_conv2
        ("mrcnn_class_bn1/batchnorm/add_1","activation_68/Relu"), # mrcnn_class_bn1 -> active 68
        ("mrcnn_class_conv1/BiasAdd", "mrcnn_class_bn1/batchnorm/mul_1"), # mrcnn_class_conv1 -> mrcnn_class_bn1
        ("roi_align_classifier", "mrcnn_class_conv1/convolution"), # roi_align_classifier -> mrcnn_class_conv1
        ]

dense_compatible_patch =["pool_squeeze/Squeeze", "pool_squeeze/Squeeze_1", #No need to squeeze the dimensions for TRT Dense Layer
        "mrcnn_bbox/Shape", "mrcnn_bbox/strided_slice/stack", # mrcnn_bbox(Reshape): No need to reshape, cause we can process it as 1-D array in detectionlayer's kernel
        "mrcnn_bbox/strided_slice/stack_1", "mrcnn_bbox/strided_slice/stack_2",
        "mrcnn_bbox/strided_slice", "mrcnn_bbox/Reshape/shape/1",
        "mrcnn_bbox/Reshape/shape/2", "mrcnn_bbox/Reshape/shape/3",
        "mrcnn_bbox/Reshape/shape", "mrcnn_bbox/Reshape"]

dense_compatible_connect_pairs = [
        ("activation_69/Relu","mrcnn_bbox_fc/MatMul"), #activation_69 -> mrcnn_bbox_fc
        ("activation_69/Relu", "mrcnn_class_logits/MatMul"), #activation_69 -> mrcnn_class_logits
        ("mrcnn_class_logits/BiasAdd", "mrcnn_class/Softmax"), #mrcnn_class_logits -> mrcnn_class
        ("mrcnn_class/Softmax", "mrcnn_detection"), #mrcnn_class -> mrcnn_detection
        ("mrcnn_bbox_fc/BiasAdd", "mrcnn_detection"), #mrcnn_bbox_fc -> mrcnn_detection
        ]

def connect(dynamic_graph, connections_list):

    for node_a_name, node_b_name in connections_list:
        if node_a_name not in dynamic_graph.node_map[node_b_name].input:
            dynamic_graph.node_map[node_b_name].input.insert(0, node_a_name)

def preprocess(dynamic_graph):
    # Now create a new graph by collapsing namespaces
    dynamic_graph.collapse_namespaces(namespace_plugin_map, unique_inputs=True)
    dynamic_graph.remove(timedistributed_remove_list)
    dynamic_graph.remove(dense_compatible_patch)
    dynamic_graph.remove(['input_anchors', 'input_image_meta'])

    connect(dynamic_graph, timedistributed_connect_pairs)
    connect(dynamic_graph, dense_compatible_connect_pairs)


6. Deploy the generated TensorRT model to production. The deployment sample consists of the files below.

logger.cpp

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "logger.h"
#include "logging.h"

Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};

void setReportableSeverity(Logger::Severity severity)
{
    gLogger.setReportableSeverity(severity);
    gLogVerbose.setReportableSeverity(severity);
    gLogInfo.setReportableSeverity(severity);
    gLogWarning.setReportableSeverity(severity);
    gLogError.setReportableSeverity(severity);
    gLogFatal.setReportableSeverity(severity);
}

logger.h

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LOGGER_H
#define LOGGER_H

#include "logging.h"

extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;

void setReportableSeverity(Logger::Severity severity);

#endif // LOGGER_H

mrcnn_config.h

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MASKRCNN_CONFIG_HEADER
#define MASKRCNN_CONFIG_HEADER
#include "NvInfer.h"
#include <string>
#include <vector>
using namespace nvinfer1;

namespace MaskRCNNConfig
{
static const nvinfer1::Dims3 IMAGE_SHAPE{3, 512, 512};

// Pooled ROIs
static const int POOL_SIZE = 7;
static const int MASK_POOL_SIZE = 14;

// Threshold to determine the mask area out of final convolution output
static const float MASK_THRESHOLD = 0.5;

// Bounding box refinement standard deviation for RPN and final detections.
static const float RPN_BBOX_STD_DEV[] = {0.1, 0.1, 0.2, 0.2};
static const float BBOX_STD_DEV[] = {0.1, 0.1, 0.2, 0.2};

// Max number of final detections
static const int DETECTION_MAX_INSTANCES = 100;

// Minimum probability value to accept a detected instance
// ROIs below this threshold are skipped
static const float DETECTION_MIN_CONFIDENCE = 0.7;

// Non-maximum suppression threshold for detection
static const float DETECTION_NMS_THRESHOLD = 0.3;

// The strides of each layer of the FPN Pyramid. These values
// are based on a Resnet101 backbone.
//static const std::vector<float> BACKBONE_STRIDES = {4, 8, 16, 32, 64};
static const std::vector<float> BACKBONE_STRIDES = {4, 8, 16, 32};
// Size of the fully-connected layers in the classification graph
static const int FPN_CLASSIF_FC_LAYERS_SIZE = 1024;

// Size of the top-down layers used to build the feature pyramid
static const int TOP_DOWN_PYRAMID_SIZE = 256;

// Number of classification classes (including background)
static const int NUM_CLASSES = 1 + 80; // COCO has 80 classes

// Length of square anchor side in pixels
//static const std::vector<float> RPN_ANCHOR_SCALES = {32, 64, 128, 256, 512};
static const std::vector<float> RPN_ANCHOR_SCALES = {32, 64, 128, 256};
// Ratios of anchors at each cell (width/height)
// A value of 1 represents a square anchor, and 0.5 is a wide anchor
static const float RPN_ANCHOR_RATIOS[] = {0.5, 1, 2};

// Anchor stride
// If 1 then anchors are created for each cell in the backbone feature map.
// If 2, then anchors are created for every other cell, and so on.
static const int RPN_ANCHOR_STRIDE = 1;

// Although the Python implementation uses 6000,
// TRT fails if this number is larger than MAX_TOPK_K defined in engine/checkMacros.h
static const int MAX_PRE_NMS_RESULTS = 1024; // 3840;

// Non-max suppression threshold to filter RPN proposals.
// You can increase this during training to generate more proposals.
static const float RPN_NMS_THRESHOLD = 0.7;

// ROIs kept after non-maximum suppression (training and inference)
static const int POST_NMS_ROIS_INFERENCE = 300;

// COCO Class names
static const std::vector<std::string> CLASS_NAMES = {
    "BG",
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
};

static const std::string MODEL_NAME = "mrcnn_nchw.uff";
static const std::string MODEL_INPUT = "input_image";
static const Dims3 MODEL_INPUT_SHAPE = IMAGE_SHAPE;
static const std::vector<std::string> MODEL_OUTPUTS = {"mrcnn_detection", "mrcnn_mask/Sigmoid"};
static const Dims2 MODEL_DETECTION_SHAPE{DETECTION_MAX_INSTANCES, 6};
static const Dims4 MODEL_MASK_SHAPE{DETECTION_MAX_INSTANCES, NUM_CLASSES, 28, 28};
} // namespace MaskRCNNConfig
#endif

sampleUffMaskRCNN.cpp

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _MSC_VER
#include <unistd.h>
#include <sys/time.h>
#endif

#include <assert.h>
#include <chrono>
#include <ctime>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdlib.h>
#include <sys/stat.h>
#include <time.h>
#include <vector>

#include "NvInfer.h"
#include "NvUffParser.h"

#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"

// max
#include <algorithm>

// MaskRCNN Parameter
#include "mrcnn_config.h"

const std::string gSampleName = "TensorRT.sample_maskrcnn";

namespace MaskRCNNUtils
{
struct RawDetection
{
    float y1, x1, y2, x2, class_id, score;
};

struct Mask
{
    float raw[MaskRCNNConfig::MASK_POOL_SIZE * 2 * MaskRCNNConfig::MASK_POOL_SIZE * 2];
};

struct BBoxInfo
{
    samplesCommon::BBox box;
    int label = -1;
    float prob = 0.0f;

    Mask* mask = nullptr;
};

template <typename T>
struct PPM
{
    std::string magic, fileName;
    int h, w, max;
    std::vector<T> buffer;
};

void readPPMFile(const std::string& filename, PPM<uint8_t>& ppm)
{
    ppm.fileName = filename;
    std::ifstream infile(filename, std::ifstream::binary);
    assert(infile.is_open() && "Attempting to read from a file that is not open. ");
    infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
    infile.seekg(1, infile.cur);

    ppm.buffer.resize(ppm.w * ppm.h * 3, 0);

    infile.read(reinterpret_cast<char*>(ppm.buffer.data()), ppm.w * ppm.h * 3);
}
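
// --- Optional helper (not in the original sample): the sample only reads
// .ppm images, but the CMake file below already links OpenCV, so ordinary
// image files can be loaded too. This is a sketch under assumptions: add
// #include <opencv2/opencv.hpp> at the top of the file, and note that
// OpenCV loads BGR while the preprocessing here expects RGB.
bool readImageFile(const std::string& filename, PPM<uint8_t>& ppm)
{
    cv::Mat bgr = cv::imread(filename, cv::IMREAD_COLOR);
    if (bgr.empty())
    {
        return false;
    }
    cv::Mat rgb;
    cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);
    ppm.fileName = filename;
    ppm.magic = "P6";
    ppm.h = rgb.rows;
    ppm.w = rgb.cols;
    ppm.max = 255;
    ppm.buffer.assign(rgb.data, rgb.data + rgb.total() * rgb.elemSize());
    return true;
}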

void writePPMFile(const std::string& filename, PPM<uint8_t>& ppm)
{
    std::ofstream outfile("./" + filename, std::ofstream::binary);
    assert(!outfile.fail());
    outfile << "P6"
            << "\n"
            << ppm.w << " " << ppm.h << "\n"
            << ppm.max << "\n";

    outfile.write(reinterpret_cast<char*>(ppm.buffer.data()), ppm.w * ppm.h * 3);
}

template <typename T>
void resizePPM(const PPM<T>& src, PPM<T>& dst, int target_height, int target_width, int channel)
{
    auto clip = [](float in, float low, float high) -> float { return (in < low) ? low : (in > high ? high : in); };
    int original_height = src.h;
    int original_width = src.w;
    assert(dst.h == target_height);
    assert(dst.w == target_width);
    float ratio_h = static_cast<float>(original_height - 1.0f) / static_cast<float>(target_height - 1.0f);
    float ratio_w = static_cast<float>(original_width - 1.0f) / static_cast<float>(target_width - 1.0f);

    int dst_idx = 0;
    for (int y = 0; y < target_height; ++y)
    {
        for (int x = 0; x < target_width; ++x)
        {
            float x0 = static_cast<float>(x) * ratio_w;
            float y0 = static_cast<float>(y) * ratio_h;
            int left = static_cast<int>(clip(std::floor(x0), 0.0f, static_cast<float>(original_width - 1.0f)));
            int top = static_cast<int>(clip(std::floor(y0), 0.0f, static_cast<float>(original_height - 1.0f)));
            int right = static_cast<int>(clip(std::ceil(x0), 0.0f, static_cast<float>(original_width - 1.0f)));
            int bottom = static_cast<int>(clip(std::ceil(y0), 0.0f, static_cast<float>(original_height - 1.0f)));

            for (int c = 0; c < channel; ++c)
            {
                // H, W, C ordering
                T left_top_val = src.buffer[top * (original_width * channel) + left * (channel) + c];
                T right_top_val = src.buffer[top * (original_width * channel) + right * (channel) + c];
                T left_bottom_val = src.buffer[bottom * (original_width * channel) + left * (channel) + c];
                T right_bottom_val = src.buffer[bottom * (original_width * channel) + right * (channel) + c];
                float top_lerp = left_top_val + (right_top_val - left_top_val) * (x0 - left);
                float bottom_lerp = left_bottom_val + (right_bottom_val - left_bottom_val) * (x0 - left);
                float lerp = clip(std::round(top_lerp + (bottom_lerp - top_lerp) * (y0 - top)), 0.0f, 255.0f);
                dst.buffer[dst_idx] = (static_cast<T>(lerp));
                dst_idx++;
            }
        }
    }
}

void padPPM(const PPM<uint8_t>& src, PPM<uint8_t>& dst, int top, int bottom, int left, int right)
{
    assert(dst.h == (src.h + top + bottom));
    assert(dst.w == (src.w + left + right));

    for (int y = 0; y < src.h; y++)
    {
        for (int x = 0; x < src.w; x++)
        {
            for (int c = 0; c < 3; c++)
            {
                dst.buffer[(top + y) * dst.w * 3 + (left + x) * 3 + c] = src.buffer[y * src.w * 3 + x * 3 + c];
            }
        }
    }
}

void preprocessPPM(PPM<uint8_t>& src, PPM<uint8_t>& dst, int target_h, int target_w)
{
    assert(target_h == target_w);
    int input_dim = target_h;
    // padding the input img to model's input_size:
    const int image_dim = std::max(src.h, src.w);
    int resize_h = src.h * input_dim / image_dim;
    int resize_w = src.w * input_dim / image_dim;
    assert(resize_h == input_dim || resize_w == input_dim);

    int y_offset = (input_dim - resize_h) / 2;
    int x_offset = (input_dim - resize_w) / 2;

    // resize
    PPM<uint8_t> resized_ppm;
    resized_ppm.h = resize_h;
    resized_ppm.w = resize_w;
    resized_ppm.max = src.max;
    resized_ppm.buffer.resize(resize_h * resize_w * 3, 0);
    resizePPM<uint8_t>(src, resized_ppm, resize_h, resize_w, 3);

    // pad
    dst.h = target_h;
    dst.w = target_w;
    dst.max = src.max;
    dst.buffer.resize(dst.h * dst.w * 3, 0);
    padPPM(resized_ppm, dst, y_offset, input_dim - resize_h - y_offset, x_offset, input_dim - resize_w - x_offset);
}

PPM<uint8_t> resizeMask(const BBoxInfo& box, const float mask_threshold)
{
    PPM<uint8_t> result;
    if (!box.mask)
    {
        assert(result.buffer.size() == 0);
        return result;
    }

    const int h = box.box.y2 - box.box.y1;
    const int w = box.box.x2 - box.box.x1;

    PPM<float> raw_mask;
    raw_mask.h = MaskRCNNConfig::MASK_POOL_SIZE * 2;
    raw_mask.w = MaskRCNNConfig::MASK_POOL_SIZE * 2;
    raw_mask.buffer.resize(raw_mask.h * raw_mask.w, 0);
    for (int i = 0; i < raw_mask.h * raw_mask.w; i++)
        raw_mask.buffer[i] = box.mask->raw[i];

    PPM<float> resized_mask;
    resized_mask.h = h;
    resized_mask.w = w;
    resized_mask.buffer.resize(h * w, 0);
    resizePPM<float>(raw_mask, resized_mask, h, w, 1);

    result.h = h;
    result.w = w;
    result.buffer.resize(result.h * result.w, 0);
    for (int i = 0; i < h * w; i++)
    {
        if (resized_mask.buffer[i] > mask_threshold)
        {
            result.buffer[i] = 1;
        }
    }

    return result;
}

void maskPPM(
    PPM<uint8_t>& image, const PPM<uint8_t>& mask, const int start_x, const int start_y, const std::vector<int>& color)
{

    float alpha = 0.6f;

    for (int y = 0; y < mask.h; ++y)
    {
        for (int x = 0; x < mask.w; ++x)
        {
            uint8_t mask_pixel = mask.buffer[y * mask.w + x];
            if (mask_pixel == 1)
            {
                assert(0 <= start_y + y && start_y + y < image.h);
                assert(0 <= start_x + x && start_x + x < image.w);

                int cur_y = start_y + y;
                int cur_x = start_x + x;

                float p_r = static_cast<float>(image.buffer[(cur_y * image.w + cur_x) * 3]);
                float p_g = static_cast<float>(image.buffer[(cur_y * image.w + cur_x) * 3 + 1]);
                float p_b = static_cast<float>(image.buffer[(cur_y * image.w + cur_x) * 3 + 2]);

                image.buffer[(cur_y * image.w + cur_x) * 3]
                    = static_cast<uint8_t>(std::max(0.0f, std::min(255.0f, p_r * (1 - alpha) + color[0] * alpha)));
                image.buffer[(cur_y * image.w + cur_x) * 3 + 1]
                    = static_cast<uint8_t>(std::max(0.0f, std::min(255.0f, p_g * (1 - alpha) + color[1] * alpha)));
                image.buffer[(cur_y * image.w + cur_x) * 3 + 2]
                    = static_cast<uint8_t>(std::max(0.0f, std::min(255.0f, p_b * (1 - alpha) + color[2] * alpha)));
            }
            //else
            //    assert(mask_pixel == 0);
        }
    }
}
void addBBoxPPM(PPM<uint8_t>& ppm, const BBoxInfo& box, const PPM<uint8_t>& resized_mask)
{
    const int x1 = box.box.x1;
    const int y1 = box.box.y1;
    const int x2 = box.box.x2;
    const int y2 = box.box.y2;
    std::vector<int> color = {rand() % 256, rand() % 256, rand() % 256};

    for (int x = x1; x <= x2; x++)
    {
        // bbox top border
        ppm.buffer[(y1 * ppm.w + x) * 3] = color[0];
        ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = color[1];
        ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = color[2];
        // bbox bottom border
        ppm.buffer[(y2 * ppm.w + x) * 3] = color[0];
        ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = color[1];
        ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = color[2];
    }

    for (int y = y1; y <= y2; y++)
    {
        // bbox left border
        ppm.buffer[(y * ppm.w + x1) * 3] = color[0];
        ppm.buffer[(y * ppm.w + x1) * 3 + 1] = color[1];
        ppm.buffer[(y * ppm.w + x1) * 3 + 2] = color[2];
        // bbox right border
        ppm.buffer[(y * ppm.w + x2) * 3] = color[0];
        ppm.buffer[(y * ppm.w + x2) * 3 + 1] = color[1];
        ppm.buffer[(y * ppm.w + x2) * 3 + 2] = color[2];
    }

    if (resized_mask.buffer.size() != 0)
    {
        maskPPM(ppm, resized_mask, x1, y1, color);
    }
}
} // namespace MaskRCNNUtils

struct SampleMaskRCNNParams : public samplesCommon::SampleParams
{
    std::string uffFileName;
    float maskThreshold;
};

class SampleMaskRCNN
{
    template <typename T>
    using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;

public:
    SampleMaskRCNN(const SampleMaskRCNNParams& params)
        : mParams(params)
        , mEngine(nullptr)
    {
        srand((int) time(0));
    }

    bool build();

    bool infer();

    bool teardown();

private:
    SampleMaskRCNNParams mParams;

    nvinfer1::Dims mInputDims;

    // original images
    std::vector<MaskRCNNUtils::PPM<uint8_t>> mOriginalPPMs;

    // processed images (resize + pad)
    std::vector<MaskRCNNUtils::PPM<uint8_t>> mPPMs;

    std::shared_ptr<nvinfer1::ICudaEngine> mEngine;

    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvuffparser::IUffParser>& parser);

    bool processInput(const samplesCommon::BufferManager& buffers);

    bool verifyOutput(const samplesCommon::BufferManager& buffers);

    std::vector<MaskRCNNUtils::BBoxInfo> decodeOutput(const int imageIdx, void* detectionsHost, void* masksHost);
};

bool SampleMaskRCNN::build()
{
    initLibNvInferPlugins(&gLogger.getTRTLogger(), "");
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }

    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
    if (!network)
    {
        return false;
    }

    auto parser = SampleUniquePtr<nvuffparser::IUffParser>(nvuffparser::createUffParser());
    if (!parser)
    {
        return false;
    }

    auto constructed = constructNetwork(builder, network, parser);
    if (!constructed)
    {
        return false;
    }

    assert(network->getNbInputs() == 1);
    mInputDims = network->getInput(0)->getDimensions();
    assert(mInputDims.nbDims == 3);

    assert(network->getNbOutputs() == 2);

    return true;
}
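
// --- Optional (not in the original sample): parsing the UFF and building
// an INT8 engine is slow, so for production it helps to serialize the
// engine once and deserialize it at startup. A minimal sketch using the
// stock TensorRT 7 API; the file name and helper are assumptions.
bool saveEngine(nvinfer1::ICudaEngine& engine, const std::string& path)
{
    nvinfer1::IHostMemory* serialized = engine.serialize();
    if (!serialized)
    {
        return false;
    }
    std::ofstream file(path, std::ios::binary);
    file.write(static_cast<const char*>(serialized->data()), serialized->size());
    serialized->destroy();
    return file.good();
}
// To reload, register the Mask R-CNN plugins first, then deserialize:
//   initLibNvInferPlugins(&gLogger.getTRTLogger(), "");
//   auto runtime = nvinfer1::createInferRuntime(gLogger.getTRTLogger());
//   engine = runtime->deserializeCudaEngine(blob.data(), blob.size(), nullptr);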

bool SampleMaskRCNN::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvuffparser::IUffParser>& parser)
{

    gLogInfo << "xxxxxx " << MaskRCNNConfig::IMAGE_SHAPE << std::endl;
    parser->registerInput(
        mParams.inputTensorNames[0].c_str(), MaskRCNNConfig::IMAGE_SHAPE, nvuffparser::UffInputOrder::kNCHW);
    for (size_t i = 0; i < mParams.outputTensorNames.size(); i++)
        parser->registerOutput(mParams.outputTensorNames[i].c_str());
    gLogInfo << "UFF file: " << locateFile(mParams.uffFileName, mParams.dataDirs) << std::endl;
    auto parsed = parser->parse(locateFile(mParams.uffFileName, mParams.dataDirs).c_str(), *network, DataType::kFLOAT);
    if (!parsed)
    {
        return false;
    }

    builder->setMaxBatchSize(mParams.batchSize);
    builder->setMaxWorkspaceSize(2_GiB);
    builder->setFp16Mode(mParams.fp16);

    // INT8 with fake per-tensor scales: this is only meaningful for speed
    // tests, since the scales are not derived from real data. For accurate
    // INT8, attach a calibrator instead (see the sketch after this function).
    if (mParams.int8)
    {
        samplesCommon::setAllTensorScales(network.get());
        builder->setInt8Mode(true);
    }

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(builder->buildCudaEngine(*network), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }

    return true;
}
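
// --- Optional (not in the original sample): the fake tensor scales above
// only measure speed. For real INT8 accuracy, TensorRT needs a calibrator
// fed with preprocessed images. This is a minimal sketch under assumptions:
// the image list, cache file name, and preprocessing are placeholders, and
// the class should be moved above constructNetwork (or into a header)
// before wiring it in.
class MaskRCNNInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2
{
public:
    MaskRCNNInt8Calibrator(int batchSize, int inputVolume, const std::vector<std::string>& imageFiles)
        : mBatchSize(batchSize)
        , mInputVolume(inputVolume)
        , mImageFiles(imageFiles)
    {
        cudaMalloc(&mDeviceInput, mBatchSize * mInputVolume * sizeof(float));
    }
    ~MaskRCNNInt8Calibrator() override
    {
        cudaFree(mDeviceInput);
    }
    int getBatchSize() const override
    {
        return mBatchSize;
    }
    bool getBatch(void* bindings[], const char* names[], int nbBindings) override
    {
        if ((mCurrentBatch + 1) * mBatchSize > static_cast<int>(mImageFiles.size()))
        {
            return false; // no more calibration batches
        }
        // Placeholder: fill hostBatch by resizing, padding, and mean-subtracting
        // the next mBatchSize images exactly as processInput does below.
        std::vector<float> hostBatch(mBatchSize * mInputVolume, 0.0f);
        cudaMemcpy(mDeviceInput, hostBatch.data(), hostBatch.size() * sizeof(float), cudaMemcpyHostToDevice);
        bindings[0] = mDeviceInput;
        mCurrentBatch++;
        return true;
    }
    const void* readCalibrationCache(size_t& length) override
    {
        // Reuse the cache from a previous run so calibration happens only once.
        std::ifstream input("mrcnn_int8.cache", std::ios::binary | std::ios::ate);
        if (!input.good())
        {
            length = 0;
            return nullptr;
        }
        mCache.resize(static_cast<size_t>(input.tellg()));
        input.seekg(0);
        input.read(mCache.data(), mCache.size());
        length = mCache.size();
        return mCache.data();
    }
    void writeCalibrationCache(const void* cache, size_t length) override
    {
        std::ofstream output("mrcnn_int8.cache", std::ios::binary);
        output.write(static_cast<const char*>(cache), length);
    }

private:
    int mBatchSize;
    int mInputVolume;
    std::vector<std::string> mImageFiles;
    int mCurrentBatch{0};
    void* mDeviceInput{nullptr};
    std::vector<char> mCache;
};
// Wire it up in constructNetwork in place of setAllTensorScales; the
// calibrator must outlive buildCudaEngine:
//   builder->setInt8Mode(true);
//   builder->setInt8Calibrator(&calibrator);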

bool SampleMaskRCNN::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);

    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    // Read the input data into the managed buffers
    assert(mParams.inputTensorNames.size() == 1);
    if (!processInput(buffers))
    {
        return false;
    }

    // Memcpy from host input buffers to device input buffers
    buffers.copyInputToDevice();

    auto tStart = std::chrono::high_resolution_clock::now();
    bool status;
    gLogInfo << "xxxxxx " << mParams.batchSize << std::endl;
    for (int i = 0; i < 10; i++)
    {
        status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
    }
    auto tEnd = std::chrono::high_resolution_clock::now();
    float totalHost = std::chrono::duration<float, std::milli>(tEnd - tStart).count();
    gLogInfo << "Run for 10 times with Batch Size " << mParams.batchSize << std::endl;
    gLogInfo << "Average inference time is " << (totalHost / 10) / mParams.batchSize << " ms/frame" << std::endl;

    if (!status)
    {
        return false;
    }

    // Memcpy from device output buffers to host output buffers
    buffers.copyOutputToHost();

    // Post-process detections and verify results
    if (!verifyOutput(buffers))
    {
        return false;
    }

    return true;
}

bool SampleMaskRCNN::teardown()
{
    //! Clean up the libprotobuf files as the parsing is complete
    //! \note It is not safe to use any other part of the protocol buffers library after
    //! ShutdownProtobufLibrary() has been called.
    nvuffparser::shutdownProtobufLibrary();
    return true;
}

bool SampleMaskRCNN::processInput(const samplesCommon::BufferManager& buffers)
{
    const int inputC = mInputDims.d[0];
    const int inputH = mInputDims.d[1];
    const int inputW = mInputDims.d[2];
    const int batchSize = mParams.batchSize;

    // Available images
    std::vector<std::string> imageListCandidates = {"001763.ppm", "004545.ppm"};
    std::vector<std::string> imageList;
    for (int i = 0; i < batchSize; i++)
    {
        imageList.push_back(imageListCandidates[i % 2]);
    }

    mPPMs.resize(batchSize);
    mOriginalPPMs.resize(batchSize);
    assert(mPPMs.size() <= imageList.size());
    for (int i = 0; i < batchSize; ++i)
    {
        MaskRCNNUtils::readPPMFile(locateFile(imageList[i], mParams.dataDirs), mOriginalPPMs[i]);
        MaskRCNNUtils::preprocessPPM(mOriginalPPMs[i], mPPMs[i], inputH, inputW);
    }

    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
    float pixelMean[3]{123.7, 116.8, 103.9};
    // Host memory for input buffer
    for (int i = 0, volImg = inputC * inputH * inputW; i < mParams.batchSize; ++i)
    {
        for (int c = 0; c < inputC; ++c)
        {
            // The color image to input should be in RGB order
            for (unsigned j = 0, volChl = inputH * inputW; j < volChl; ++j)
            {
                hostDataBuffer[i * volImg + c * volChl + j] = float(mPPMs[i].buffer[j * inputC + c]) - pixelMean[c];
            }
        }
    }

    return true;
}

std::vector<MaskRCNNUtils::BBoxInfo> SampleMaskRCNN::decodeOutput(const int imageIdx, void* detectionsHost, void* masksHost)
{
    int input_dim_h = MaskRCNNConfig::IMAGE_SHAPE.d[1], input_dim_w = MaskRCNNConfig::IMAGE_SHAPE.d[2];
    assert(input_dim_h == input_dim_w);
    int image_height = mOriginalPPMs[imageIdx].h;
    int image_width = mOriginalPPMs[imageIdx].w;
    // resize the DsImage with scale
    const int image_dim = std::max(image_height, image_width);
    int resizeH = static_cast<int>(image_height * input_dim_h / static_cast<float>(image_dim));
    int resizeW = static_cast<int>(image_width * input_dim_w / static_cast<float>(image_dim));
    // keep accuracy when going from float to int, then back to float
    float window_x = (1.0f - (float) resizeW / input_dim_w) / 2.0f;
    float window_y = (1.0f - (float) resizeH / input_dim_h) / 2.0f;
    float window_width = (float) resizeW / input_dim_w;
    float window_height = (float) resizeH / input_dim_h;

    float final_ratio_x = (float) image_width / window_width;
    float final_ratio_y = (float) image_height / window_height;

    std::vector<MaskRCNNUtils::BBoxInfo> binfo;

    int detectionOffset = samplesCommon::volume(MaskRCNNConfig::MODEL_DETECTION_SHAPE); // (100,6)
    int maskOffset = samplesCommon::volume(MaskRCNNConfig::MODEL_MASK_SHAPE);           // (100, 81, 28, 28)

    MaskRCNNUtils::RawDetection* detections
        = reinterpret_cast<MaskRCNNUtils::RawDetection*>((float*) detectionsHost + imageIdx * detectionOffset);
    MaskRCNNUtils::Mask* masks = reinterpret_cast<MaskRCNNUtils::Mask*>((float*) masksHost + imageIdx * maskOffset);
    for (int det_id = 0; det_id < MaskRCNNConfig::DETECTION_MAX_INSTANCES; det_id++)
    {
        MaskRCNNUtils::RawDetection cur_det = detections[det_id];
        int label = (int) cur_det.class_id;
        if (label <= 0)
            continue;

        MaskRCNNUtils::BBoxInfo det;
        det.label = label;
        det.prob = cur_det.score;

        det.box.x1 = std::min(std::max((cur_det.x1 - window_x) * final_ratio_x, 0.0f), (float) image_width);
        det.box.y1 = std::min(std::max((cur_det.y1 - window_y) * final_ratio_y, 0.0f), (float) image_height);
        det.box.x2 = std::min(std::max((cur_det.x2 - window_x) * final_ratio_x, 0.0f), (float) image_width);
        det.box.y2 = std::min(std::max((cur_det.y2 - window_y) * final_ratio_y, 0.0f), (float) image_height);

        if (det.box.x2 <= det.box.x1 || det.box.y2 <= det.box.y1)
            continue;

        det.mask = masks + det_id * MaskRCNNConfig::NUM_CLASSES + label;

        binfo.push_back(det);
    }

    return binfo;
}

bool SampleMaskRCNN::verifyOutput(const samplesCommon::BufferManager& buffers)
{
    void* detectionsHost = buffers.getHostBuffer(mParams.outputTensorNames[0]);
    void* masksHost = buffers.getHostBuffer(mParams.outputTensorNames[1]);

    bool pass = true;

    for (int p = 0; p < mParams.batchSize; ++p)
    {
        std::vector<MaskRCNNUtils::BBoxInfo> binfo = decodeOutput(p, detectionsHost, masksHost);
        for (size_t roi_id = 0; roi_id < binfo.size(); roi_id++)
        {
            const auto resized_mask = MaskRCNNUtils::resizeMask(binfo[roi_id], mParams.maskThreshold); // mask threshold
            MaskRCNNUtils::addBBoxPPM(mOriginalPPMs[p], binfo[roi_id], resized_mask);

            gLogInfo << "Detected " << MaskRCNNConfig::CLASS_NAMES[binfo[roi_id].label] << " in"
                     << mOriginalPPMs[p].fileName << " with confidence " << binfo[roi_id].prob * 100.f
                     << " and coordinates (" << binfo[roi_id].box.x1 << ", " << binfo[roi_id].box.y1 << ", "
                     << binfo[roi_id].box.x2 << ", " << binfo[roi_id].box.y2 << ")" << std::endl;
        }
        gLogInfo << "The results are stored in current directory: " << std::to_string(p) + ".ppm" << std::endl;
        MaskRCNNUtils::writePPMFile(std::to_string(p) + ".ppm", mOriginalPPMs[p]);
    }

    return pass;
}

SampleMaskRCNNParams initializeSampleParams(const samplesCommon::Args& args)
{
    SampleMaskRCNNParams params;
    if (args.dataDirs.empty())
    {
        params.dataDirs.push_back("data/maskrcnn/");
        params.dataDirs.push_back("data/maskrcnn/images/");
        params.dataDirs.push_back("data/samples/maskrcnn/");
        params.dataDirs.push_back("data/samples/maskrcnn/images/");
    }
    else
    {
        params.dataDirs = args.dataDirs;
    }

    params.inputTensorNames.push_back(MaskRCNNConfig::MODEL_INPUT);
    params.batchSize = args.batch;
    params.outputTensorNames.push_back(MaskRCNNConfig::MODEL_OUTPUTS[0]);
    params.outputTensorNames.push_back(MaskRCNNConfig::MODEL_OUTPUTS[1]);
    params.dlaCore = args.useDLACore;
    params.int8 = args.runInInt8;
    params.fp16 = args.runInFp16;

    params.uffFileName = MaskRCNNConfig::MODEL_NAME;
    gLogInfo<<params.uffFileName<< std::endl;
    params.maskThreshold = MaskRCNNConfig::MASK_THRESHOLD;

    return params;
}

void printHelpInfo()
{
    std::cout << "Usage: ./sample_maskRCNN [-h or --help] [-d or --datadir=<path to data directory>]" << std::endl;
    std::cout << "--help          Display help information" << std::endl;
    std::cout << "--datadir       Specify path to a data directory, overriding the default. This option can be used "
                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
                 "data/samples/maskrcnn/ and data/maskrcnn/"
              << std::endl;
    std::cout << "--fp16          Specify to run in fp16 mode." << std::endl;
    std::cout << "--batch         Specify inference batch size." << std::endl;
}

int main(int argc, char** argv)
{
    samplesCommon::Args args;
    bool argsOK = samplesCommon::parseArgs(args, argc, argv);
    if (!argsOK)
    {
        gLogError << "Invalid arguments" << std::endl;
        printHelpInfo();
        return EXIT_FAILURE;
    }
    if (args.help)
    {
        printHelpInfo();
        return EXIT_SUCCESS;
    }

    auto sampleTest = gLogger.defineTest(gSampleName, argc, argv);

    gLogger.reportTestStart(sampleTest);

    SampleMaskRCNN sample(initializeSampleParams(args));

    gLogInfo << "Building and running a GPU inference engine for Mask RCNN" << std::endl;

    if (!sample.build())
    {
        return gLogger.reportFail(sampleTest);
    }
    if (!sample.infer())
    {
        return gLogger.reportFail(sampleTest);
    }
    if (!sample.teardown())
    {
        return gLogger.reportFail(sampleTest);
    }

    return gLogger.reportPass(sampleTest);
}

CMakeLists.txt

cmake_minimum_required(VERSION 2.8)
project (maskRCNN)
file (GLOB _INCS "*.h")
file (GLOB _SRCS "*.cpp" "*.cu")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
#include_directories(tensorrt/include/)
#link_directories(tensorrt/lib/)
include_directories(cuda/include/)
link_directories(cuda/lib64/)
include_directories(glog/include/)
link_directories(glog/lib/)
include_directories(gflags/include/)
link_directories(gflags/lib/)

include_directories(/home/zhao/Downloads/TensorRT-7.0.0.11/samples/common/)
include_directories(hdf5/include/)
link_directories(hdf5/lib/)
include_directories(/home/zhao/Downloads/TensorRT-7.0.0.11/include/)
link_directories(/home/zhao/Downloads/TensorRT-7.0.0.11/lib/)
include_directories(/home/zhao/opt/cudnn5/cuda/include/)
link_directories(/home/zhao/opt/cudnn5/cuda/lib64/)

set(Caffe_ROOT "/home/zhao/opt/caffe/build/install")#$ENV{Caffe_ROOT})
set(Caffe_DIR ${Caffe_ROOT}/share/Caffe)
include_directories(${Caffe_ROOT}/include)
# CUDA
find_package (CUDA)
include_directories (${CUDA_INCLUDE_DIRS})
message (STATUS "CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
set(TensorRT_ROOT $ENV{TensorRT_ROOT})
include_directories(${TensorRT_ROOT}/include/)
link_directories(${TensorRT_ROOT}/lib/)
add_executable( maskRCNN ${_SRCS} )

target_link_libraries( maskRCNN ${OpenCV_LIBS} nvparsers nvinfer nvinfer_plugin cudnn cuda cudart cublas )