Introduction to Accelerating Model Inference Time with TensorRT (C++, PyTorch model)

Qt Creator serves as the C++ IDE here. For its installation and the OpenCV configuration (I still prefer compiling the OpenCV version I need from source), see my earlier post "Ubuntu20.04 C++程序的简单编译及QT和Pycharm的配置": https://blog.csdn.net/jiugeshao/article/details/120406478?spm=1001.2014.3001.5502

See also "Ubuntu下Qt Creator配置opencv": https://blog.csdn.net/jiugeshao/article/details/120578698?spm=1001.2014.3001.5502

When creating the project, pick the plain (non-Qt) C++ application template.

Once inside the project, Help > About Plugins brings up the configuration dialog shown below; uncheck the option marked by the red box, otherwise you may find that code completion no longer works.

Run an OpenCV example once more to confirm everything still works.

With the preparation out of the way, on to the main topic.

The approach mainly follows the official documentation, using the bundled sample sampleOnnxMNIST.cpp as the blueprint for the changes.
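
In the TensorRT-8.0.0.3 package used here, that sample lives under samples/sampleOnnxMNIST/, next to the samples/common/ helpers (logger, buffer manager, and so on) that the code below includes.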

Copy the sample code directly into the main.cpp that Qt Creator created.

The project configuration (.pro file) is as follows; since OpenCV is used later on, its paths are added here too. Note the entries marked by the red box: the symbols sample::gLogger, sample::gLogError, and sample::gLogInfo are defined in the samples' logger.cpp, so without linking the corresponding logger.o the build fails with errors like:

/home/sxhlvye/Trial/tensorrt_c++/test4/main.cpp:168: error: undefined reference to `sample::gLogger'
/home/sxhlvye/Trial/tensorrt_c++/test4/main.cpp:357: error: undefined reference to `sample::gLogError'
/home/sxhlvye/Trial/tensorrt_c++/test4/main.cpp:286: error: undefined reference to `sample::gLogInfo'

TEMPLATE = app
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt

SOURCES += \
        main.cpp


INCLUDEPATH += /usr/local/include \
               /usr/local/include/opencv \
               /usr/local/include/opencv2 \
               /home/sxhlvye/Downloads/TensorRT-8.0.0.3/include \
               /home/sxhlvye/Downloads/TensorRT-8.0.0.3/samples/common \
               /usr/local/cuda-11.0/include

LIBS += /usr/local/lib/libopencv_highgui.so \
        /usr/local/lib/libopencv_core.so \
        /usr/local/lib/libopencv_imgproc.so \
        /usr/local/lib/libopencv_imgcodecs.so \
        /usr/local/cuda-11.0/lib64/libcudart.so \
        /home/sxhlvye/Downloads/TensorRT-8.0.0.3/lib/libnvinfer.so \
        /home/sxhlvye/Downloads/TensorRT-8.0.0.3/targets/x86_64-linux-gnu/bin/common/logger.o \
        /home/sxhlvye/Downloads/TensorRT-8.0.0.3/lib/libnvparsers.so \
        /home/sxhlvye/Downloads/TensorRT-8.0.0.3/lib/libnvinfer_plugin.so \
        /home/sxhlvye/Downloads/TensorRT-8.0.0.3/lib/libnvonnxparser.so
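
At runtime the loader also has to find these shared libraries; if the program builds but immediately fails to load libnvinfer.so or libcudart.so, adding /home/sxhlvye/Downloads/TensorRT-8.0.0.3/lib and /usr/local/cuda-11.0/lib64 to LD_LIBRARY_PATH should resolve it.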

The ONNX model converted earlier (exported from a PyTorch model, see my previous posts) is used directly for testing. The imagenet_classes.txt file used below is available at the linked page.

The full code follows (a rewrite built on the official sampleOnnxMNIST.cpp sample; broadly it converts the ONNX model to an engine, then reads an image and normalizes it, then collects the predictions). The image normalization imitates the one from my earlier post: the 500x375 input is resized so the short side (375) becomes 256 and the long side (500) scales proportionally to 341 (500 x 256/375 ≈ 341); a 224x224 center crop is then taken, which means a row offset of (341 - 224)/2 = 58 and a column offset of (256 - 224)/2 = 16, matching Range(58, 282) and Range(16, 240) below; finally the mean is subtracted and the result divided by the standard deviation.

#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"
#include "parserOnnxConfig.h"

#include "NvInfer.h"
#include <cuda_runtime_api.h>

#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
using namespace cv;

class SampleOnnx
{
    template <typename T>
    using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;

public:
    SampleOnnx(const samplesCommon::OnnxSampleParams& params)
        : mParams(params)
        , mEngine(nullptr)
    {
    }

    //! \brief Builds the network engine
    bool build();


    //! \brief Runs the TensorRT inference engine for this sample
    bool infer();

private:
    samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.

    nvinfer1::Dims mInputDims;  //!< The dimensions of the input to the network.
    nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network.
    int mNumber{0};             //!< Unused here; retained from the MNIST sample

    std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network

    //! \brief Parses an ONNX model and creates a TensorRT network

    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
        SampleUniquePtr<nvonnxparser::IParser>& parser);


    //! \brief Reads the input image and stores the result in a managed buffer
    bool processInput(const samplesCommon::BufferManager& buffers);


    //! \brief Reads back the network output and reports the top-5 predictions
    bool getOutput(const samplesCommon::BufferManager& buffers);
};

//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the Onnx network by parsing the Onnx model and builds
//!          the engine
//!
//! \return Returns true if the engine was created successfully and false otherwise
//!
bool SampleOnnx::build()
{
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));

    if (!builder)
    {
        return false;
    }


    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
    if (!network)
    {
        return false;
    }


    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }

    auto parser = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
    if (!parser)
    {
        return false;
    }


    // constructNetwork() parses the ONNX file whose path is held in mParams
    auto constructed = constructNetwork(builder, network, config, parser);
    if (!constructed)
    {
        return false;
    }


    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());

    if (!mEngine)
    {
        return false;
    }

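    // Expect a single NCHW input (1x3x224x224 for this VGG16) and a single 1x1000 output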
    ASSERT(network->getNbInputs() == 1);
    mInputDims = network->getInput(0)->getDimensions();
    ASSERT(mInputDims.nbDims == 4);

    ASSERT(network->getNbOutputs() == 1);
    mOutputDims = network->getOutput(0)->getDimensions();
    ASSERT(mOutputDims.nbDims == 2);

    return true;
}

//!
//! \brief Uses the ONNX parser to populate the network and marks the
//!        output layers
//!
//! \param network Pointer to the network that will be populated with the parsed ONNX model
//!
//! \param builder Pointer to the engine builder
//!
bool SampleOnnx::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
    SampleUniquePtr<nvonnxparser::IParser>& parser)
{

    auto parsed = parser->parseFromFile(mParams.onnxFileName.c_str() , static_cast<int>(sample::gLogger.getReportableSeverity()));
    if (!parsed)
    {
        return false;
    }

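    // Scratch memory TensorRT may use while selecting kernels during the build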
    config->setMaxWorkspaceSize(512_MiB);
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    if (mParams.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
        samplesCommon::setAllDynamicRanges(network.get(), 127.0f, 127.0f);
    }

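    // Offload to a DLA core if one was requested (mParams.dlaCore is -1 by default, i.e. GPU only)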
    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);

    return true;
}

//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates the buffer,
//!          sets inputs and executes the engine.
//!
bool SampleOnnx::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine);

    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    //Read the image into the managed buffers
    ASSERT(mParams.inputTensorNames.size() == 1);

    if (!processInput(buffers))
    {
        return false;
    }

    // Memcpy from host input buffers to device input buffers
    buffers.copyInputToDevice();

    bool status = context->executeV2(buffers.getDeviceBindings().data());
    if (!status)
    {
        return false;
    }

    // Memcpy from device output buffers to host output buffers
    buffers.copyOutputToHost();

    // Verify results
    if (!getOutput(buffers))
    {
        return false;
    }

    return true;
}

//!
//! \brief Reads the input and stores the result in a managed buffer
//!
bool SampleOnnx::processInput(const samplesCommon::BufferManager& buffers)
{
    cout << "mInputDims.d[0]: " << mInputDims.d[0] << std::endl;
    cout << "mInputDims.d[1]: " << mInputDims.d[1] << std::endl;
    cout << "mInputDims.d[2]: " << mInputDims.d[2] << std::endl;
    cout << "mInputDims.d[3]: " << mInputDims.d[3] << std::endl;

    cv::Mat image = cv::imread("/home/sxhlvye/Trial/yolov3-9.5.0/2008_002682.jpg", cv::IMREAD_COLOR);
    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
    cout << image.channels() << "," << image.size().width << "," << image.size().height << std::endl;

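    // Resize so the 375-pixel short side becomes 256; the long side scales to 341 (dst is 256 wide by 341 high)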
    cv::Mat dst = cv::Mat::zeros(341, 256, CV_32FC3);
    cv::resize(image, dst, dst.size());
    cout << dst.channels() << "," << dst.size().width << "," << dst.size().height << std::endl;

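    // Center crop 224x224: rows 58..281 and columns 16..239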
    cv::Mat dst1 = dst(Range(58, 282), Range(16, 240)).clone();
    cout << dst1.channels() << "," << dst1.size().width << "," << dst1.size().height << std::endl;

    static const float mean[3] = { 0.485f, 0.456f, 0.406f };
    static const float Std[3] = { 0.229f, 0.224f, 0.225f };

    const int channel = mInputDims.d[1];
    const int inputH = mInputDims.d[2];
    const int inputW = mInputDims.d[3];
    // Flatten the normalized crop into a planar CHW float buffer

    std::vector<float> fileData(inputH * inputW * channel);

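    // Repack OpenCV's interleaved HWC uint8 pixels into planar CHW floats,
    // normalizing each value as (x / 255 - mean) / std to match the model's input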
    for (int c = 0; c < channel; ++c)
    {
        for (int i = 0; i < dst1.rows; ++i)
        {
            cv::Vec3b *p1 = dst1.ptr<cv::Vec3b>(i);
            for (int j = 0; j < dst1.cols; ++j)
            {
                fileData[c * dst1.cols * dst1.rows + i * dst1.cols + j] = (p1[j][c] / 255.0f - mean[c]) / Std[c];
            }
        }
    }


    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
    for (int i = 0; i < inputH * inputW * channel; i++)
    {
        hostDataBuffer[i] = fileData[i];
    }


    return true;
}

// Comparator for std::sort: descending order
bool cmp(float a, float b)
{
    return a > b;
}
//! \brief Applies softmax to the raw output and reports the top-5 predictions
//!
//! \return whether the output was processed successfully

bool SampleOnnx::getOutput(const samplesCommon::BufferManager& buffers)
{
    const int outputSize = mOutputDims.d[1];
    std::cout << "outputSize: " << outputSize << std::endl;
    float* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));

    // Calculate Softmax
    float sum{0.0f};
    for (int i = 0; i < outputSize; i++)
    {
        output[i] = exp(output[i]);
        sum += output[i];
    }

    sample::gLogInfo << "Output:" << std::endl;
    for (int i = 0; i < outputSize; i++)
    {
        output[i] /= sum;
    }

    // Keep an unsorted copy so the class indices can be recovered after sorting
    vector<float> voutput(outputSize);
    for (int i = 0; i < outputSize; i++)
    {
        voutput[i] = output[i];
    }

    // Sort the scores in descending order
    std::sort(output, output + outputSize, cmp);

    for (int i = 0; i < 5; i++)
    {
        cout << output[i] << std::endl;
    }

    vector<string> labels;
    string line;

    ifstream readFile("/home/sxhlvye/Trial/yolov3-9.5.0/imagenet_classes.txt");
    while (getline(readFile, line))
    {
        cout << line << std::endl;
        labels.push_back(line);
    }

    // Recover the class index of each top-5 score from the unsorted copy
    vector<int> indexs(5);
    for (int k = 0; k < 5; k++)
    {
        for (int i = 0; i < outputSize; i++)
        {
            if (voutput[i] == output[k])
            {
                indexs[k] = i;
            }
        }
    }

    cout << "top 5: " << std::endl;
    for (int k = 0; k < 5; k++)
    {
        cout << labels[indexs[k]] << "--->" << output[k] << std::endl;
    }

    return true;
}

int main()
{
    samplesCommon::OnnxSampleParams params;

    params.onnxFileName = "/home/sxhlvye/Trial/yolov3-9.5.0/vgg16.onnx";
    params.inputTensorNames.push_back("input");
    params.outputTensorNames.push_back("output");
    params.fp16 = false;
    params.int8 = false;
    params.batchSize = 1;

    SampleOnnx sampleonnx(params);

    // Step 1: convert the ONNX model to a TensorRT engine
    if (!sampleonnx.build())
    {
        std::cout << "convert ONNX to TensorRT failed!" << std::endl;
        return -1;
    }
    std::cout << "convert ONNX to TensorRT ok!" << std::endl;

    // Step 2: run inference with the engine
    if (!sampleonnx.infer())
    {
        std::cout << "infer failed!" << std::endl;
        return -1;
    }
    std::cout << "infer ok!" << std::endl;

    return 0;
}
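
One refinement worth noting: build() re-parses the ONNX file and rebuilds the engine on every run, which for a VGG16 can take quite a while. A common practice is to serialize the engine to disk once and deserialize it on later runs. Below is a minimal sketch against the same TensorRT 8.0 API used above; the helper names saveEngine/loadEngine and the cache path are my own, not part of the sample:

void saveEngine(nvinfer1::ICudaEngine& engine, const std::string& path)
{
    // serialize() returns an IHostMemory blob that owns the engine bytes
    std::unique_ptr<nvinfer1::IHostMemory, samplesCommon::InferDeleter> blob(engine.serialize());
    std::ofstream out(path, std::ios::binary);
    out.write(static_cast<const char*>(blob->data()), blob->size());
}

std::shared_ptr<nvinfer1::ICudaEngine> loadEngine(const std::string& path)
{
    // Read the serialized engine back into memory
    std::ifstream in(path, std::ios::binary);
    in.seekg(0, std::ios::end);
    const std::streamsize size = in.tellg();
    in.seekg(0, std::ios::beg);
    std::vector<char> bytes(size);
    in.read(bytes.data(), size);

    // Deserialization goes through a runtime created with the same logger
    std::unique_ptr<nvinfer1::IRuntime, samplesCommon::InferDeleter> runtime(
        nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
    return std::shared_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(bytes.data(), bytes.size()), samplesCommon::InferDeleter());
}

main() would then call loadEngine() when the cache file (say, vgg16.trt) exists and fall back to build() plus saveEngine() otherwise.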

Running the program as written produces the following output:

Although the top-5 scores differ from those in the earlier post (preprocessing differences likely account for this), the highest-scoring prediction is tiger cat in both cases.
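
Since the whole point is inference speed, it is also worth timing executeV2 directly rather than judging by feel. A minimal sketch, assuming the same context and buffers that infer() sets up (the helper name averageLatencyMs is mine, and #include <chrono> must be added to the includes):

double averageLatencyMs(nvinfer1::IExecutionContext& context,
                        samplesCommon::BufferManager& buffers, int runs = 100)
{
    // One warm-up run so one-time CUDA initialization is not counted
    context.executeV2(buffers.getDeviceBindings().data());

    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < runs; i++)
    {
        context.executeV2(buffers.getDeviceBindings().data());
    }
    auto end = std::chrono::high_resolution_clock::now();

    // executeV2 is synchronous, so host-side timestamps bracket the GPU work
    return std::chrono::duration<double, std::milli>(end - start).count() / runs;
}

Comparing this number against the PyTorch baseline from the earlier post gives the actual speedup.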

For TensorRT acceleration of the Python/PyTorch version, see my post "TensorRT加速方法介绍(python pytorch模型)": https://blog.csdn.net/jiugeshao/article/details/123141499?spm=1001.2014.3001.5502
