TensorRT multi-batch inference

#include <NvInfer.h>
#include <NvInferRuntime.h>
// CUDA includes
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <unistd.h>

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include <tuple>
#include <chrono>

#include <opencv2/opencv.hpp>
#include "logging.h"
#include "utils.hpp"

using namespace nvinfer1;
using namespace std;
static Logger gLogger;

#define checkRuntime(op)  __check_cuda_runtime((op), #op, __FILE__, __LINE__)

bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
    if(code != cudaSuccess){
        const char* err_name = cudaGetErrorName(code);
        const char* err_message = cudaGetErrorString(code);
        printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
        return false;
    }
    return true;
}


// // Manage pointers returned by the NV API with a smart pointer
// // so the memory is released automatically and leaks are avoided
// template<typename _T>
// shared_ptr<_T> make_nvshared(_T* ptr){
//     return shared_ptr<_T>(ptr, [](_T* p){p->destroy();});
// }

// bool exists(const string& path){
// #ifdef _WIN32
//     return ::PathFileExistsA(path.c_str());
// #else
//     return access(path.c_str(), R_OK) == 0;
// #endif
// }

vector<unsigned char> load_file(const string& file){
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, ios::end);
    size_t length = in.tellg();

    std::vector<uint8_t> data;
    if (length > 0){
        in.seekg(0, ios::beg);
        data.resize(length);

        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}

// Kept global so the runtime outlives the engine it deserializes
IRuntime* runtime = nullptr;

ICudaEngine* loadEngine(const std::string& engine, int DLACore)
{
    std::ifstream engineFile(engine, std::ios::binary);
    if (!engineFile)
    {
        std::cout << "Error opening engine file: " << engine << std::endl;
        return nullptr;
    }

    engineFile.seekg(0, engineFile.end);
    long int fsize = engineFile.tellg();
    engineFile.seekg(0, engineFile.beg);

    std::vector<char> engineData(fsize);
    engineFile.read(engineData.data(), fsize);
    if (!engineFile)
    {
        std::cout << "Error loading engine file: " << engine << std::endl;
        return nullptr;
    }

    runtime = createInferRuntime(gLogger);
    if (DLACore != -1)
    {
        runtime->setDLACore(DLACore);
    }

    return runtime->deserializeCudaEngine(engineData.data(), fsize, nullptr);
}
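
// NOTE (assumption): the three-argument deserializeCudaEngine() above matches the
// TensorRT 7 API. On TensorRT 8+ the IPluginFactory parameter was removed, so the
// equivalent call would instead be:
//     return runtime->deserializeCudaEngine(engineData.data(), fsize);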

double timestamp_now_float() {
    // steady_clock is monotonic, which is what we want for interval timing
    return chrono::duration_cast<chrono::microseconds>(chrono::steady_clock::now().time_since_epoch()).count() / 1000.0;
}

void model_infer(std::string model_path, std::string img_path, int input_batchsize, int input_height, int input_width, int output_size)
{
    // std::string strTrtSavedPath = "./res_hjxu_temp_dynamic.trt";
    int maxBatchSize = 1000;

    // 1. Deserialize the engine from file
    ICudaEngine* engine = loadEngine(model_path, 0);
    if (engine == nullptr){
        printf("failed to load engine: %s\n", model_path.c_str());
        return;
    }

    // 2. Create the execution context
    IExecutionContext* context = engine->createExecutionContext();

    int nNumBindings = engine->getNbBindings();
    std::vector<void*> vecBuffers;
    vecBuffers.resize(nNumBindings);
    int nInputIdx = 0;
    int nOutputIndex = 1;
    int nInputSize = 3 * input_height * input_width * sizeof(float);
    
    printf("output size:%d\n", output_size);
    // 3. Create a CUDA stream
    cudaStream_t stream;
    checkRuntime(cudaStreamCreate(&stream));

    // 4. Allocate device buffers and a pinned host buffer sized for the maximum batch
    float* output_data_host = nullptr;
    checkRuntime(cudaMalloc(&vecBuffers[nInputIdx], (size_t)nInputSize * maxBatchSize));
    checkRuntime(cudaMalloc(&vecBuffers[nOutputIndex], (size_t)maxBatchSize * output_size * sizeof(float)));
    checkRuntime(cudaMallocHost(&output_data_host, (size_t)maxBatchSize * output_size * sizeof(float)));
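
    // NOTE: the stream above is never used below, because executeV2() is
    // synchronous. A sketch of the asynchronous variant (TensorRT 7/8 API):
    //     context->enqueueV2(vecBuffers.data(), stream, nullptr);
    //     checkRuntime(cudaStreamSynchronize(stream));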

    std::vector<std::string> imagList;
    std::vector<std::string> fileType{"jpg", "png"};
    readFileList(const_cast<char *>(img_path.c_str()), imagList, fileType);
    int img_count = 0;
    for (auto &input_image_path : imagList)
    {
        cv::Mat img = cv::imread(input_image_path);
        cv::Mat matRzImg;
        cv::resize(img, matRzImg, cv::Size(input_width, input_height));
        cv::Mat matF32Img;
        matRzImg.convertTo(matF32Img, CV_32FC3);
        matF32Img = matF32Img / 255.;
        // Copy each preprocessed image into its slot of the device input buffer
        // (note: the data is uploaded in OpenCV's interleaved HWC layout as-is)
        checkRuntime(cudaMemcpy((unsigned char *)vecBuffers[nInputIdx] + (size_t)nInputSize * img_count, matF32Img.data, nInputSize, cudaMemcpyHostToDevice));
        img_count++;
    }
    
    // Dynamic shapes: binding 0 is the input tensor; set its dims as (N, C, H, W)
    context->setBindingDimensions(0, Dims4(input_batchsize, 3, input_height, input_width));
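    // NOTE: with dynamic shapes this only succeeds if the engine was built with an
    // optimization profile whose min/max range covers input_batchsize; otherwise
    // setBindingDimensions() fails and executeV2() will not run.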
    
    int times = 10000;
    auto begin_timer = timestamp_now_float();
    for (int i = 0; i < times; i++){
        // Run inference; executeV2() blocks until the batch is done
        context->executeV2(vecBuffers.data());
    }
    float total_inference_time = timestamp_now_float() - begin_timer;
    printf("total: %.2f ms, input_batchsize: %d\n", total_inference_time, input_batchsize);
    float fps = 1000 / (total_inference_time / times) * input_batchsize;
    printf("FPS: [%.2f]\n", fps);

    checkRuntime(cudaMemcpy(output_data_host, vecBuffers[nOutputIndex], (size_t)maxBatchSize * output_size * sizeof(float), cudaMemcpyDeviceToHost));

    // // Dynamic shapes: set batch = 4 and run again
    // context->setBindingDimensions(0, Dims4(4, 1, 112, 112));
    // context->executeV2(vecBuffers.data());
    // checkRuntime(cudaMemcpy(prob, vecBuffers[nOutputIndex], maxBatchSize * 2 * sizeof(float), cudaMemcpyDeviceToHost));

    // Release resources in reverse order of creation (TensorRT 7 style destroy())
    checkRuntime(cudaStreamDestroy(stream));
    checkRuntime(cudaFree(vecBuffers[nInputIdx]));
    checkRuntime(cudaFree(vecBuffers[nOutputIndex]));
    checkRuntime(cudaFreeHost(output_data_host));
    context->destroy();
    engine->destroy();
    runtime->destroy();
}
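
// NOTE (assumption): most TensorRT image models expect NCHW input, while cv::Mat
// stores pixels interleaved as HWC. The preprocessing loop above uploads HWC data
// as-is; if your engine was built for NCHW, a repacking step is needed before the
// host-to-device copy. The helper below is an illustrative sketch, not part of
// the original post.
static void hwc_to_chw(const cv::Mat& f32, float* chw)
{
    int h = f32.rows, w = f32.cols;
    // Wrap each destination plane so cv::split writes straight into the CHW buffer
    cv::Mat planes[3] = {
        cv::Mat(h, w, CV_32F, chw + 0 * h * w),   // B plane
        cv::Mat(h, w, CV_32F, chw + 1 * h * w),   // G plane
        cv::Mat(h, w, CV_32F, chw + 2 * h * w)    // R plane
    };
    cv::split(f32, planes);
}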



int main(int argc, char **argv){
    if(argc != 7){
        printf("input paramer error, eg: ./Perception model_Path  img_path  batch height width output_size\n");
        return -1;
    }
    cudaSetDevice(0);
    int input_batchsize = atoi(argv[3]);
    int input_height = atoi(argv[4]);
    int input_width = atoi(argv[5]);
    int output_size = atoi(argv[6]);

    // printf("%d, %d, %d, %d\n", input_batchsize, input_height, input_width, output_size);
    model_infer(argv[1], argv[2], input_batchsize, input_height, input_width, output_size);
    return 0;
}
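
Example invocation (the engine path, image directory, and shapes below are placeholders; substitute your own model's input size and output element count):

    ./Perception model.trt ./images 8 224 224 1000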
