Yolov5 TensorRT: generating the .engine file, the inference process, and how to get images from the GPU for custom (task-specific) post-processing


warmup:
The runtime contains components that are lazily initialized, which can make the first request sent to the model after loading take a long time; this latency can be orders of magnitude higher than that of a single inference request. To reduce the impact of this initialization latency on requests, a set of sample inference requests can be supplied together with the SavedModel to trigger the initialization of those subsystems and components at model-load time. This process is called "warming up" the model.
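As a concrete illustration of the same idea in the TensorRT pipeline built later in this post, warmup simply means running a few throwaway inferences on dummy input right after deserializing the engine, so that lazy CUDA/TensorRT initialization does not land inside the first real request. This is a minimal sketch, assuming the context, stream, buffers, data/prob arrays and the doInference helper from section 三; it is not part of the original code.

```c++
// Warmup sketch (assumes context/stream/buffers/data/prob and doInference from section 三).
// Run a few dummy batches so that lazy initialization happens here, not in the first timed request.
for (int i = 0; i < 10; i++) {
    doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
}
cudaStreamSynchronize(stream);  // make sure all warmup work has finished before serving real requests
```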

一、create a model using the API directly and serialize it to a stream

// If a .wts weight-file name was provided, build the engine from it
    // create a model using the API directly and serialize it to a stream
    if (!wts_name.empty()) {
        IHostMemory* modelStream{ nullptr };
        // Class to handle library-allocated memory that is accessible to the user.
        // The memory allocated via the host memory object is owned by the library and will be de-allocated when the destroy method is called.
        APIToModel(BATCH_SIZE, &modelStream, is_p6, gd, gw, wts_name);
        // wts -> .engine
        assert(modelStream != nullptr);
        // modelStream -> serialized engine
        std::ofstream p(engine_name, std::ios::binary);
        // open the output file named "engine_name" in std::ios::binary mode
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        // APIToModel() / build_engine() fill "modelStream" with the serialized engine;
        // write it to the output file as the .engine file
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
        return 0;
    }
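APIToModel is called above but not shown. In tensorrtx-style YOLOv5 code it typically creates a builder and a builder config, calls a build_engine helper that constructs the network from the .wts weights, and serializes the resulting engine into modelStream. The sketch below follows that pattern; build_engine / build_engine_p6 and gLogger are assumed to come from that code base and are not defined in this post.

```c++
// Sketch of APIToModel, assuming tensorrtx-style helpers build_engine / build_engine_p6 and a global gLogger.
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream,
                bool& is_p6, float& gd, float& gw, std::string& wts_name) {
    // Create the builder and its configuration
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();

    // Build the network from the .wts weights and create an engine
    ICudaEngine* engine = nullptr;
    if (is_p6) {
        engine = build_engine_p6(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
    } else {
        engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
    }
    assert(engine != nullptr);

    // Serialize the engine into host memory; the caller writes it out as the .engine file
    (*modelStream) = engine->serialize();

    // Clean up
    engine->destroy();
    builder->destroy();
    config->destroy();
}
```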

二、deserialize the .engine and prepare the data

// deserialize the .engine and run inference
    std::ifstream file(engine_name, std::ios::binary);
    // use std::ifstream to open a file for input (reading only);
    // use std::ofstream to open a file for output (writing only);
    // use std::fstream to open a file for both input and output.
    if (!file.good()) {
        std::cerr << "read " << engine_name << " error!" << std::endl;
        return -1;
    }
    // define trtModelStream
    char *trtModelStream = nullptr;
    size_t size = 0;
    // get the size of the engine file
    file.seekg(0, file.end);
    size = file.tellg();
    // go back to the beginning of the engine file
    file.seekg(0, file.beg);
    // allocate a char buffer of the engine file's size
    trtModelStream = new char[size];
    assert(trtModelStream);
    // read the file's content into trtModelStream
    file.read(trtModelStream, size);
    file.close();

    // file_names ['A','B',......]
    std::vector<std::string> file_names;
    if (read_files_in_dir(img_dir.c_str(), file_names) < 0) {
        std::cerr << "read_files_in_dir failed." << std::endl;
        return -1;
    }

The read_files_in_dir function

// static inline: small helper, hinted for inlining to avoid call overhead
// const parameter: the function never modifies p_dir_name; marking it const documents that and gives the compiler more room to optimize
static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
    DIR *p_dir = opendir(p_dir_name);
    if (p_dir == nullptr) {
        return -1;
    }

    struct dirent* p_file = nullptr;
    while ((p_file = readdir(p_dir)) != nullptr) {
        if (strcmp(p_file->d_name, ".") != 0 &&
            strcmp(p_file->d_name, "..") != 0) {
            //std::string cur_file_name(p_dir_name);
            //cur_file_name += "/";
            //cur_file_name += p_file->d_name;
            std::string cur_file_name(p_file->d_name);
            file_names.push_back(cur_file_name);
            // push the image file name into file_names
        }
    }

    closedir(p_dir);
    return 0;
}

三、run inference

// prepare input data ---------------------------
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
    //    data[i] = 1.0;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;
    assert(engine->getNbBindings() == 2);
    void* buffers[2];
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);
    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));

    int fcount = 0;
    for (int f = 0; f < (int)file_names.size(); f++) {
        fcount++;
        if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
        for (int b = 0; b < fcount; b++) {
            cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            if (img.empty()) continue;
            cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox BGR to RGB
            // convert the letterboxed image from HWC BGR (uchar) to CHW RGB (float in [0,1])
            int i = 0;
            for (int row = 0; row < INPUT_H; ++row) {
                uchar* uc_pixel = pr_img.data + row * pr_img.step;
                for (int col = 0; col < INPUT_W; ++col) {
                    data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;                         // R plane
                    data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;     // G plane
                    data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0; // B plane
                    uc_pixel += 3;
                    ++i;
                }
            }
        }

        // Run inference
        auto start = std::chrono::system_clock::now();
        doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
        auto end = std::chrono::system_clock::now();
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
        std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
        }
        for (int b = 0; b < fcount; b++) {
            auto& res = batch_res[b];
            //std::cout << res.size() << std::endl;
            cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
            for (size_t j = 0; j < res.size(); j++) {
                cv::Rect r = get_rect(img, res[j].bbox);
                cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
                cv::putText(img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
            }
            cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
        }
        fcount = 0;
    }

    // Release stream and buffers
    cudaStreamDestroy(stream);
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
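doInference is called above but not shown. In tensorrtx-style code it is a thin wrapper that copies the host input batch to the device input binding, enqueues execution on the stream, copies the output binding back to the host, and synchronizes. The sketch below follows that pattern and reuses the constants and two-binding layout from the code above.

```c++
// Sketch of doInference, assuming the 2-binding layout above (buffers[0] = input, buffers[1] = output).
void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                 float* input, float* output, int batchSize) {
    // Host -> device copy of the input batch
    CUDA_CHECK(cudaMemcpyAsync(buffers[0], input,
                               batchSize * 3 * INPUT_H * INPUT_W * sizeof(float),
                               cudaMemcpyHostToDevice, stream));
    // Enqueue inference on the stream
    context.enqueue(batchSize, buffers, stream, nullptr);
    // Device -> host copy of the raw detections
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1],
                               batchSize * OUTPUT_SIZE * sizeof(float),
                               cudaMemcpyDeviceToHost, stream));
    // Wait until the copies and the inference have finished
    cudaStreamSynchronize(stream);
}
```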

四、get image

Not written up yet.

First, install CUDA and TensorRT 8 and set up the environment variables. Then download the Yolov5 code and convert the model to the TensorRT 8 format; the conversion can be done with the TensorRT Python API.

Next, load the TensorRT 8 model in C++ and send it to the GPU for inference, using the TensorRT C++ API. During inference, the model's pre-processing and post-processing can be implemented in CUDA. For pre-processing, a CUDA kernel can, for example, copy the image from CPU memory to GPU memory and normalize it. For post-processing, a CUDA kernel can, for example, copy the detection boxes from GPU memory to CPU memory and decode them.

Below is a simple pseudocode example showing how to run Yolov5 inference in C++ and where the CUDA pre-processing and post-processing fit in:

```c++
// Load the TensorRT 8 model
ICudaEngine* engine = loadTensorRTModel("yolov5.engine");

// Create the CUDA context and stream
cudaSetDevice(0);
cudaStream_t stream;
cudaStreamCreate(&stream);

// Allocate GPU memory
void* inputDeviceBuffer = nullptr;
void* outputDeviceBuffer = nullptr;
cudaMalloc(&inputDeviceBuffer, ...);
cudaMalloc(&outputDeviceBuffer, ...);

// Load the image into CPU memory and pre-process it
cv::Mat image = cv::imread("input.jpg");
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
cv::resize(image, image, cv::Size(640, 640));
float* inputData = preprocessImage(image);

// Copy the image from CPU memory to GPU memory
cudaMemcpyAsync(inputDeviceBuffer, inputData, ...);

// Run inference
IExecutionContext* context = engine->createExecutionContext();
context->enqueueV2(...);

// Copy the detections from GPU memory to CPU memory and decode them
float* outputData = new float[...];
cudaMemcpyAsync(outputData, outputDeviceBuffer, ...);
decodeOutput(outputData);

// Clean up
cudaFree(inputDeviceBuffer);
cudaFree(outputDeviceBuffer);
cudaStreamDestroy(stream);
delete[] outputData;
```

In the code above, preprocessImage and decodeOutput implement the image pre-processing and the output decoding, respectively, using CUDA kernels.

Note that this is only a simplified example; a real implementation will be more involved. Also, running inference on multiple GPUs additionally requires CUDA's multi-GPU APIs.
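To actually get an image out of GPU memory for custom, task-specific post-processing, the device buffer has to be copied back to the host and repacked into a cv::Mat. The sketch below assumes a CHW RGB float image in [0,1] (for example the pre-processed network input, or the result of a CUDA post-processing kernel); the name d_image and the width/height parameters are placeholders for this example, not part of the code above.

```c++
#include <vector>
#include <cuda_runtime.h>
#include <opencv2/opencv.hpp>

// Sketch: copy a CHW RGB float image from device memory back to the host and
// repack it into an HWC BGR 8-bit cv::Mat for custom post-processing.
// d_image, width, height and the CHW [0,1] float layout are assumptions.
cv::Mat imageFromDevice(const float* d_image, int width, int height, cudaStream_t stream) {
    std::vector<float> host(3 * width * height);
    cudaMemcpyAsync(host.data(), d_image, host.size() * sizeof(float),
                    cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);  // the copy must finish before the host data is read

    cv::Mat img(height, width, CV_8UC3);
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            int idx = row * width + col;
            // CHW RGB float in [0,1] -> HWC BGR uchar (the layout cv::imwrite expects)
            img.at<cv::Vec3b>(row, col) = cv::Vec3b(
                (uchar)(host[idx + 2 * width * height] * 255.0f),  // B
                (uchar)(host[idx + width * height] * 255.0f),      // G
                (uchar)(host[idx] * 255.0f));                      // R
        }
    }
    return img;
}
```

If the task-specific post-processing itself runs as a CUDA kernel, it can write into the same device buffer before this copy, so only the finished image crosses back to the host.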