ubuntu20.04使用C++与TensorRT8.2对yolov8目标检测模型进行推理预测(附源码)

jessie的垃圾桶

已于 2024-08-13 13:49:16 修改

阅读量259

点赞数 11

文章标签： c++ YOLO 目标检测

于 2024-08-13 13:47:01 首次发布

本文链接：https://blog.csdn.net/m0_64293675/article/details/141158293

版权

ubuntu20.04使用C++与TensorRT对yolov8目标检测模型进行推理预测。

TensorRT版本 8.2.5.1
opencv版本 4.5.5
cuda版本 11.6

默认以上环境都已配置完成。
pt模型转onnx指令

yolo export model=best.pt format=onnx

onnx模型转engine指令（需要修改为自己安装的TensorRT路径）

/home/user/tools/TensorRT-8.2.5.1/bin/trtexec --onnx=best.onnx --workspace=4096 --fp16 --dumpLayerInfo --saveEngine=best.engine

上述onnx转engine过程耗时比较长（20分钟左右），如果等到终端打印&&&& PASSED字样，表示转换成功。

源码在附件中。
编译运行(需要自行修改相关配置参数)

mkdir build
cd build
cmake ..
make && ./yolov8_trt

头文件 — my_utils.h
不知道是啥原因，博主调用opencv中的nms算法一直报错，所以在my_utils.h中直接复制了opencv里nms的源码。没有调用问题的童鞋可以直接用cv::dnn::NMSBoxes(boxes, confs, confThreshold, iouThreshold, indices)哈（注意该函数位于cv::dnn命名空间，需要链接opencv_dnn模块）。

#pragma once
#include <algorithm>
#include <fstream>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <vector>
#include <chrono>
#include <cmath>
#include <numeric> 

using namespace cv;

//前处理
/**
 * @brief Letterbox-resize an image onto an input_w x input_h canvas.
 *
 * The image is scaled by the smaller of the two axis ratios so that it fits
 * entirely inside the canvas while keeping its aspect ratio, then centred on
 * a grey (128,128,128) CV_8UC3 background.
 *
 * @param img      Source image (expected 8-bit, 3 channels); not modified.
 * @param input_w  Canvas width in pixels.
 * @param input_h  Canvas height in pixels.
 * @param padsize  Output. Overwritten with exactly four values:
 *                 {resized height, resized width, top padding, left padding}.
 * @return The letterboxed canvas.
 */
static inline cv::Mat preprocess_img(cv::Mat &img, int input_w, int input_h, std::vector<int> &padsize)
{
    const float r_w = input_w / (img.cols * 1.0);
    const float r_h = input_h / (img.rows * 1.0);

    int w, h, x, y;
    if (r_h > r_w)
    {
        // Width is the limiting side: fill it and pad top/bottom.
        w = input_w;
        h = r_w * img.rows;
        x = 0;
        y = (input_h - h) / 2;
    }
    else
    {
        // Height is the limiting side: fill it and pad left/right.
        w = r_h * img.cols;
        h = input_h;
        x = (input_w - w) / 2;
        y = 0;
    }

    cv::Mat re(h, w, CV_8UC3);
    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);

    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));

    // Reset instead of appending: callers read padsize[0..3], which would be
    // stale values if the same vector were reused for a second image.
    padsize.clear();
    padsize.push_back(h);
    padsize.push_back(w);
    padsize.push_back(y);
    padsize.push_back(x);

    return out;
}
/**
 * @brief Map a box from letterboxed network coordinates back to the
 *        coordinates of the original image.
 *
 * Declared `inline` because this definition lives in a header; without it,
 * including the header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * NOTE(review): the two branches interpret `bbox` differently. When the width
 * is the limiting side (r_h > r_w) the values are used as corner coordinates
 * {left, top, right, bottom}; otherwise they are used as {cx, cy, w, h}.
 * This looks like two variants of the helper merged together; confirm which
 * layout callers pass before relying on this function. The arithmetic is left
 * unchanged.
 *
 * @param img      Original image (only its size is read).
 * @param bbox     Four box values in network-input coordinates (see note).
 * @param INPUT_W  Network input width.
 * @param INPUT_H  Network input height.
 * @return The box expressed in original-image pixels.
 */
inline cv::Rect get_rect(cv::Mat &img, float bbox[4], int INPUT_W, int INPUT_H)
{
    int l, r, t, b;
    float r_w = INPUT_W / (img.cols * 1.0);
    float r_h = INPUT_H / (img.rows * 1.0);
    if (r_h > r_w)
    {
        // Padding was added above/below: remove the vertical offset, then undo the scale.
        l = bbox[0];
        r = bbox[2];
        t = bbox[1] - (INPUT_H - r_w * img.rows) / 2;
        b = bbox[3] - (INPUT_H - r_w * img.rows) / 2;
        l = l / r_w;
        r = r / r_w;
        t = t / r_w;
        b = b / r_w;
    }
    else
    {
        // Padding was added left/right: remove the horizontal offset, then undo the scale.
        l = bbox[0] - bbox[2] / 2.f - (INPUT_W - r_h * img.cols) / 2;
        r = bbox[0] + bbox[2] / 2.f - (INPUT_W - r_h * img.cols) / 2;
        t = bbox[1] - bbox[3] / 2.f;
        b = bbox[1] + bbox[3] / 2.f;
        l = l / r_h;
        r = r / r_h;
        t = t / r_h;
        b = b / r_h;
    }
    return cv::Rect(l, t, r - l, b - t);
}

/*opencv  nms  源码  start*/
/// Ordering predicate for (score, payload) pairs: returns true when `lhs`
/// carries a strictly higher score than `rhs`, so sorting with it yields
/// descending scores. The payload is never inspected.
template <typename T>
static inline bool SortScorePairDescend(const std::pair<float, T> &lhs,
                                        const std::pair<float, T> &rhs)
{
    return rhs.first < lhs.first;
}

/// Collect (score, index) pairs for every score strictly above `threshold`,
/// ordered by descending score (ties keep their original relative order),
/// and truncate to the best `top_k` entries when `top_k` is positive.
///
/// @param scores           Per-candidate scores.
/// @param threshold        Scores less than or equal to this are dropped.
/// @param top_k            Maximum number of pairs to keep; <= 0 keeps all.
/// @param score_index_vec  Output; expected to be empty on entry.
inline void GetMaxScoreIndex(const std::vector<float> &scores, const float threshold, const int top_k,
                             std::vector<std::pair<float, int>> &score_index_vec)
{
    CV_DbgAssert(score_index_vec.empty());

    // Pair every passing score with its position in `scores`.
    size_t position = 0;
    for (const float score : scores)
    {
        if (score > threshold)
        {
            score_index_vec.push_back(std::make_pair(score, position));
        }
        ++position;
    }

    // Highest score first; stable so equal scores stay in input order.
    std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
                     [](const std::pair<float, int> &lhs, const std::pair<float, int> &rhs)
                     { return lhs.first > rhs.first; });

    // Optional cap on the number of candidates.
    const int found = (int)score_index_vec.size();
    if (top_k > 0 && found > top_k)
    {
        score_index_vec.resize(top_k);
    }
}

/// Greedy non-maximum suppression over arbitrary box types.
///
/// Candidates are visited in descending score order; a candidate is kept only
/// if its overlap with every already-kept box is <= the current threshold.
///
/// @param bboxes           Candidate boxes.
/// @param scores           One score per box (same length as `bboxes`).
/// @param score_threshold  Candidates scoring at or below this are ignored.
/// @param nms_threshold    Initial overlap threshold.
/// @param eta              When < 1, the threshold is multiplied by `eta`
///                         after each kept box while it is still above 0.5.
/// @param top_k            Cap on the number of candidates considered (<= 0: no cap).
/// @param indices          Output: indices into `bboxes` of the kept boxes.
/// @param computeOverlap   Overlap measure between two boxes.
/// @param limit            Stop as soon as this many boxes have been kept.
template <typename BoxType>
inline void NMSFast_(const std::vector<BoxType> &bboxes,
                       const std::vector<float> &scores, const float score_threshold,
                       const float nms_threshold, const float eta, const int top_k,
                       std::vector<int> &indices,
                       float (*computeOverlap)(const BoxType &, const BoxType &),
                       int limit = std::numeric_limits<int>::max())
{
    CV_Assert(bboxes.size() == scores.size());

    // Ranked (score, index) candidates.
    std::vector<std::pair<float, int>> ranked;
    GetMaxScoreIndex(scores, score_threshold, top_k, ranked);

    indices.clear();
    float adaptive_threshold = nms_threshold;
    for (const std::pair<float, int> &entry : ranked)
    {
        const int candidate = entry.second;

        // Reject the candidate at the first kept box it overlaps too much.
        bool suppressed = false;
        for (const int kept : indices)
        {
            const float overlap = computeOverlap(bboxes[candidate], bboxes[kept]);
            if (!(overlap <= adaptive_threshold))
            {
                suppressed = true;
                break;
            }
        }
        if (suppressed)
        {
            continue;
        }

        indices.push_back(candidate);
        if (indices.size() >= limit)
        {
            break;
        }
        if (eta < 1 && adaptive_threshold > 0.5)
        {
            adaptive_threshold *= eta;
        }
    }
}

/// Overlap score of two rectangles, computed as one minus the value returned
/// by jaccardDistance(a, b).
template <typename T>
static inline float rectOverlap(const T &a, const T &b)
{
    const float distance = static_cast<float>(jaccardDistance(a, b));
    return 1.f - distance;
}

/**
 * @brief Non-maximum suppression for axis-aligned cv::Rect boxes.
 *
 * Thin wrapper that validates its arguments and forwards to NMSFast_ with
 * rectOverlap as the overlap measure.
 *
 * Declared `inline` because the definition lives in a header; a non-inline
 * definition breaks linking as soon as two translation units include it.
 *
 * @param bboxes           Candidate boxes.
 * @param scores           One score per box.
 * @param score_threshold  Boxes scoring at or below this are discarded (must be >= 0).
 * @param nms_threshold    Maximum allowed overlap between kept boxes (must be >= 0).
 * @param indices          Output: indices of the kept boxes.
 * @param eta              Adaptive-threshold factor, must be > 0 (1.0 disables adaptation).
 * @param top_k            Keep at most this many candidates before NMS (0: no cap).
 */
inline void NMSBoxes_(const std::vector<cv::Rect> &bboxes, const std::vector<float> &scores,
                      const float score_threshold, const float nms_threshold,
                      std::vector<int> &indices, const float eta = 1.0, const int top_k = 0)
{
    CV_Assert_N(bboxes.size() == scores.size(), score_threshold >= 0,
                nms_threshold >= 0, eta > 0);
    NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rectOverlap);
}

/*opencv  nms  源码  end*/

源文件 — detect.cpp

#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "NvInferPlugin.h"
// #include "logging.h"
#include <opencv2/opencv.hpp>
#include "my_utils.h"
#include <string>
#include <regex>
#include <filesystem>
using namespace nvinfer1;
using namespace cv;
using namespace std;

// Network input geometry. Assigned in detect() from the engine's input
// binding dimensions and read by doInference() to size the device buffers.
int INPUT_H;
int INPUT_W;
int CHANNELS;

// Output geometry. Assigned in detect() from the engine's output binding
// dimensions: CLASSES = d[1] - 4, Num_box = d[2], OUTPUT_SIZE = d[1] * d[2].
int CLASSES;
int Num_box;
int OUTPUT_SIZE; // element count of the "output0" tensor

// Minimum class score for a candidate box, and overlap threshold passed to NMS.
static const float CONF_THRESHOLD = 0.51;
static const float NMS_THRESHOLD = 0.45;

// Binding names looked up with getBindingIndex(). Such names are normally assigned via
// network->getInput(i)->setName("images") and network->getOutput(i)->setName("output0").
const char *INPUT_BLOB_NAME = "images";
const char *OUTPUT_BLOB_NAME = "output0"; // detection output

// Drawing palette; filled by loadNames() with two random colours per class.
static std::vector<cv::Scalar> colors;
/// Minimal TensorRT logger that writes messages to std::cerr.
///
/// A message is printed when its severity value is less than or equal to the
/// configured threshold (default kWARNING); everything else is dropped.
/// NOTE(review): presumably lower Severity values are more severe - confirm
/// against the TensorRT headers.
class MyLogger : public nvinfer1::ILogger
{
public:
    explicit MyLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kWARNING)
        : severity_(severity)
    {
    }

    /// Callback invoked by TensorRT for every log message.
    void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override
    {
        const bool filteredOut = severity > severity_;
        if (filteredOut)
        {
            return;
        }
        std::cerr << msg << std::endl;
    }

    /// Threshold compared against each incoming message's severity.
    nvinfer1::ILogger::Severity severity_;
};
// One detection result, as consumed by DrawPred().
struct OutputSeg
{
    int id;           // class id of the detection
    float confidence; // confidence score of the detection
    cv::Rect box;     // bounding rectangle
    cv::Mat boxMask;  // mask inside the rectangle (saves memory and speeds things up); detect() in this file never assigns it
};

/**
 * @brief Read class names (one per line) and extend the global colour palette.
 *
 * A trailing '\r' is stripped from each line so files with CRLF line endings
 * work. After reading, two random colours per class are appended to the
 * global `colors` vector.
 *
 * @param path  Path of the names file.
 * @return The class names in file order; empty when the file cannot be opened
 *         (an error is printed to std::cerr in that case).
 */
std::vector<std::string> loadNames(const std::string &path)
{
    // load class names
    std::vector<std::string> classNames;
    std::ifstream infile(path);
    if (infile.good())
    {
        std::string line;
        while (getline(infile, line))
        {
            // Guard against empty lines: calling back() on an empty string is undefined behaviour.
            if (!line.empty() && line.back() == '\r')
                line.pop_back();
            classNames.emplace_back(line);
        }
        infile.close();
    }
    else
    {
        std::cerr << "加载names文件失败！" << path << std::endl;
    }

    // set color: two palette entries per class
    srand(time(0));

    const size_t colorCount = 2 * classNames.size();
    for (size_t i = 0; i < colorCount; i++)
    {
        int b = rand() % 256;
        int g = rand() % 256;
        int r = rand() % 256;
        colors.push_back(cv::Scalar(b, g, r));
    }
    return classNames;
}

/**
 * @brief Draw detections (filled box, outline and class label) onto `img`.
 *
 * Everything is painted on a copy of the image, which is then blended back
 * into `img` (40% original, 60% overlay) so the fills appear translucent.
 *
 * @param img         Image to annotate in place. Each result's box must lie
 *                    inside the image.
 * @param results     Detections to draw. An empty boxMask fills the whole box.
 * @param classNames  Class names indexed by OutputSeg::id. The global `colors`
 *                    palette must hold at least 2 * classNames.size() entries.
 */
void DrawPred(Mat &img, std::vector<OutputSeg> results, const std::vector<std::string> &classNames)
{
    // Use the same font parameters for measuring and drawing so the label
    // background actually covers the text.
    const double kFontScale = 1.0;
    const int kFontThickness = 2;
    const size_t classCount = classNames.size();

    cv::Mat image = img.clone();
    for (const OutputSeg &result : results)
    {
        // Skip ids that would index past the name list or the colour palette.
        if (result.id < 0 || static_cast<size_t>(result.id) >= classCount ||
            static_cast<size_t>(result.id) + classCount >= colors.size())
        {
            std::cerr << "DrawPred: skipping detection with out-of-range class id " << result.id << std::endl;
            continue;
        }
        const size_t classId = static_cast<size_t>(result.id);
        const int x = result.box.x;
        const int y = result.box.y;

        // Label shows the class name only.
        const std::string &label = classNames[classId];

        int baseline = 0;
        const cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, kFontScale, kFontThickness, &baseline);

        // Translucent fill uses the second palette entry of the class, outline the first.
        image(result.box).setTo(colors[classId + classCount], result.boxMask);
        cv::rectangle(image, result.box, colors[classId], 2);
        cv::rectangle(image,
                      cv::Point(x, y), cv::Point(x + size.width, y + size.height + baseline),
                      colors[classId], -1);
        cv::putText(image, label,
                    cv::Point(x, y + size.height), cv::FONT_ITALIC,
                    kFontScale, cv::Scalar(255, 0, 0), kFontThickness);
    }
    cv::addWeighted(img, 0.4, image, 0.6, 0, img);
}

// Global TensorRT logger instance; passed to createInferRuntime() in detect().
MyLogger gLogger;
/**
 * @brief Run one synchronous inference: copy `input` to the GPU, execute the
 *        engine, and copy the result back into `output`.
 *
 * Device buffers and the CUDA stream are created and released on every call.
 * The function returns only after the stream has been synchronised, so
 * `output` is fully written on return. On any failure an error is printed to
 * std::cerr, all resources are released and `output` is left untouched.
 *
 * @param context    Execution context of a deserialised engine that exposes
 *                   exactly the bindings INPUT_BLOB_NAME and OUTPUT_BLOB_NAME.
 * @param input      Host buffer of batchSize * CHANNELS * INPUT_H * INPUT_W floats.
 * @param output     Host buffer of batchSize * OUTPUT_SIZE floats.
 * @param batchSize  Number of images in the batch.
 */
void doInference(IExecutionContext &context, float *input, float *output, int batchSize)
{
    const ICudaEngine &engine = context.getEngine();

    // A detection engine has one input and one output. Checked at run time
    // because assert() is compiled out in Release builds.
    const int nbBindings = engine.getNbBindings();
    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
    if (nbBindings != 2 || inputIndex < 0 || outputIndex < 0)
    {
        std::cerr << "doInference: engine must expose exactly the bindings \"" << INPUT_BLOB_NAME
                  << "\" and \"" << OUTPUT_BLOB_NAME << "\" (found " << nbBindings << " bindings)" << std::endl;
        return;
    }

    const size_t inputBytes = static_cast<size_t>(batchSize) * CHANNELS * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    void *deviceInput = nullptr;
    void *deviceOutput = nullptr;
    cudaStream_t stream = nullptr;

    // Reports a failed CUDA call; returns true on success.
    auto ok = [](cudaError_t status, const char *what) -> bool
    {
        if (status == cudaSuccess)
        {
            return true;
        }
        std::cerr << "doInference: " << what << " failed: " << cudaGetErrorString(status) << std::endl;
        return false;
    };
    // Releases everything acquired so far; cudaFree(nullptr) is a no-op.
    auto release = [&]()
    {
        if (stream != nullptr)
        {
            cudaStreamDestroy(stream);
        }
        cudaFree(deviceOutput);
        cudaFree(deviceInput);
    };

    if (!ok(cudaMalloc(&deviceInput, inputBytes), "cudaMalloc(input)") ||
        !ok(cudaMalloc(&deviceOutput, outputBytes), "cudaMalloc(output)") ||
        !ok(cudaStreamCreate(&stream), "cudaStreamCreate"))
    {
        release();
        return;
    }

    std::vector<void *> buffers(nbBindings, nullptr);
    buffers[inputIndex] = deviceInput;
    buffers[outputIndex] = deviceOutput;

    bool success = ok(cudaMemcpyAsync(deviceInput, input, inputBytes, cudaMemcpyHostToDevice, stream),
                      "cudaMemcpyAsync(host to device)");
    // NOTE(review): enqueue() is the implicit-batch entry point; engines built
    // from ONNX are explicit-batch, for which enqueueV2() is the matching call.
    if (success && !context.enqueue(batchSize, buffers.data(), stream, nullptr))
    {
        std::cerr << "doInference: IExecutionContext::enqueue failed" << std::endl;
        success = false;
    }
    if (success)
    {
        ok(cudaMemcpyAsync(output, deviceOutput, outputBytes, cudaMemcpyDeviceToHost, stream),
           "cudaMemcpyAsync(device to host)");
    }

    // Wait for all queued work before the buffers are freed and before the
    // caller reads `output`.
    ok(cudaStreamSynchronize(stream), "cudaStreamSynchronize");
    release();
}

bool detect()
{
    std::string engine_filepath = "../models/LQ.engine";

    std::string input_pic_filepath = "../Imginput/";
    std::string output_pic_filepath = "../Imgoutput/";
    const std::vector<std::string> classNames = loadNames("/home/mec/wushuang/Yolov8_tensorrt/models/detect.names");
    if (classNames.empty())
    {
        std::cerr << "names文件加载失败！" << std::endl;
        return 0;
    }
    // create a model using the API directly and serialize it to a stream
    char *trtModelStream{nullptr}; // char* trtModelStream==nullptr;  开辟空指针后 要和new配合使用，比如89行 trtModelStream = new char[size]
    size_t size{0};                // 与int固定四个字节不同有所不同,size_t的取值range是目标平台下最大可能的数组尺寸,一些平台下size_t的范围小于int的正数范围,又或者大于unsigned int. 使用Int既有可能浪费，又有可能范围不够大。

    std::ifstream file(engine_filepath, std::ios::binary);
    if (file.good())
    {
        std::cout << "engine文件加载成功！" << std::endl;
        file.seekg(0, file.end); // 指向文件的最后地址
        size = file.tellg();     // 把文件长度告诉给size

        file.seekg(0, file.beg);         // 指回文件的开始地址
        trtModelStream = new char[size]; // 开辟一个char 长度是文件的长度
        assert(trtModelStream);          //
        file.read(trtModelStream, size); // 将文件内容传给trtModelStream
        file.close();                    // 关闭
    }
    else
    {
        std::cout << "engine文件加载失败！" << std::endl;
        return 0;
    }

    IRuntime *runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    bool didInitPlugins = initLibNvInferPlugins(nullptr, "");
    ICudaEngine *engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
    assert(engine != nullptr);
    IExecutionContext *context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;
    int inputIndex = 0;
    int outputIndex = 1;
    Dims in_shape = context->getBindingDimensions(inputIndex);
    Dims out_shape = context->getBindingDimensions(outputIndex);

    // 注意这里需要和模型输入的宽高对应
    // 如果模型的输入tensor为[1,channel,width,height]
    // 用 INPUT_W = in_shape.d[2];  INPUT_H = in_shape.d[3];
    // 如果模型的输入tensor为[1,channel,height,width]
    // 用 INPUT_W = in_shape.d[3];  INPUT_H = in_shape.d[2];
    
    INPUT_W = in_shape.d[3];
    INPUT_H = in_shape.d[2];
    CHANNELS = in_shape.d[1];

    CLASSES = out_shape.d[1] - 4;
    Num_box = out_shape.d[2];
    OUTPUT_SIZE = out_shape.d[2] * out_shape.d[1];
    std::cout << "INPUT_H:" << INPUT_H << std::endl;
    std::cout << "INPUT_W:" << INPUT_W << std::endl;
    std::cout << "CHANNELS:" << CHANNELS << std::endl;
    std::cout << "CLASSES:" << CLASSES << std::endl;
    std::cout << "Num_box:" << Num_box << std::endl;
    std::cout << "OUTPUT_SIZE:" << OUTPUT_SIZE << std::endl;

    std::vector<cv::String> pic_filenames;
    cv::glob(input_pic_filepath + "*.jpg", pic_filenames);

    for (auto pic_filename : pic_filenames)
    {
        Mat src = imread(pic_filename, 1);
        if (src.empty())
        {
            std::cout << "图片加载失败！" << std::endl;
            return 0;
        }
        int img_width = src.cols;
        int img_height = src.rows;
        // std::cout << "正在预测：" << pic_filename << std::endl;
        // std::cout << "宽高：" << img_width << " " << img_height << std::endl;

        float *data = new float[CHANNELS * INPUT_H * INPUT_W];
        Mat pr_img;
        std::vector<int> padsize;

        pr_img = preprocess_img(src, INPUT_W, INPUT_H, padsize); // Resize
        int newh = padsize[0], neww = padsize[1], padh = padsize[2], padw = padsize[3];
        float ratio_h = (float)src.rows / newh;
        float ratio_w = (float)src.cols / neww;

        for (int c = 0; c < CHANNELS; c++)
        {
            for (int h = 0; h < INPUT_H; h++)
            {
                for (int w = 0; w < INPUT_W; w++)
                {
                    data[c * INPUT_W * INPUT_H + h * INPUT_W + w] =
                        pr_img.at<cv::Vec3b>(h, w)[c] / 255.0f;
                }
            }
        }

        // Run inference
        float *prob = new float[OUTPUT_SIZE];

        auto start = std::chrono::system_clock::now();
        doInference(*context, data, prob, 1);
        auto end = std::chrono::system_clock::now();
        // std::cout << "推理时间：" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

        std::vector<int> classIds;      // 结果id数组
        std::vector<float> confidences; // 结果每个id对应置信度数组
        std::vector<cv::Rect> boxes;    // 每个id矩形框

        // 处理box
        int net_length = CLASSES + 4;
        cv::Mat out1 = cv::Mat(net_length, Num_box, CV_32F, prob);

        start = std::chrono::system_clock::now();
        for (int i = 0; i < Num_box; i++)
        {
            // 输出是1*net_length*Num_box;所以每个box的属性是每隔Num_box取一个值，共net_length个值
            cv::Mat scores = out1(Rect(i, 4, 1, CLASSES)).clone();
            Point classIdPoint;
            double max_class_socre;
            minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
            max_class_socre = (float)max_class_socre;
            if (max_class_socre >= CONF_THRESHOLD)
            {
                float x = (out1.at<float>(0, i) - padw) * ratio_w; // cx
                float y = (out1.at<float>(1, i) - padh) * ratio_h; // cy
                float w = out1.at<float>(2, i) * ratio_w;          // w
                float h = out1.at<float>(3, i) * ratio_h;          // h
                int left = MAX((x - 0.5 * w), 0);
                int top = MAX((y - 0.5 * h), 0);
                int width = (int)w;
                int height = (int)h;
                if (width <= 0 || height <= 0)
                {
                    continue;
                }

                classIds.push_back(classIdPoint.y);
                confidences.push_back(max_class_socre);
                boxes.push_back(Rect(left, top, width, height));
            }
        }
        // 执行非最大抑制以消除具有较低置信度的冗余重叠框（NMS）
        std::vector<int> nms_result;
        NMSBoxes_(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD, nms_result);

        std::vector<OutputSeg>
            output;
        Rect holeImgRect(0, 0, src.cols, src.rows);
        for (int i = 0; i < nms_result.size(); ++i)
        {
            int idx = nms_result[i];
            OutputSeg result;
            result.id = classIds[idx];
            result.confidence = confidences[idx];
            result.box = boxes[idx] & holeImgRect;

            output.push_back(result);
        }

        end = std::chrono::system_clock::now();
        // std::cout << "后处理时间：" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

        std::string tmp = pic_filename.substr(pic_filename.find_last_of('/'));
        std::string new_filename = tmp.substr(0, tmp.find_last_of('.')) + "_detect" + tmp.substr(tmp.find_last_of('.'));

        // 在原图上画检测框
        DrawPred(src, output, classNames);
        std::cout << new_filename << " Saved !!!" << std::endl;

        cv::imwrite(output_pic_filepath + new_filename, src);
        delete[] data;
        delete[] prob;
    }
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    return 1;
}

/// Program entry point: runs detect() once and reports whether it succeeded.
/// The exit code is 0 in both cases.
int main()
{
    const bool succeeded = detect();
    std::cout << (succeeded ? " 预测完成~" : " 预测失败") << std::endl;
    return 0;
}

类别文件 — detect.names
需要根据自己的类别修改

pedes
motor-vehicle
non-motor-vehicle
road-block

CMakeLists.txt

# Build script for the yolov8_trt demo executable.
# enable_language(CUDA) below requires CMake >= 3.8, so 3.0 was too low.
cmake_minimum_required(VERSION 3.10)
project(yolov8_trt)

# Select the language standard through CMake instead of passing -std via
# add_definitions(). detect.cpp includes <filesystem>, a C++17 header.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# option() takes a help string before the default value; without it the
# literal "OFF" was parsed as the help text.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)

# Default to Release, but let the user override it with -DCMAKE_BUILD_TYPE=...
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

include_directories("include/")

find_package(OpenCV REQUIRED)

# Change these to your own TensorRT installation path
include_directories(${OpenCV_INCLUDE_DIRS} /home/user/tools/TensorRT-8.2.5.1/include)
link_directories(/home/user/tools/TensorRT-8.2.5.1/lib)

# Change these to your own CUDA installation path
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)

find_package(CUDA REQUIRED)
message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")
include_directories(${CUDA_INCLUDE_DIRS})
enable_language(CUDA)

add_executable(yolov8_trt
               src/detect.cpp
               )

target_link_libraries(yolov8_trt nvinfer)
target_link_libraries(yolov8_trt nvonnxparser)
# Link the CUDA runtime found by find_package(CUDA) rather than a path
# hard-coded to one CUDA version.
target_link_libraries(yolov8_trt ${CUDA_LIBRARIES})
target_link_libraries(yolov8_trt nvinfer_plugin)
target_link_libraries(yolov8_trt ${OpenCV_LIBS})