ubuntu20.04使用C++与ONNXRuntime对yolov8目标检测模型进行推理预测。
- TensorRT版本 8.2.5.1
- opencv版本 4.5.5
- cuda版本 11.6
默认以上环境都已配置完成。
pt模型转onnx指令
yolo export model=best.pt format=onnx
onnx模型转engien指令(需要修改为自己安装的TensorRT路径)
/home/user/tools/TensorRT-8.2.5.1/bin/trtexec --onnx=best.onnx --workspace=4096 --fp16 --dumpLayerInfo --saveEngine=best.engine
上述onnx转engien过程耗时比较长(20分钟左右),如果等到终端打印&&&& PASSED字样,表示转换成功。
源码在附件中。
编译运行(需要自行修改相关配置参数)
mkdir build
cd build
cmake ..
make && ./yolov8_trt
- 头文件 — my_utils.h
不知道是啥原因,博主调用opencv中的nms算法一直报错,所以在my_utils.h中直接复制了opencv里nms的源码。没有调用问题的童鞋可以直接用cv::NMSBoxes(boxes, confs, confThreshold,iouThreshold, indices)哈
#pragma once
#include <algorithm>
#include <fstream>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <vector>
#include <chrono>
#include <cmath>
#include <numeric>
using namespace cv;
//前处理
static inline cv::Mat preprocess_img(cv::Mat &img, int input_w, int input_h, std::vector<int> &padsize)
{
int w, h, x, y;
float r_w = input_w / (img.cols * 1.0);
float r_h = input_h / (img.rows * 1.0);
if (r_h > r_w)
{
w = input_w;
h = r_w * img.rows;
x = 0;
y = (input_h - h) / 2;
}
else
{
w = r_h * img.cols;
h = input_h;
x = (input_w - w) / 2;
y = 0;
}
cv::Mat re(h, w, CV_8UC3);
cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
padsize.push_back(h);
padsize.push_back(w);
padsize.push_back(y);
padsize.push_back(x); // int newh = padsize[0], neww = padsize[1], padh = padsize[2], padw = padsize[3];
return out;
}
cv::Rect get_rect(cv::Mat &img, float bbox[4], int INPUT_W, int INPUT_H)
{
int l, r, t, b;
float r_w = INPUT_W / (img.cols * 1.0);
float r_h = INPUT_H / (img.rows * 1.0);
if (r_h > r_w)
{
l = bbox[0];
r = bbox[2];
t = bbox[1] - (INPUT_H - r_w * img.rows) / 2;
b = bbox[3] - (INPUT_H - r_w * img.rows) / 2;
l = l / r_w;
r = r / r_w;
t = t / r_w;
b = b / r_w;
}
else
{
l = bbox[0] - bbox[2] / 2.f - (INPUT_W - r_h * img.cols) / 2;
r = bbox[0] + bbox[2] / 2.f - (INPUT_W - r_h * img.cols) / 2;
t = bbox[1] - bbox[3] / 2.f;
b = bbox[1] + bbox[3] / 2.f;
l = l / r_h;
r = r / r_h;
t = t / r_h;
b = b / r_h;
}
return cv::Rect(l, t, r - l, b - t);
}
/*opencv nms 源码 start*/
template <typename T>
static inline bool SortScorePairDescend(const std::pair<float, T> &pair1,
const std::pair<float, T> &pair2)
{
return pair1.first > pair2.first;
}
inline void GetMaxScoreIndex(const std::vector<float> &scores, const float threshold, const int top_k,
std::vector<std::pair<float, int>> &score_index_vec)
{
CV_DbgAssert(score_index_vec.empty());
// Generate index score pairs.
for (size_t i = 0; i < scores.size(); ++i)
{
if (scores[i] > threshold)
{
score_index_vec.push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > 0 && top_k < (int)score_index_vec.size())
{
score_index_vec.resize(top_k);
}
}
template <typename BoxType>
inline void NMSFast_(const std::vector<BoxType> &bboxes,
const std::vector<float> &scores, const float score_threshold,
const float nms_threshold, const float eta, const int top_k,
std::vector<int> &indices,
float (*computeOverlap)(const BoxType &, const BoxType &),
int limit = std::numeric_limits<int>::max())
{
CV_Assert(bboxes.size() == scores.size());
// Get top_k scores (with corresponding indices).
std::vector<std::pair<float, int>> score_index_vec;
GetMaxScoreIndex(scores, score_threshold, top_k, score_index_vec);
// Do nms.
float adaptive_threshold = nms_threshold;
indices.clear();
for (size_t i = 0; i < score_index_vec.size(); ++i)
{
const int idx = score_index_vec[i].second;
bool keep = true;
for (int k = 0; k < (int)indices.size() && keep; ++k)
{
const int kept_idx = indices[k];
float overlap = computeOverlap(bboxes[idx], bboxes[kept_idx]);
keep = overlap <= adaptive_threshold;
}
if (keep)
{
indices.push_back(idx);
if (indices.size() >= limit)
{
break;
}
}
if (keep && eta < 1 && adaptive_threshold > 0.5)
{
adaptive_threshold *= eta;
}
}
}
template <typename T>
static inline float rectOverlap(const T &a, const T &b)
{
return 1.f - static_cast<float>(jaccardDistance(a, b));
}
void NMSBoxes_(const std::vector<cv::Rect> &bboxes, const std::vector<float> &scores,
const float score_threshold, const float nms_threshold,
std::vector<int> &indices, const float eta = 1.0, const int top_k = 0)
{
CV_Assert_N(bboxes.size() == scores.size(), score_threshold >= 0,
nms_threshold >= 0, eta > 0);
NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rectOverlap);
}
/*opencv nms 源码 end*/
- 源文件 — detect.cpp
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "NvInferPlugin.h"
// #include "logging.h"
#include <opencv2/opencv.hpp>
#include "my_utils.h"
#include <string>
#include <regex>
#include <filesystem>
using namespace nvinfer1;
using namespace cv;
using namespace std;
int INPUT_H;
int INPUT_W;
int CHANNELS;
int CLASSES;
int Num_box;
int OUTPUT_SIZE; // output0
static const float CONF_THRESHOLD = 0.51;
static const float NMS_THRESHOLD = 0.45;
//这里的名称通常是通过调用 network->getInput(i)->setName("images") 和 network->getOutput(i)->setName("output0")来实现的
const char *INPUT_BLOB_NAME = "images";
const char *OUTPUT_BLOB_NAME = "output0"; // detect
static std::vector<cv::Scalar> colors;
class MyLogger : public nvinfer1::ILogger
{
public:
explicit MyLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kWARNING) : severity_(severity) {}
void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override
{
if (severity <= severity_)
{
std::cerr << msg << std::endl;
}
}
nvinfer1::ILogger::Severity severity_;
};
struct OutputSeg
{
int id; // 结果类别id
float confidence; // 结果置信度
cv::Rect box; // 矩形框
cv::Mat boxMask; // 矩形框内mask,节省内存空间和加快速度
};
std::vector<std::string> loadNames(const std::string &path)
{
// load class names
std::vector<std::string> classNames;
std::ifstream infile(path);
if (infile.good())
{
std::string line;
while (getline(infile, line))
{
if (line.back() == '\r')
line.pop_back();
classNames.emplace_back(line);
}
infile.close();
}
else
{
std::cerr << "加载names文件失败!" << path << std::endl;
}
// set color
srand(time(0));
for (int i = 0; i < 2 * classNames.size(); i++)
{
int b = rand() % 256;
int g = rand() % 256;
int r = rand() % 256;
colors.push_back(cv::Scalar(b, g, r));
}
return classNames;
}
void DrawPred(Mat &img, std::vector<OutputSeg> results, const std::vector<std::string> &classNames)
{
cv::Mat imagetmp = img.clone();
cv::Mat image = img.clone();
int count = 0;
for (const OutputSeg &result : results)
{
count++;
int x = result.box.x;
int y = result.box.y;
int conf = (int)std::round(result.confidence * 100);
int classId = result.id;
// 显示类别和置信度
// std::string label = classNames[classId] + " 0." + std::to_string(conf);
//显示类别
std::string label = classNames[classId];
int baseline = 0;
cv::Size size = cv::getTextSize(label, cv::FONT_ITALIC, 0.4, 2, &baseline);
image(result.box).setTo(colors[classId + classNames.size()], result.boxMask);
cv::rectangle(image, result.box, colors[classId], 2);
cv::rectangle(image,
cv::Point(x, y), cv::Point(x + size.width, y + 12),
colors[classId], -1);
cv::putText(image, label,
cv::Point(x, y - 3 + 12), cv::FONT_ITALIC,
1, cv::Scalar(255, 0, 0), 2);
}
cv::addWeighted(img, 0.4, image, 0.6, 0, img);
}
MyLogger gLogger;
void doInference(IExecutionContext &context, float *input, float *output, int batchSize)
{
const ICudaEngine &engine = context.getEngine();
assert(engine.getNbBindings() == 3);
void *buffers[3];
const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
cudaMalloc(&buffers[inputIndex], batchSize * 3 * INPUT_H * INPUT_W * sizeof(float));
cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float));
cudaStream_t stream;
cudaStreamCreate(&stream);
cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream);
context.enqueue(batchSize, buffers, stream, nullptr);
cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream);
cudaStreamDestroy(stream);
cudaFree(buffers[inputIndex]);
}
bool detect()
{
std::string engine_filepath = "../models/LQ.engine";
std::string input_pic_filepath = "../Imginput/";
std::string output_pic_filepath = "../Imgoutput/";
const std::vector<std::string> classNames = loadNames("/home/mec/wushuang/Yolov8_tensorrt/models/detect.names");
if (classNames.empty())
{
std::cerr << "names文件加载失败!" << std::endl;
return 0;
}
// create a model using the API directly and serialize it to a stream
char *trtModelStream{nullptr}; // char* trtModelStream==nullptr; 开辟空指针后 要和new配合使用,比如89行 trtModelStream = new char[size]
size_t size{0}; // 与int固定四个字节不同有所不同,size_t的取值range是目标平台下最大可能的数组尺寸,一些平台下size_t的范围小于int的正数范围,又或者大于unsigned int. 使用Int既有可能浪费,又有可能范围不够大。
std::ifstream file(engine_filepath, std::ios::binary);
if (file.good())
{
std::cout << "engine文件加载成功!" << std::endl;
file.seekg(0, file.end); // 指向文件的最后地址
size = file.tellg(); // 把文件长度告诉给size
file.seekg(0, file.beg); // 指回文件的开始地址
trtModelStream = new char[size]; // 开辟一个char 长度是文件的长度
assert(trtModelStream); //
file.read(trtModelStream, size); // 将文件内容传给trtModelStream
file.close(); // 关闭
}
else
{
std::cout << "engine文件加载失败!" << std::endl;
return 0;
}
IRuntime *runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
bool didInitPlugins = initLibNvInferPlugins(nullptr, "");
ICudaEngine *engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
assert(engine != nullptr);
IExecutionContext *context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
int inputIndex = 0;
int outputIndex = 1;
Dims in_shape = context->getBindingDimensions(inputIndex);
Dims out_shape = context->getBindingDimensions(outputIndex);
// 注意这里需要和模型输入的宽高对应
// 如果模型的输入tensor为[1,channel,width,height]
// 用 INPUT_W = in_shape.d[2]; INPUT_H = in_shape.d[3];
// 如果模型的输入tensor为[1,channel,height,width]
// 用 INPUT_W = in_shape.d[3]; INPUT_H = in_shape.d[2];
INPUT_W = in_shape.d[3];
INPUT_H = in_shape.d[2];
CHANNELS = in_shape.d[1];
CLASSES = out_shape.d[1] - 4;
Num_box = out_shape.d[2];
OUTPUT_SIZE = out_shape.d[2] * out_shape.d[1];
std::cout << "INPUT_H:" << INPUT_H << std::endl;
std::cout << "INPUT_W:" << INPUT_W << std::endl;
std::cout << "CHANNELS:" << CHANNELS << std::endl;
std::cout << "CLASSES:" << CLASSES << std::endl;
std::cout << "Num_box:" << Num_box << std::endl;
std::cout << "OUTPUT_SIZE:" << OUTPUT_SIZE << std::endl;
std::vector<cv::String> pic_filenames;
cv::glob(input_pic_filepath + "*.jpg", pic_filenames);
for (auto pic_filename : pic_filenames)
{
Mat src = imread(pic_filename, 1);
if (src.empty())
{
std::cout << "图片加载失败!" << std::endl;
return 0;
}
int img_width = src.cols;
int img_height = src.rows;
// std::cout << "正在预测:" << pic_filename << std::endl;
// std::cout << "宽高:" << img_width << " " << img_height << std::endl;
float *data = new float[CHANNELS * INPUT_H * INPUT_W];
Mat pr_img;
std::vector<int> padsize;
pr_img = preprocess_img(src, INPUT_W, INPUT_H, padsize); // Resize
int newh = padsize[0], neww = padsize[1], padh = padsize[2], padw = padsize[3];
float ratio_h = (float)src.rows / newh;
float ratio_w = (float)src.cols / neww;
for (int c = 0; c < CHANNELS; c++)
{
for (int h = 0; h < INPUT_H; h++)
{
for (int w = 0; w < INPUT_W; w++)
{
data[c * INPUT_W * INPUT_H + h * INPUT_W + w] =
pr_img.at<cv::Vec3b>(h, w)[c] / 255.0f;
}
}
}
// Run inference
float *prob = new float[OUTPUT_SIZE];
auto start = std::chrono::system_clock::now();
doInference(*context, data, prob, 1);
auto end = std::chrono::system_clock::now();
// std::cout << "推理时间:" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::vector<int> classIds; // 结果id数组
std::vector<float> confidences; // 结果每个id对应置信度数组
std::vector<cv::Rect> boxes; // 每个id矩形框
// 处理box
int net_length = CLASSES + 4;
cv::Mat out1 = cv::Mat(net_length, Num_box, CV_32F, prob);
start = std::chrono::system_clock::now();
for (int i = 0; i < Num_box; i++)
{
// 输出是1*net_length*Num_box;所以每个box的属性是每隔Num_box取一个值,共net_length个值
cv::Mat scores = out1(Rect(i, 4, 1, CLASSES)).clone();
Point classIdPoint;
double max_class_socre;
minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
max_class_socre = (float)max_class_socre;
if (max_class_socre >= CONF_THRESHOLD)
{
float x = (out1.at<float>(0, i) - padw) * ratio_w; // cx
float y = (out1.at<float>(1, i) - padh) * ratio_h; // cy
float w = out1.at<float>(2, i) * ratio_w; // w
float h = out1.at<float>(3, i) * ratio_h; // h
int left = MAX((x - 0.5 * w), 0);
int top = MAX((y - 0.5 * h), 0);
int width = (int)w;
int height = (int)h;
if (width <= 0 || height <= 0)
{
continue;
}
classIds.push_back(classIdPoint.y);
confidences.push_back(max_class_socre);
boxes.push_back(Rect(left, top, width, height));
}
}
// 执行非最大抑制以消除具有较低置信度的冗余重叠框(NMS)
std::vector<int> nms_result;
NMSBoxes_(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD, nms_result);
std::vector<OutputSeg>
output;
Rect holeImgRect(0, 0, src.cols, src.rows);
for (int i = 0; i < nms_result.size(); ++i)
{
int idx = nms_result[i];
OutputSeg result;
result.id = classIds[idx];
result.confidence = confidences[idx];
result.box = boxes[idx] & holeImgRect;
output.push_back(result);
}
end = std::chrono::system_clock::now();
// std::cout << "后处理时间:" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::string tmp = pic_filename.substr(pic_filename.find_last_of('/'));
std::string new_filename = tmp.substr(0, tmp.find_last_of('.')) + "_detect" + tmp.substr(tmp.find_last_of('.'));
// 在原图上画检测框
DrawPred(src, output, classNames);
std::cout << new_filename << " Saved !!!" << std::endl;
cv::imwrite(output_pic_filepath + new_filename, src);
delete[] data;
delete[] prob;
}
// Destroy the engine
context->destroy();
engine->destroy();
runtime->destroy();
return 1;
}
int main()
{
if(detect())
{
std::cout << " 预测完成~" << std::endl;
}
else{
std::cout << " 预测失败" << std::endl;
}
return 0;
}
- 类别文件 — detect.names
需要根据自己的类别修改
pedes
motor-vehicle
non-motor-vehicle
road-block
- CmakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(yolov8_trt)
add_definitions(-std=c++11)
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Release)
include_directories("include/")
find_package(OpenCV)
# 需要修改为自己安装TensorRT的路径
include_directories(${OpenCV_INCLUDE_DIRS} /home/user/tools/TensorRT-8.2.5.1/include)
link_directories(/home/user/tools/TensorRT-8.2.5.1/lib)
# 需要修改为自己安装cuda的路径
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
find_package(CUDA REQUIRED)
message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")
include_directories(${CUDA_INCLUDE_DIRS})
enable_language(CUDA)
add_executable(yolov8_trt
src/detect.cpp
)
target_link_libraries(yolov8_trt nvinfer)
target_link_libraries(yolov8_trt nvonnxparser)
# target_link_libraries(yolov8_trt cudart)
target_link_libraries(yolov8_trt "/usr/local/cuda-11.6/lib64/libcudart.so")
target_link_libraries(yolov8_trt nvinfer_plugin)
target_link_libraries(yolov8_trt ${OpenCV_LIBS})