一、环境配置
需要配置好CUDA、CUDNN环境,还要装好TensorRT环境,参考下面这篇博客
【Ubuntu版】TensorRT安装教程(tar包方式)_ubuntu安装tensorrt-CSDN博客
二、模型准备
首先你需要一个ONNX模型文件,我是Pytorch->ONNX
from ultralytics import YOLO

# Load the pretrained YOLOv8-nano weights.
model = YOLO("./yolov8n.pt")

if __name__ == '__main__':
    # Export the loaded model in ONNX format.
    model.export(format="onnx")
然后得到yolov8n.onnx文件,接下来就是ONNX->trt
找到你们TensorRT安装位置
TensorRT-8.6.4.3/bin/trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.trt
这样就生成了TensorRT可以使用的模型文件
三、yolov8输出结果分析
yolov8输出的output0.shape为 1x84x8400
8400是预选框的数量(80x80 + 40x40 + 20x20),比yolov5的25200少了很多
84个通道中,前4个是框的xywh(中心坐标加宽高),后80个是各个类别的confidence。与yolov5相比少了一个objectness,那么objectness怎么获得呢?在yolov8中objectness=后面80个类的confidence中最大的那个
最终的score=objectness*confidence
四、主要代码
detect.cpp
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <cuda_runtime_api.h>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>

#include "NvInfer.h"
#include "processing.hpp"
//#include "logging.h"
using namespace nvinfer1;
using namespace std;
// Network input resolution in pixels; main() letterboxes the image to this size.
constexpr int model_height = 640;
constexpr int model_width = 640;
// Logger handed to TensorRT: prints a message to stderr when its severity
// value is less than or equal to the configured threshold, and drops it
// otherwise. The threshold defaults to kWARNING.
class MyLogger : public nvinfer1::ILogger
{
public:
    explicit MyLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kWARNING)
        : severity_(severity)
    {
    }

    // Callback invoked by TensorRT for every log message.
    void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override
    {
        if (severity > severity_) {
            return;  // above the threshold value: ignore
        }
        std::cerr << msg << std::endl;
    }

    // Threshold compared against each incoming message's severity.
    nvinfer1::ILogger::Severity severity_;
};
int main()
{
//一、图像处理
string image_path = "/home/hitcrt/code/tensorrt/TRT_test/street.jpg"; //填写自己图片路径(需要绝对路径)
cv::Mat input_image = cv::imread(image_path);
float* input_blob = new float[model_height * model_width * 3];
cv::Mat resize_image;
//比例
const float _ratio = std::min(model_width / (input_image.cols * 1.0f),
model_height / (input_image.rows * 1.0f));
// 等比例缩放
const int border_width = input_image.cols * _ratio;
const int border_height = input_image.rows * _ratio;
// 计算偏移值
const int x_offset = (model_width - border_width) / 2;
const int y_offset = (model_height - border_height) / 2;
//将输入图像缩放至resize_image
cv::resize(input_image, resize_image, cv::Size(border_width, border_height));
//复制图像并且制作边界
cv::copyMakeBorder(resize_image, resize_image, y_offset, y_offset, x_offset,
x_offset, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
// 转换为RGB格式
cv::cvtColor(resize_image, resize_image, cv::COLOR_BGR2RGB);
//归一化
const int channels = resize_image.channels();
const int width = resize_image.cols;
const int height = resize_image.rows;
for (int c = 0; c < channels; c++) {
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
input_blob[c * width * height + h * width + w] =
resize_image.at<cv::Vec3b>(h, w)[c] / 255.0f; //at<Vec3b> 是 OpenCV 中用于访问图像像素的一种方法,使用 at<Vec3b> 获取彩色图像中特定位置的像素颜色值
}
}
}
//二、模型反序列化
MyLogger logger;
//读取trt信息
const std::string engine_file_path = "/home/hitcrt/code/tensorrt/TRT_test/yolov8n.trt"; //填写自己trt文件路径(需要绝对路径)
std::stringstream engine_file_stream;
engine_file_stream.seekg(0, engine_file_stream.beg); //从起始位置偏移0个字节,指针移动到文件流的开头
std::ifstream ifs(engine_file_path);
engine_file_stream << ifs.rdbuf(); //将读取到的数据流交给engine_file_stream
ifs.close();
engine_file_stream.seekg(0, std::ios::end); //先把文件输入流指针定位到文档末尾来获取文档的长度
const int model_size = engine_file_stream.tellg(); //获取文件流的总长度
engine_file_stream.seekg(0, std::ios::beg);
void *model_mem = malloc(model_size); //开辟一样长的空间
engine_file_stream.read(static_cast<char *>(model_mem), model_size); //将内容读取到model_mem中
nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);
nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(model_mem, model_size);
free(model_mem);
//三、模型推理
nvinfer1::IExecutionContext *context = engine->createExecutionContext();
void *buffers[2];
// 获取模型输入尺寸并分配GPU内存
nvinfer1::Dims input_dim = engine->getBindingDimensions(0);
int input_size = 1;
for (int j = 0; j < input_dim.nbDims; ++j) {
if(input_dim.d[j] < 0)
input_size *= -input_dim.d[j];
else
input_size *= input_dim.d[j];
}
cudaMalloc(&buffers[0], input_size * sizeof(float));
// 获取模型输出尺寸并分配GPU内存
nvinfer1::Dims output_dim = engine->getBindingDimensions(1);
int output_size = 1;
for (int j = 0; j < output_dim.nbDims; ++j) {
if(output_dim.d[j] < 0)
output_size *= -output_dim.d[j];
else
output_size *= output_dim.d[j];
}
cudaMalloc(&buffers[1], output_size * sizeof(float));
// 给模型输出数据分配相应的CPU内存
float *output_buffer = new float[output_size];
//数据投入
cudaStream_t stream;
cudaStreamCreate(&stream);
// 拷贝输入数据
cudaMemcpyAsync(buffers[0], input_blob, input_size * sizeof(float),
cudaMemcpyHostToDevice, stream);
// 执行推理
if(context->enqueueV2(buffers, stream, nullptr))
{
cout << "enqueueV2执行推理成功" << endl;
}
else{
cout << "enqueueV2执行推理失败" << endl;
return -1;
}
// 拷贝输出数据
cudaMemcpyAsync(output_buffer, buffers[1], output_size * sizeof(float),
cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
delete context;
delete engine;
delete runtime;
delete[] input_blob;
//四、输出结果output_buffer,放入objs xywh为中心点坐标 和宽高
float *ptr = output_buffer; // 1x84x8400 = 705600
vector<vector<float>> temp(84, vector<float>(8400));
vector<vector<float>> outVec(8400, vector<float>(84));
for(int i = 0; i < 705600; i++)
{
temp[i/8400][i%8400] = *ptr;
ptr++;
}
for(int i = 0; i < 84; i++)
{
for(int j = 0; j < 8400; j++)
{
outVec[j][i] = temp[i][j];
}
}
std::vector<Object> objs;
for (int i = 0; i < 8400; ++i)
{
const float objectness = *(std::max_element(outVec[i].begin() + 4, outVec[i].begin() + 83));
if (objectness >= 0.45f)
{
const int label = std::max_element(outVec[i].begin() + 4, outVec[i].begin() + 83) - (outVec[i].begin() + 4); //std::max_element返回范围内的最大元素
const float confidence = outVec[i][label + 4] * objectness;
if (confidence >= 0.25f) {
const float bx = outVec[i][0];
const float by = outVec[i][1];
const float bw = outVec[i][2];
const float bh = outVec[i][3];
Object obj;
// 还原图像尺寸中box的尺寸比例,这里要减掉偏移值,并把box中心点坐标xy转成左上角坐标xy
obj.box.x = (bx - bw * 0.5f - x_offset) / _ratio;
obj.box.y = (by - bh * 0.5f - y_offset) / _ratio;
obj.box.width = bw / _ratio;
obj.box.height = bh / _ratio;
obj.label = label;
obj.confidence = confidence;
objs.push_back(std::move(obj));
}
}
} // i loop
//五、NMS非极大值抑制
vector<Object> output;
hardNMS(objs, output, 0.6, 10);
//六、画框
vector<Object>::iterator it = output.begin();
while(it != output.end()){
cv::Point topLeft(it->box.x, it->box.y);
cv::Point bottomRight(it->box.x + it->box.width, it->box.y + it->box.height);
cv::rectangle(input_image, topLeft, bottomRight, cv::Scalar(0, 0, 255), 2);
std::stringstream buff;
buff.precision(2); //覆盖默认精度,置信度保留2位小数
buff.setf(std::ios::fixed);
buff << it->confidence;
string text =names[it->label] + " " + buff.str();
cv::putText(input_image, text, topLeft, 0, 1, cv::Scalar(0, 255, 0), 2);
it++;
}
cv::imwrite("detected.jpg", input_image);
return 0;
}
processing.hpp
#include <iostream>
#include <vector>
#include <list>
using namespace std;
// Class names of the COCO dataset (80 entries), indexed by class id.
std::string names[] = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush"};
// Axis-aligned rectangle stored as a corner position plus its size.
struct BOX
{
    float x, y;           // corner coordinates
    float width, height;  // extent along x and y
};
// One detection result.
struct Object
{
    // Top-left corner plus width/height of the detection.
    BOX box;
    // Class index of the detection.
    int label;
    // Despite the name this holds the final score, i.e. objectness * confidence.
    float confidence;
};
bool cmp(Object &obj1, Object &obj2){
return obj1.confidence > obj2.confidence;
}
// Intersection-over-union of two detections' boxes (top-left corner + size).
//
// Returns intersection area / union area. The result is 0 when the boxes do
// not overlap or when the union has no area.
inline float iou_of(const Object &obj1, const Object &obj2)
{
    // Corners of box 1 (lu = top-left, rb = bottom-right).
    const float x1_lu = obj1.box.x;
    const float y1_lu = obj1.box.y;
    const float x1_rb = x1_lu + obj1.box.width;
    const float y1_rb = y1_lu + obj1.box.height;
    // Corners of box 2.
    const float x2_lu = obj2.box.x;
    const float y2_lu = obj2.box.y;
    const float x2_rb = x2_lu + obj2.box.width;
    const float y2_rb = y2_lu + obj2.box.height;

    // Overlap extent per axis, clamped at 0. Without the clamp two boxes that
    // are disjoint on both axes would multiply two negative extents into a
    // positive "intersection".
    const float i_w = std::max(0.0f, std::min(x1_rb, x2_rb) - std::max(x1_lu, x2_lu));
    const float i_h = std::max(0.0f, std::min(y1_rb, y2_rb) - std::max(y1_lu, y2_lu));
    const float inter_area = i_w * i_h;

    // Union = sum of both areas minus the part counted twice.
    const float union_area = obj1.box.width * obj1.box.height +
                             obj2.box.width * obj2.box.height - inter_area;
    if (union_area <= 0.0f) {
        return 0.0f;  // degenerate boxes: avoid dividing by zero
    }
    return inter_area / union_area;
}
std::vector<int> hardNMS(std::vector<Object> &input, std::vector<Object> &output, float iou_threshold, unsigned int topk)
{ //Object只有confidence和label
const unsigned int box_num = input.size();
std::vector<int> merged(box_num, 0);
std::vector<int> indices;
if (input.empty())
return indices;
std::vector<Object> res;
//先对bboxs按照conf进行排序
std::sort(input.begin(), input.end(),
[](const Object &a, const Object &b)
{ return a.confidence > b.confidence; }); //[]表示C++中的lambda函数
unsigned int count = 0;
for (unsigned int i = 0; i < box_num; ++i)
{ //按照conf依次遍历bbox
if (merged[i])
continue;
//如果已经被剔除,continue
Object buf;
buf = input[i];
merged[i] = 1; //剔除当前bbox
//由于后面的置信度低,只需要考虑当前bbox后面的即可
for (unsigned int j = i + 1; j < box_num; ++j)
{
if (merged[j])
continue;
float iou = static_cast<float>(iou_of(input[j], input[i]));
//计算iou
if (iou > iou_threshold)
{ //超过阈值认为重合,剔除第j个bbox,
merged[j] = 1;
}
}
indices.push_back(i);
res.push_back(buf); //将最高conf的bbox填入结果
// keep top k
//获取前k个输出,这个应该是针对密集输出的情况,此时input已经做了conf剔除
count += 1;
if (count >= topk)
break;
}
output.swap(res);
return indices;
}
// Logistic function 1 / (1 + e^-x), evaluated in single precision.
inline float sigmoid(float x)
{
    return 1.0f / (1.0f + std::exp(-x));
}
CMakeLists.txt
# Build script for the YOLOv8 TensorRT demo (detect.cpp + processing.hpp).
cmake_minimum_required(VERSION 3.10)
project(Demo)

# Default to a Debug build (convenient for gdb) unless one was chosen.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Debug")
endif()

# The sources only need C++11.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# option() takes a help string before the default value.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static CUDA runtime library" OFF)
find_package(CUDA REQUIRED)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  message("embed_platform on")
  include_directories(/usr/local/cuda/targets/aarch64-linux/include)
  link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
  message("embed_platform off")
  include_directories(/usr/local/cuda/include)
  link_directories(/usr/local/cuda/lib64)
endif()

find_package(OpenCV REQUIRED)
find_package(Threads REQUIRED)

# TensorRT install location; override with -DTENSORRT_ROOT=/path/to/TensorRT.
set(TENSORRT_ROOT "/home/home_expand/TensorRT-8.6.1.6" CACHE PATH "TensorRT installation directory")
include_directories(${OpenCV_INCLUDE_DIRS} ${TENSORRT_ROOT}/include)
link_directories(${TENSORRT_ROOT}/lib)

add_executable(Demo detect.cpp)
# A single optimisation level instead of the conflicting -Ofast / -O2 pair.
target_compile_options(Demo PRIVATE -Wall -Wfatal-errors -O2)
target_compile_definitions(Demo PRIVATE _MWAITXINTRIN_H_INCLUDED)
target_link_libraries(Demo nvinfer cudart ${OpenCV_LIBRARIES} Threads::Threads)