引言
介绍如何在C++环境中使用ONNX Runtime部署YOLOv5模型实现目标检测。由于笔记本电脑配置问题,没有实现CUDA加速。下图是图片检测结果,代码内容是视频检测。
一、技术栈概述
1. YOLOv5模型
YOLOv5是Ultralytics公司开发的单阶段目标检测器,相比前代有更优的速度-精度平衡。其核心特点包括:
- 使用CSPDarknet53作为骨干网络
- PANet特征金字塔结构
- 自适应锚框计算
- 输出格式为1x25200x85的张量
2. ONNX Runtime
ONNX Runtime是一个高性能推理引擎,支持多种硬件平台,提供统一的API接口。主要优势:
- 跨平台支持(Windows/Linux/macOS)
- 多硬件后端(CPU/GPU/TPU等)
- 图优化和量化支持
- 低延迟推理
二、部署代码详解
#include "onnxruntime_cxx_api.h"
#include "cpu_provider_factory.h" //引入了ONNX Runtime的C++ API和CPU提供者工厂
#include<opencv2/opencv.hpp>
#include<fstream> //用于读取标签文件
#include <iostream>
using namespace std;
using namespace cv;
using namespace Ort;
/*
yolov5
输入: 1x3x640x640的归一化图像
输出: 1x25200x85的检测结果矩阵
*/
// Path of the class-label file: one label per line, index == class id.
std::string labels_txt_file = "E:/PycharmProjects/YOLO/yolov5-7.0/yolov5-7.0/classes.txt";

// Reads class names (one per line) from `path`, skipping blank lines.
// `path` defaults to the global labels_txt_file, so existing callers
// (readClassNames()) are unchanged. Exits the process if the file cannot
// be opened, matching the original behavior.
std::vector<std::string> readClassNames(const std::string& path = labels_txt_file)
{
	std::vector<std::string> classNames;
	// Open the label file via an input stream; RAII closes it on scope exit.
	std::ifstream fp(path);
	if (!fp.is_open())
	{
		printf("文件打开失败!\n");
		exit(-1);
	}
	// FIX: loop on getline() success instead of `while (!fp.eof())`.
	// eof() only becomes true AFTER a read fails, so the !eof() form can run
	// one extra bogus iteration and silently ignores non-EOF stream errors.
	std::string name;
	while (std::getline(fp, name))
	{
		if (!name.empty())
			classNames.push_back(name);
	}
	return classNames;
}
int main()
{
	// Scale factors mapping model-input coordinates back onto the letterboxed frame.
	float x_factor = 0.0f;
	float y_factor = 0.0f;
	vector<string> labels = readClassNames();
	//Mat frame = imread("E:/PycharmProjects/YOLO/yolov5-7.0/yolov5-7.0/data/images/bus.jpg"); // single-image detection (alternative)
	string videopath = "E:/face_test.mp4"; // video file to run detection on
	//VideoCapture cap(0); // or: capture from the default webcam
	VideoCapture cap(videopath);
	if (!cap.isOpened())
	{
		cerr << "无法打开摄像头" << endl;
		return -1;
	}

	// ---- Create the ONNX Runtime session ----
	string onnxpath = "E:/PycharmProjects/YOLO/yolov5-7.0/yolov5-7.0/yolov5s.onnx";
	// NOTE(review): char-by-char widening is only correct for pure-ASCII paths;
	// use a real UTF-8 -> UTF-16 conversion if the path may contain non-ASCII.
	wstring modelpath = wstring(onnxpath.begin(), onnxpath.end());
	SessionOptions session_options;
	Env env = Env(ORT_LOGGING_LEVEL_ERROR, "env_yolov5s_onnx");
	session_options.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); // basic graph optimizations
	OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 0); // CPU execution provider
	Session session_(env, modelpath.c_str(), session_options);

	// ---- Query input/output metadata from the model ----
	int input_nodes_num = session_.GetInputCount();
	int output_nodes_num = session_.GetOutputCount();
	vector<string> input_node_names;
	vector<string> output_node_names;
	AllocatorWithDefaultOptions allocator;
	int input_h = 0;
	int input_w = 0;
	for (int i = 0; i < input_nodes_num; i++)
	{
		auto input_name = session_.GetInputNameAllocated(i, allocator);
		input_node_names.push_back(input_name.get());
		auto inputShapeInfo = session_.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
		int ch = inputShapeInfo[1];
		input_h = inputShapeInfo[2];
		input_w = inputShapeInfo[3];
		cout << "输入格式:" << ch << "x" << input_h << "x" << input_w << endl;
	}
	int num = 0; // candidate detections per image (e.g. 25200 for a 640x640 yolov5s)
	int nc = 0;  // values per detection (e.g. 85 = 4 box + 1 objectness + 80 class scores)
	for (int i = 0; i < output_nodes_num; i++)
	{
		auto output_name = session_.GetOutputNameAllocated(i, allocator);
		output_node_names.push_back(output_name.get());
		auto outShapeInfo = session_.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
		num = outShapeInfo[1];
		nc = outShapeInfo[2];
		cout << "输出格式:" << num << "x" << nc << endl;
	}

	// ---- Per-frame inference loop ----
	while (true)
	{
		Mat frame;
		cap >> frame;
		if (frame.empty())
		{
			cerr << "无法获取帧!" << endl;
			break;
		}
		int64 start = getTickCount(); // timestamp for FPS computation

		// Letterbox: paste the frame into a square canvas so resizing to the
		// model input preserves the aspect ratio (no distortion).
		int w = frame.cols;
		int h = frame.rows;
		int max_ = max(h, w);
		Mat image = Mat::zeros(Size(max_, max_), CV_8UC3);
		Rect roi(0, 0, w, h);
		frame.copyTo(image(roi));
		// FIX: scale by the actual model input size queried above instead of a
		// hard-coded 640, so models exported at other resolutions work too.
		x_factor = image.cols / static_cast<float>(input_w);
		y_factor = image.rows / static_cast<float>(input_h);

		// Normalize to [0,1], BGR->RGB, resize to model input, layout NCHW.
		Mat blob = dnn::blobFromImage(image, 1.0 / 255.0, Size(input_w, input_h), Scalar(0, 0, 0), true, false);
		size_t tpixels = static_cast<size_t>(input_h) * input_w * 3; // total tensor elements
		array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };

		// Wrap the blob in an ORT tensor (no copy) and run inference.
		auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
		Value input_tensor_ = Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
		const array<const char*, 1> inputNames = { input_node_names[0].c_str() };
		const array<const char*, 1> outNames = { output_node_names[0].c_str() };
		vector<Value> ort_outputs;
		try
		{
			ort_outputs = session_.Run(RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
		}
		catch (const exception& e) // FIX: catch by const reference (was by value -> slicing)
		{
			cout << e.what() << endl;
			continue; // FIX: skip this frame; ort_outputs is empty and ort_outputs[0] would crash
		}
		float* pdata = ort_outputs[0].GetTensorMutableData<float>();

		// ---- Post-processing: objectness filter, best class, box rescale ----
		vector<Rect> boxes;
		vector<int> classIds;
		vector<float> confidences;
		Mat det_output(num, nc, CV_32F, pdata); // view over ORT output, no copy
		for (int i = 0; i < det_output.rows; i++)
		{
			float confidence = det_output.at<float>(i, 4); // objectness score
			if (confidence < 0.45) {
				continue;
			}
			// Columns [5, nc) are the per-class scores; pick the best one.
			Mat classes_scores = det_output.row(i).colRange(5, nc);
			Point classIdPoint;
			double score;
			minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
			if (score > 0.25)
			{
				// Convert center/size box to top-left/size in frame coordinates.
				float cx = det_output.at<float>(i, 0);
				float cy = det_output.at<float>(i, 1);
				float ow = det_output.at<float>(i, 2);
				float oh = det_output.at<float>(i, 3);
				int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
				int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
				int width = static_cast<int>(ow * x_factor);
				int height = static_cast<int>(oh * y_factor);
				boxes.push_back(Rect(x, y, width, height));
				classIds.push_back(classIdPoint.x);
				confidences.push_back(score);
			}
		}

		// Non-maximum suppression: score threshold 0.25, IoU threshold 0.45.
		vector<int> indexes;
		dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
		for (size_t i = 0; i < indexes.size(); i++)
		{
			int idx = indexes[i];
			int cid = classIds[idx];
			rectangle(frame, boxes[idx], cv::Scalar(0, 0, 255), 2, 8, 0);
			putText(frame, labels[cid].c_str(), boxes[idx].tl(), FONT_HERSHEY_PLAIN, 1.0, Scalar(255, 0, 0), 1, 8);
		}

		// Overlay FPS and show the annotated frame.
		float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
		putText(frame, cv::format("FPS: %.2f", 1.0 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 1, 8);
		imshow("YOLOv5实时推理演示", frame);
		if (waitKey(1) == 27) // ESC to quit
		{
			break;
		}
	}
	// FIX: removed session_.release() / session_options.release(). On the Ort
	// C++ wrappers, release() relinquishes ownership of the underlying handle
	// WITHOUT freeing it, so the originals leaked both objects. The RAII
	// destructors (Session, SessionOptions, Env, VideoCapture) clean up here.
	return 0;
}