【YOLOv5】OpenCV 4.5.0 加载 ONNX 模型进行推理（GPU 加速）

最新推荐文章于 2025-04-30 11:41:41 发布

十年一梦实验室

最新推荐文章于 2025-04-30 11:41:41 发布

阅读量1.2k

点赞数

CC 4.0 BY-SA版权

分类专栏： C++ ONNX 文章标签：计算机视觉 opencv

本文链接：https://blog.csdn.net/cxyhjl/article/details/125395227

C++ 同时被 2 个专栏收录

46 篇文章

订阅专栏

ONNX

2 篇文章

订阅专栏

该代码实现了一个YOLOv5目标检测模型的加载、图像预处理、检测及结果展示的过程。通过读取ONNX模型、设置CUDA或CPU运行环境，对输入图像进行缩放、归一化处理，然后利用OpenCV的dnn模块进行前向传播，获取检测框和置信度，并进行NMS非极大值抑制，最后将结果绘制在图像上并显示。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

/**
 * @brief Reads class names, one per line, from a text file.
 *
 * Blank lines are skipped and a trailing '\r' (left behind by CRLF line
 * endings) is stripped, so the number of returned names equals the number of
 * real labels in the file and no label carries an invisible control character.
 *
 * @param path File to read; defaults to "./classes.txt".
 * @return The class names in file order. Empty when the file cannot be opened
 *         or contains no names.
 */
std::vector<std::string> load_class_list(const std::string& path = "./classes.txt")
{
	std::vector<std::string> class_list;
	std::ifstream ifs(path);
	std::string line;
	while (std::getline(ifs, line))
	{
		// Files written on Windows end each line with "\r\n"; getline only removes '\n'.
		if (!line.empty() && line.back() == '\r')
		{
			line.pop_back();
		}
		// An empty entry would be counted as a class without naming one.
		if (line.empty())
		{
			continue;
		}
		class_list.push_back(line);
	}
	return class_list;
}

/**
 * Reads the network from "./best2GPU.onnx", selects the preferred backend and
 * target, and stores the configured network in `net`.
 *
 * @param net     Receives the configured network; assigned only after the
 *                model has been read and configured.
 * @param is_cuda true selects the CUDA backend/target, false selects the
 *                OpenCV backend on the CPU.
 */
void load_net(cv::dnn::Net& net, bool is_cuda)
{
	cv::dnn::Net loaded = cv::dnn::readNet("./best2GPU.onnx");

	// Pick the backend/target pair up front, then apply both in one place.
	// (The original author left a note about the "_FP16" target variant here.)
	const auto backend = is_cuda ? cv::dnn::DNN_BACKEND_CUDA : cv::dnn::DNN_BACKEND_OPENCV;
	const auto target = is_cuda ? cv::dnn::DNN_TARGET_CUDA : cv::dnn::DNN_TARGET_CPU;

	std::cout << (is_cuda ? "Attempty to use CUDA\n" : "Running on CPU\n");
	loaded.setPreferableBackend(backend);
	loaded.setPreferableTarget(target);

	net = loaded;
}

// Palette of four colours; main() picks one per detection with classId % colors.size().
const std::vector<cv::Scalar> colors = { cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0) };
// 3 rows x 6 values.
// NOTE(review): presumably (width, height) anchor pairs, three pairs per detection scale,
// matching the stock YOLOv5 anchors - confirm against the exported model.
const float netAnchors[3][6] = { { 10,13, 16,30, 33,23 },{ 30,61, 62,45, 59,119 },{ 116,90, 156,198, 373,326 } };
// NOTE(review): presumably the down-sampling stride of each detection scale. Four values are
// listed while strideSize is 3 - confirm whether the trailing 64 is intentional.
const float netStride[4] = { 8, 16.0,32,64 };
// Width and height the image is resized to when the network input blob is built.
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
// Minimum best-class score for a candidate; also handed to NMSBoxes as its score threshold.
const float SCORE_THRESHOLD = 0.2;
// Threshold handed to NMSBoxes as its nms_threshold argument.
const float NMS_THRESHOLD = 0.4;
// Minimum value of element 4 of a prediction row (presumably the objectness score).
const float CONFIDENCE_THRESHOLD = 0.4;
// NOTE(review): presumably the number of detection scales (rows of netAnchors / leading
// entries of netStride). Declared non-const, so it can be modified at run time.
int strideSize = 3;

/**
 * One detection produced by detect().
 *
 * The scalar members carry default initializers so that a default-constructed
 * Detection never exposes indeterminate values.
 */
struct Detection
{
	int class_id{0};        // index into the class-name list passed to detect()
	float confidence{0.0f}; // best class score multiplied by the row's confidence value
	cv::Rect box;           // bounding box in pixel coordinates of the image passed to detect()
};

/**
 * @brief Pads an image with zeros to a square canvas.
 *
 * The source is copied into the top-left corner of a zero-filled square whose
 * side equals the larger of the source's width and height, so coordinates in
 * the padded image are also valid coordinates in the source.
 *
 * @param source Image to pad. Any element type/channel count is accepted; the
 *               canvas is created with the same type.
 * @return The padded square image, or an empty cv::Mat when `source` is empty.
 */
cv::Mat format_yolov5(const cv::Mat& source) {
	if (source.empty()) {
		return cv::Mat();
	}

	const int side = std::max(source.cols, source.rows);
	// The canvas must have the source's own type: copying into a region of a
	// differently-typed matrix fails instead of converting.
	cv::Mat result = cv::Mat::zeros(side, side, source.type());
	source.copyTo(result(cv::Rect(0, 0, source.cols, source.rows)));
	return result;
}

/**
 * @brief Runs the network on one image and appends the detections that survive
 *        non-maximum suppression to `output`.
 *
 * The image is padded to a square with format_yolov5(), resized to
 * INPUT_WIDTH x INPUT_HEIGHT, scaled by 1/255 and has its first and third
 * channels swapped when the input blob is built. The first network output is
 * read as consecutive rows of floats laid out as
 * [centre x, centre y, width, height, confidence, class scores...].
 *
 * The row length and row count are taken from the output tensor itself rather
 * than computed from the class list and fixed grid sizes, so a class list that
 * does not match the model can no longer cause reads past the end of the
 * output buffer. Only the first min(model classes, className.size()) scores of
 * a row are considered, which keeps every returned class_id a valid index into
 * `className`.
 *
 * @param image     Image to analyse; not modified. Nothing is appended when it is empty.
 * @param net       Configured network used for the forward pass.
 * @param output    Receives the detections; existing elements are kept.
 * @param className Class names; nothing is appended when it is empty.
 * @throws cv::Exception if the network output is not a continuous CV_32F
 *         tensor whose rows hold more than five values.
 */
void detect(cv::Mat& image, cv::dnn::Net& net, std::vector<Detection>& output, const std::vector<std::string>& className) {
	if (image.empty() || className.empty()) {
		return;
	}

	const cv::Mat input_image = format_yolov5(image);

	cv::Mat blob;
	cv::dnn::blobFromImage(input_image, blob, 1. / 255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
	net.setInput(blob);
	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());

	// Validate the layout before walking the raw buffer.
	CV_Assert(!outputs.empty());
	cv::Mat& predictions = outputs[0];
	CV_Assert(predictions.dims >= 2 && predictions.depth() == CV_32F && predictions.isContinuous());
	const int row_length = predictions.size[predictions.dims - 1]; // 5 box/confidence values + class scores
	CV_Assert(row_length > 5);
	const int class_count = std::min(row_length - 5, static_cast<int>(className.size()));
	const size_t row_count = predictions.total() / static_cast<size_t>(row_length);

	// Factors that map network-input coordinates back to the padded image.
	// Because the padding is added on the right/bottom only, these are also
	// coordinates in the original image.
	const float ratio_w = static_cast<float>(input_image.cols) / INPUT_WIDTH;
	const float ratio_h = static_cast<float>(input_image.rows) / INPUT_HEIGHT;

	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	float* row = predictions.ptr<float>();
	for (size_t r = 0; r < row_count; ++r, row += row_length) {
		const float confidence = row[4];
		if (!(confidence >= CONFIDENCE_THRESHOLD)) {
			continue;
		}

		// Wrap the class scores of this row (no copy) to find the best class.
		cv::Mat scores(1, class_count, CV_32FC1, row + 5);
		cv::Point class_id_point;
		double max_class_score = 0.0;
		cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id_point);
		if (!(max_class_score >= SCORE_THRESHOLD)) {
			continue;
		}

		const float x = row[0];
		const float y = row[1];
		const float w = row[2];
		const float h = row[3];
		// Convert centre/size to a top-left corner and scale to image pixels.
		const int left = static_cast<int>((x - 0.5 * w) * ratio_w);
		const int top = static_cast<int>((y - 0.5 * h) * ratio_h);

		class_ids.push_back(class_id_point.x);
		confidences.push_back(static_cast<float>(max_class_score * confidence));
		boxes.push_back(cv::Rect(left, top, static_cast<int>(w * ratio_w), static_cast<int>(h * ratio_h)));
	}

	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);

	output.reserve(output.size() + nms_result.size());
	for (const int idx : nms_result) {
		Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];
		result.box = boxes[idx];
		output.push_back(result);
	}
}

int main(int argc, char** argv)
{

	std::vector<std::string> class_list = load_class_list();

	cv::Mat frame;
	cv::VideoCapture capture("1.mp4");//2.wmv   1.mp4
	if (!capture.isOpened())
	{
		std::cerr << "Error opening video file\n";
		return -1;
	}

	bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;
	is_cuda = true;
	cv::dnn::Net net;
	load_net(net, is_cuda);

	auto start = std::chrono::high_resolution_clock::now();
	int frame_count = 0;
	float fps = -1;
	int total_frames = 0;

	while (true)
	{
		capture.read(frame);
		if (frame.empty())
		{
			std::cout << "End of stream\n";
			break;
		}

		std::vector<Detection> output;
		detect(frame, net, output, class_list);

		frame_count++;
		total_frames++;

		int detections = output.size();

		for (int i = 0; i < detections; ++i)
		{

			auto detection = output[i];
			auto box = detection.box;
			auto classId = detection.class_id;
			const auto color = colors[classId % colors.size()];
			cv::rectangle(frame, box, color, 3);
			std::ostringstream conf;
			conf << std::fixed << std::setprecision(2);
			conf << ": " << detection.confidence;
			std::string conf_str = conf.str();
			cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
			cv::putText(frame, class_list[classId].c_str() + conf_str, cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
		}

		if (frame_count >= 30)
		{

			auto end = std::chrono::high_resolution_clock::now();
			fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

			frame_count = 0;
			start = std::chrono::high_resolution_clock::now();
		}

		if (fps > 0)
		{

			std::ostringstream fps_label;
			fps_label << std::fixed << std::setprecision(2);
			fps_label << "FPS: " << fps;
			std::string fps_label_str = fps_label.str();

			cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
		}

		cv::imshow("output", frame);

		if (cv::waitKey(1) != -1)
		{
			capture.release();
			std::cout << "finished by user\n";
			break;
		}
	}

	std::cout << "Total frames: " << total_frames << "\n";
	capture.release();
	cv::destroyAllWindows();
	return 0;
}