ByteTrack_YOLO v5目标跟踪，openCV C++实现（详细注释）

Voluntino

已于 2024-08-06 16:35:04 修改

阅读量1.4k

点赞数 14

分类专栏： Python学习笔记文章标签： YOLO 目标跟踪 opencv

于 2024-08-06 16:34:40 首次发布

本文链接：https://blog.csdn.net/weixin_43013458/article/details/140959320

版权

Python学习笔记专栏收录该内容

3 篇文章 0 订阅

订阅专栏

在这里插入图片描述

这里写目录标题

YOLO v5检测器
- Pytorch导出ONNX模型
- 使用OpenCV推理
ByteTrack跟踪器
- 跟踪器部署
- 结合YOLO v5检测器

前文回顾
ByteTrack论文原理python代码解析

ByteTrack的论文和开源代码使用YOLOX作为检测器，并用TensorRT进行部署。鉴于YOLO v5应用广泛，且笔者对openCV更加熟悉，本博文将用C++和openCV实现基于YOLO v5检测器的ByteTrack。本文仅介绍部署和实现，不讲解原理，原理与代码解析可到前文查看。

所用库版本

opencv-4.5.1
eigen-3.3.8

YOLO v5检测器

Pytorch导出ONNX模型

使用YOLO V5的仓库中的export.py将训练好的.pt文件转换为.onnx文件。

使用OpenCV推理

上一步导出的ONNX模型，可以使用openCV的dnn库直接进行推理，只需要在推理的前后进行预处理和后处理即可。

步骤如下：读取模型文件 -> 图片预处理（编码） -> 推理结果解码

接下来，按照以上三个步骤写成三个函数，便于理解和集成到项目中。
头文件.h:

#pragma once
#include <iostream>
#include <opencv2/opencv.hpp>
#include "BYTETracker.h"    // 引入定义的Object结构体,此文件在后文中介绍

/// YOLOv5 detector built on OpenCV's dnn module.
///
/// Intended call order: readModel() loads the network, preprocessImg() turns a
/// frame into an input blob, decode_outputs() converts the network output into
/// Object detections (Object is declared in BYTETracker.h).
class Yolo {
public:
	Yolo() {
	}
	~Yolo() {}

	/// Loads the model file at netPath into net and selects CUDA or CPU inference.
	/// Returns 0 on success and 1 when the model cannot be read.
	int readModel(cv::dnn::Net &net, std::string &netPath, bool isCuda);
	/// Builds the netWidth x netHeight input blob for SrcImg.
	int preprocessImg(cv::Mat &SrcImg, cv::Mat &blob);
	/// Decodes the network output for blob and appends the kept detections to
	/// output; returns the resulting number of elements in output.
	int decode_outputs(cv::Mat &SrcImg, cv::dnn::Net &net, cv::Mat &blob, std::vector<Object> &output);

private:
	// Input resolution the network expects.
	const int netWidth = 640;
	const int netHeight = 640;
	// IoU threshold passed to non-maximum suppression.
	float nmsThreshold = 0.45f;
	// Minimum box (objectness) score for a candidate box.
	float boxThreshold = 0.55f;
	// Minimum class score for a candidate box.
	float classThreshold = 0.55f;
	// Class names; the size of this list is the number of classes the decoder expects.
	std::vector<std::string> className = { "face"};
};

#pragma once

源文件.cpp:

#include"AIYoloSAM.h";

using namespace std;
using namespace cv;

/******************** 1. `readModel()` ********************/

int Yolo::readModel(Net &net, string &netPath, bool isCuda = false) {
	try {
		# opencv的readNet函数，读取.onnx模型文件的文件名
		net = dnn::readNet(netPath);
	}
	catch (const std::exception&) {
		return 1;
	}

	// 若opencv编译了CUDA版本可以设置使用GPU推理
	if (isCuda) {
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
	}
	// 否则使用CPU也可以，速度慢许多
	else {
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
	}
	return 0;
}

/******************** 2. `preprocessImg()` ********************/

int Yolo::preprocessImg(Mat &SrcImg, Mat &blob) {
	int col = SrcImg.cols;
	int row = SrcImg.rows;

	Mat netInputImg = SrcImg.clone();

	// opencv的blobFromImage函数
	// 对图片进行二值化、调整图片尺寸为网络输入所需的大小、图片序列化
	dnn::blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(0, 0, 0), true, false);

	return 0;
}

/****************** 3. `decode_outputs()` ******************/

int Yolo::decode_outputs(Mat &SrcImg, Net &net, Mat &blob, vector<Object> &output) {
	// 预处理的图片设为网络的输入
	net.setInput(blob);
	// 用来存放网络输出
	std::vector<cv::Mat> netOutput;

	std::vector<int> classIds;//网络输出的id
	std::vector<float> confidences;//网络输出的id的置信度分数
	std::vector<cv::Rect> rects;//网络输出id的矩形框

	// 用于恢复原图尺寸
	float ratio_h = (float)netInputImg.rows / netHeight;
	float ratio_w = (float)netInputImg.cols / netWidth;
	
	int net_width = className.size() + 5;  
	float* pdata = (float*)netOutputImg[0].data;
	// 对所有检测锚进行扫描
	for (int stride = 0; stride < 3; stride++) {
		int grid_x = (int)(netWidth / netStride[stride]);
		int grid_y = (int)(netHeight / netStride[stride]);
		for (int anchor = 0; anchor < 3; anchor++) {
			for (int i = 0; i < grid_y; i++) {
				for (int j = 0; j < grid_x; j++) {
					float rect_score = pdata[4]; // 检测框的分数
					// 分数大于置信度阈值
					if (rect_score > rectThreshold) {
						接下来检查最高置信度分数属于哪个分类类别
						cv::Mat scores(1, className.size(), CV_32FC1, pdata + 5);
						Point classIdPoint;
						double max_class_socre;
						minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
						max_class_socre = (float)max_class_socre;

						// 最高置信度分数大于阈值
						if (max_class_socre > classThreshold) {
							// 解码矩形框的中心和宽高
							float x_center = pdata[0];
							float y_center = pdata[1];
							float w = pdata[2];
							float h = pdata[3];
							// 把中心转换为矩形框的左上角坐标，并对应到到原图尺寸
							float left = (x_center - 0.5*w)*ratio_w;
							float top = (y_center - 0.5*h)*ratio_h;

							// 存下id, 置信度分数, 矩形框的ltwh
							classIds.push_back(classIdPoint.x);
							confidences.push_back(max_class_socre*box_score);
							rects.push_back(Rect_<float>(left, top, w*ratio_w, h*ratio_h));
						}
					}
					// 继续下一个扫描
					pdata += net_width;
				}
			}
		}
	}

	//NMS 非最大抑制以消除具有较低置信度的冗余重叠框
	vector<int> nms_result;
	// openCV的NMSBoxes
	dnn::NMSBoxes(rects, confidences, classThreshold, nmsThreshold, nms_result);
	for (int i = 0; i < nms_result.size(); i++) {
		int idx = nms_result[i];
		Output object;
		
		object.label = classIds[idx];
		object.prob = confidences[idx];
		object.rect = rects[idx];
		output.push_back(object);	
	}

	return output.size();
}

ByteTrack跟踪器

跟踪器部署

在原文的github仓库下载项目，将文件夹 ./deploy/TensorRT/cpp/include 和 ./deploy/TensorRT/cpp/src 中的以下文件添加入项目：

BYTETracker.h    
dataType.h      
kalmanFilter.h  
lapjv.h          
STrack.h         

BYTETracker.cpp  // ByteTracker逻辑实现的核心代码
kalmanFilter.cpp // Tracker要用到的kalman滤波器代码
lapjv.cpp        // 线性分配问题（LAP）求解，用于检测框与航迹的匹配
STrack.cpp       // 自定义的航迹类型
utils.cpp        // 处理航迹的工具代码

你会发现我们没有使用仓库中的bytetrack.cpp，这个文件中就是用TensorRT推理YOLOX，我们不使用这个，只要用上文写好的openCV推理的YOLO V5就可以啦，继续往下。

结合YOLO v5检测器

接下来新建main.cpp，结合上文自己实现的yolov5检测器和下载添加的ByteTrack跟踪器进行目标跟踪。

#include <iostream>
#include <fstream>
#include <string>
#include <opencv2//opencv.hpp>
#include <math.h>
#include "YoloSeg.h"
#include "AIYoloSAM.h"
#include "BYTETracker.h"
#include<time.h>

using namespace std;
using namespace cv;
using namespace dnn;

// Recursively collects the path of every non-directory entry under `path`
// into `files`, descending into sub-directories other than "." and "..".
void getFiles(string path, vector<string>& files)
{
	struct _finddata_t entry;
	const string pattern = path + "\\*";
	const intptr_t handle = _findfirst(pattern.c_str(), &entry);
	// Nothing matched (or the directory could not be opened): leave files untouched.
	if (handle == -1)
		return;

	do
	{
		const string fullPath = path + "\\" + entry.name;
		const bool isDirectory = (entry.attrib & _A_SUBDIR) != 0;
		if (!isDirectory)
		{
			files.push_back(fullPath);
			continue;
		}
		// Skip the self and parent links so the recursion terminates.
		if (strcmp(entry.name, ".") == 0 || strcmp(entry.name, "..") == 0)
			continue;
		getFiles(fullPath, files);
	} while (_findnext(handle, &entry) == 0);
	_findclose(handle);
}


int main()
{
	string img_path = "../video";
	string save_path = "../outputVideo/";
	string model_path = "../model.onnx";

	// 实例化跟踪器，param_1 帧率，param_2 缓存，一般设置跟帧率一样即可
	BYTETracker tracker(30, 30);
	// 实例化检测器
	Yolo detection;
	Net net;

	// 检测器的第1步，读取模型
	if (detection.readModel(net, model_path, false)) {
		cout << "读取模型成功" << endl;
	}

	vector<string> files;
	// 视频文件可以一帧一帧存为图片放到文件夹下，然后一张一张读取处理
	// 当然也可以读取视频文件，但也要一帧一帧地循环处理
	getFiles(img_path, files);
	for (int i = 0; i < files.size(); i++) {
		Mat img = imread(files[i]);
		if (img.empty())
			break;

		// 存放检测器结果
		/*
		struct Object
		{
		    cv::Rect_<float> rect;
		    int label;
		    float prob;
		};
		*/
		vector<Object> objects;
		// 检测器的第2步，图片预处理（编码）
		Mat blob;
		detection.preprocessImg(img, blob)
		// 检测器的第三步，解码得到输出
		detection.decode_outputs(img, net, blob, objects)

		// 检测器的输出用跟踪器tracker进行航迹跟踪
		// 原作者项目解耦的很好，c++实现ByteTrack的核心其实就这一句，其他的都是yolo获得检测框
		vector<STrack> output_stracks = tracker.update(objects);

		// 把跟踪结果画出来并保存图片
		for (int i = 0; i < output_stracks.size(); i++)
		{
			vector<float> tlwh = output_stracks[i].tlwh;
			bool vertical = tlwh[2] / tlwh[3] > 1.6;
			if (tlwh[2] * tlwh[3] > 20 && !vertical)
			{
				Scalar s = tracker.get_color(output_stracks[i].track_id);
				putText(img, format("%d", output_stracks[i].track_id), Point(tlwh[0], tlwh[1] - 5),
					0, 0.6, Scalar(0, 0, 255), 3, LINE_AA);
				rectangle(img, Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 3);
			}
		}
		imwrite(save_path + to_string(i) + string(".png"), img);
	}

	return 0;
}