贾志刚OpenCV3.3深度神经网络DNN模块应用学习笔记

OpenCV3.3深度神经网络DNN模块应用全套视频、课程配套PPT的PDF版本和配套源码

全套例程源码、用到的模型文件、图片和视频素材整理

在线观看

 

实例1:读取单张PNG文件(opencv3.3环境测试)

实例2:GoogleNet-Caffe模型实现图像分类

实例3:SSD模型实现对象检测

实例4:SSD-MobileNet模型实时对象检测

实例5:FCN模型实现图像分割

实例6:CNN模型预测性别与年龄

实例7:GOTURN模型实现视频对象跟踪

 

1 概述 - DNN模块介绍

1.1 环境配置

 下载与配置OpenCV3.3

OpenCV3.3下载
包括头文件
D:\opencv-3.3\opencv\build\include
D:\opencv-3.3\opencv\build\include\opencv
D:\opencv-3.3\opencv\build\include\opencv2
  库文件
D:\opencv-3.3\opencv\build\x64\vc14\lib
链接器
opencv_world330d.lib
环境变量
D:\opencv-3.3\opencv\build\x64\vc14\bin

详细opencv3.3环境配置请点击

实例1:读取单张PNG文件(opencv3.3环境测试)

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>//dnn模块类
#include <iostream>

using namespace cv;
using namespace std;

int main(int argc, char** argv) {
	Mat src = imread("tx.png");
	if (src.empty()) {
		printf("could not load image...\n");
		return -1;
	}
	namedWindow("input image", CV_WINDOW_AUTOSIZE);
	imshow("input image", src);
	
	waitKey(0);
	return 0;
}


1.2 DNN模块介绍

 Tiny-dnn模块
 支持深度学习框架
- Caffe
- TensorFlow
- Torch/PyTorch

1.3 支持的层类型

1.4 DNN模块

 图像分类
 对象检测
 实时对象检测
 图像分割
 预测
 视频对象跟踪

 

2 使用GoogleNet模型数据的图像分类

 Googlenet模型与数据介绍

Caffe - 模型下载

 bvlc_googlenet CNN模型

 基于100万张图像实现1000个分类


2.1 使用模型实现图像分类

 编码处理
- 加载Caffe模型
- 使用模型预测

实例2:GoogleNet-Caffe模型实现图像分类

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
//使用Googlenet Caffe模型实现图像分类
using namespace cv;
using namespace cv::dnn;
using namespace std;

String model_bin_file = "D:/opencv3.3/opencv/sources/samples/data/dnn/bvlc_googlenet.caffemodel";// binary model file
String model_txt_file = "D:/opencv3.3/opencv/sources/samples/data/dnn/bvlc_googlenet.prototxt";// model description (text) file
String labels_txt_file = "D:/opencv3.3/opencv/sources/samples/data/dnn/synset_words.txt";// label text file
vector<String> readLabels();// reads the class names from labels_txt_file (defined after main)
// Classifies "space_shuttle.jpg" with the GoogLeNet Caffe model and displays
// the most probable label on the image.
// Returns 0 on success and -1 when the image or the model cannot be loaded, or
// when the predicted class has no entry in the label file.
int main(int argc, char** argv) {
	Mat src = imread("space_shuttle.jpg");
	if (src.empty()) {
		printf("could not load image...\n");
		return -1;
	}
	namedWindow("input image", CV_WINDOW_AUTOSIZE);
	imshow("input image", src);
	vector<String> labels = readLabels();
	// Load the Caffe model (network description + trained weights).
	Net net = readNetFromCaffe(model_txt_file, model_bin_file);
	if (net.empty()) {
		printf("read caffe model data failure...\n");
		return -1;
	}
	// The blob is resized to 224x224 (the input size the original author read
	// from bvlc_googlenet.prototxt) and the per-channel mean is subtracted.
	Mat inputBlob = blobFromImage(src, 1.0, Size(224, 224), Scalar(104, 117, 123));
	// A single forward pass is sufficient: repeating it with the same input
	// only repeats the same computation.
	net.setInput(inputBlob, "data");// feed the "data" input layer
	Mat prob = net.forward("prob");// read the "prob" output layer
	Mat probMat = prob.reshape(1, 1);// flatten to one row of class scores
	Point classNumber;// location of the best score; x is the class index
	double classProb;// value of the best score
	minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber);
	int classidx = classNumber.x;
	// Guard the lookup: a label file that does not match the model would
	// otherwise make labels.at() throw an uncaught std::out_of_range.
	if (classidx < 0 || classidx >= (int)labels.size()) {
		printf("class index %d has no entry in the label file (%d labels)\n", classidx, (int)labels.size());
		return -1;
	}
	printf("\n current image classification : %s, possible : %.2f", labels.at(classidx).c_str(), classProb);
	// Draw the label on the image in red.
	putText(src, labels.at(classidx), Point(20, 20), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2, 8);
	imshow("Image Classification", src);

	waitKey(0);
	return 0;
}
// Reads the class names from labels_txt_file, one per non-empty line.
// Everything up to and including the first space of a line is dropped; a line
// without a space is kept whole. Terminates the program with exit(-1) when
// the file cannot be opened.
vector<String> readLabels() {
	vector<String> classNames;
	ifstream fp(labels_txt_file);
	if (!fp.is_open()) {
		printf("could not open the file");
		exit(-1);
	}
	string name;
	// Loop on the result of getline itself: testing eof() beforehand never
	// terminates if the stream enters a failed state without reaching EOF.
	while (getline(fp, name)) {
		// Drop the carriage return left over from CRLF line endings.
		if (!name.empty() && name[name.length() - 1] == '\r') {
			name.erase(name.length() - 1);
		}
		if (name.length()) {
			// find() returns npos when there is no space; npos + 1 wraps to 0,
			// so the whole line is used in that case.
			classNames.push_back(name.substr(name.find(' ') + 1));
		}
	}
	return classNames;// the stream is closed by its destructor
}

航天飞机,概率100%

山地单车,概率93%

 

3 使用SSD模型实现对象检测

 SSD模型与数据介绍
 使用模型实现对象检测

3.1 SSD模型与数据介绍

 SSD模型
- https://github.com/weiliu89/caffe/tree/ssd#models
 Fast –R-CNN模型基础上延伸
 基于PASCAL VOC数据集实现200个分类对象检测

3.2 模型文件

 二进制模型
- VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel
 网络描述
- ILSVRC2016/SSD_300x300/deploy.prototxt
 分类信息
- ILSVRC2016/SSD_300x300/labelmap_det.txt

3.3 使用模型实现对象检测

 编码处理
- 加载Caffe模型
- 使用模型预测

实例3:SSD模型实现对象检测

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

const size_t width = 300;// the model input size is 300x300
const size_t height = 300;
// label file
String labelFile = "D:/opencv3.3/opencv/sources/samples/data/dnn/labelmap_det.txt";
// model (weights) file
String modelFile = "D:/opencv3.3/opencv/sources/samples/data/dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel";
// model description file
String model_text_file = "D:/opencv3.3/opencv/sources/samples/data/dnn/deploy.prototxt";

// Reads the object names from labelFile (defined after main).
vector<String> readLabels();
// Per-channel values that getMean() turns into the image subtracted by preprocess().
const int meanValues[3] = { 104, 117, 123 };
// Builds a w x h three-channel float image in which every pixel equals
// (meanValues[0], meanValues[1], meanValues[2]).
static Mat getMean(const size_t &w, const size_t &h) {
	const Scalar perChannelMean(meanValues[0], meanValues[1], meanValues[2]);
	// Constructing the CV_32FC3 image directly yields the same result as
	// merging three constant single-channel planes.
	return Mat((int)h, (int)w, CV_32FC3, perChannelMean);
}

// Converts the frame to float, resizes it to width x height and subtracts the
// mean image produced by getMean(). The input frame is left unmodified.
static Mat preprocess(const Mat &frame) {
	Mat asFloat;
	frame.convertTo(asFloat, CV_32F);

	Mat resized;
	resize(asFloat, resized, Size(width, height)); // 300x300 image

	Mat centered;
	subtract(resized, getMean(width, height), centered);
	return centered;
}

int main(int argc, char** argv) {
	Mat frame = imread("persons.png");
	if (frame.empty()) {
		printf("could not load image...\n");
		return -1;
	}
	namedWindow("input image", CV_WINDOW_AUTOSIZE);
	imshow("input image", frame);

	vector<String> objNames = readLabels();
	// import Caffe SSD model
	Ptr<dnn::Importer> importer;
	try {
		importer = createCaffeImporter(model_text_file, modelFile);
	}
	catch (const cv::Exception &err) {
		cerr << err.msg << endl;
	}
	//初始化网络
	Net net;
	importer->populateNet(net);
	importer.release();

	Mat input_image = preprocess(frame);//获取输入图像
	Mat blobImage = blobFromImage(input_image);//将图像转换为blob

	net.setInput(blobImage, "data");//将图像转换的blob数据输入到网络的第一层“data”层,见deploy.protxt文件
	Mat detection = net.forward("detection_out");//结果输出(最后一层“detection_out”层)输出给detection
	Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
	float confidence_threshold = 0.2;//自信区间,可以修改,越低检测到的物体越多
	for (int i = 0; i < detectionMat.rows; i++) {
		float confidence = detectionMat.at<float>(i, 2);
		if (confidence > confidence_threshold) {
			size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
			float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
			float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
			float br_x = detectionMat.at<float>(i, 5) * frame.cols;
			float br_y = detectionMat.at<float>(i, 6) * frame.rows;

			Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
			//标记框
			rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
			//设置颜色
			putText(frame, format("%s", objNames[objIndex].c_str()), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
		}
	}
	imshow("ssd-demo", frame);

	waitKey(0);
	return 0;
}

// Reads the object names from labelFile. One name is produced for every line
// whose first non-blank token is `display_name:`; the name is the text between
// the first and last double quote on that line, or the whitespace-trimmed
// remainder when the value is not quoted.
// Terminates the program with exit(-1) when the file cannot be opened.
vector<String> readLabels() {
	vector<String> objNames;
	ifstream fp(labelFile);
	if (!fp.is_open()) {
		printf("could not open the file...\n");
		exit(-1);
	}
	const string key = "display_name:";
	string line;
	// Loop on getline's own result rather than eof(), so a failed stream
	// cannot spin forever.
	while (getline(fp, line)) {
		// Accept the key at the start of the line, optionally indented.
		const size_t keyPos = line.find(key);
		if (keyPos == string::npos || line.find_first_not_of(" \t") != keyPos) {
			continue;
		}
		string value = line.substr(keyPos + key.length());
		const size_t firstQuote = value.find('"');
		const size_t lastQuote = value.rfind('"');
		if (firstQuote != string::npos && lastQuote > firstQuote) {
			// Quoted value: keep only what is between the quotes. Unlike a fixed
			// substr(15), this cannot throw on short lines and is not confused by
			// a trailing carriage return.
			value = value.substr(firstQuote + 1, lastQuote - firstQuote - 1);
		}
		else {
			// Unquoted (or malformed) value: trim surrounding whitespace.
			const size_t first = value.find_first_not_of(" \t\r");
			const size_t last = value.find_last_not_of(" \t\r");
			value = (first == string::npos) ? string() : value.substr(first, last - first + 1);
		}
		// Always push one entry per display_name line so that positions in the
		// returned vector keep matching the order of the entries in the file.
		objNames.push_back(value);
	}
	return objNames;
}

     

      

由于SSD模型支持200个分类,分类数目比较多,所以运行时间会长一些

 

4 SSD-MobileNet模型实时对象检测

4.1  MobileNet模型与数据介绍

 SSD-MobileNet模型
- https://github.com/weiliu89/caffe/tree/ssd#models
 SSD模型的分类子集
 支持20个分类标签
 实时检测

4.2 模型文件

 二进制模型
- MobileNetSSD_deploy.caffemodel
 网络描述
- MobileNetSSD_deploy.prototxt
 分类信息
- 20个分类

4.3 使用模型实现对象检测

 编码处理
- 加载Caffe模型
- 使用模型预测

实例4:SSD-MobileNet模型实时对象检测

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// Size passed to blobFromImage in main.
const size_t width = 300;
const size_t height = 300;
const float meanVal = 127.5;// mean value passed to blobFromImage
const float scaleFactor = 0.007843f;// scale factor passed to blobFromImage
// Class names, indexed by the class index read from each detection row.
const char* classNames[] = { "background",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor" };
// model (weights) file
String modelFile = "D:/opencv3.3/opencv/sources/samples/data/dnn/MobileNetSSD_deploy.caffemodel";
// network description (prototxt) file
String model_text_file = "D:/opencv3.3/opencv/sources/samples/data/dnn/MobileNetSSD_deploy.prototxt";

int main(int argc, char** argv) {
	VideoCapture capture;//读取视频
	capture.open("01.mp4");
	namedWindow("input", CV_WINDOW_AUTOSIZE);
	int w = capture.get(CAP_PROP_FRAME_WIDTH);//获取视频宽度
	int h = capture.get(CAP_PROP_FRAME_HEIGHT	);//获取视频高度
	printf("frame width : %d, frame height : %d", w, h);

	// set up net
	Net net = readNetFromCaffe(model_text_file, modelFile);

	Mat frame;
	while (capture.read(frame)) {
		imshow("input", frame);

		// 预测
		Mat inputblob = blobFromImage(frame, scaleFactor, Size(width, height), meanVal, false);
		net.setInput(inputblob, "data");
		Mat detection = net.forward("detection_out");

		// 绘制
		Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
		float confidence_threshold = 0.25;//自信区间,越小检测到的物体越多(>=0.25)
		for (int i = 0; i < detectionMat.rows; i++) {
			float confidence = detectionMat.at<float>(i, 2);
			if (confidence > confidence_threshold) {
				size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
				float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
				float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
				float br_x = detectionMat.at<float>(i, 5) * frame.cols;
				float br_y = detectionMat.at<float>(i, 6) * frame.rows;

				Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
				rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
				putText(frame, format("%s", classNames[objIndex]), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
			}
		}
		imshow("ssd-video-demo", frame);
		char c = waitKey(5);
		if (c == 27) { // 如果ESC按下
			break;
		}
	}
	capture.release();
	waitKey(0);
	return 0;
}

视频效果

 

5 FCN模型图像分割

5.1 FCN模型与数据介绍

FCN模型
支持20个分割标签


 使用模型实现图像分割

5.2 模型文件

 二进制模型
- fcn8s-heavy-pascal.caffemodel   官网下载
 网络描述
- fcn8s-heavy-pascal.prototxt
 分割信息
- pascal-classes.txt
- 20个分类

5.3 使用模型实现图像分割

 编码处理
- 加载Caffe模型
- 使用模型预测

实例5:FCN模型实现图像分割

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// NOTE(review): width and height are not referenced by main() or readColors()
// in this example; main resizes the frame to 500x500 instead.
const size_t width = 300;
const size_t height = 300;
// Class list read by readColors(): one class name followed by three integers per line.
String labelFile = "D:/opencv3.3/opencv/sources/samples/data/dnn/pascal-classes.txt";
// model (weights) file
String modelFile = "D:/opencv3.3/opencv/sources/samples/data/dnn/fcn8s-heavy-pascal.caffemodel";
// network description (prototxt) file
String model_text_file = "D:/opencv3.3/opencv/sources/samples/data/dnn/fcn8s-heavy-pascal.prototxt";

// Reads one display colour per class from labelFile (defined after main).
vector<Vec3b> readColors();
int main(int argc, char** argv) {
	Mat frame = imread("rgb.jpg");
	if (frame.empty()) {
		printf("could not load image...\n");
		return -1;
	}
	namedWindow("input image", CV_WINDOW_AUTOSIZE);
	imshow("input image", frame);
	resize(frame, frame, Size(500, 500));//改变尺寸
	vector<Vec3b> colors = readColors();
//
	// init net  初始化网络
	Net net = readNetFromCaffe(model_text_file, modelFile);
	Mat blobImage = blobFromImage(frame);

	// use net   使用网络
	float time = getTickCount();
	net.setInput(blobImage, "data");
	Mat score = net.forward("score");
	float tt = getTickCount() - time;
	printf("time consume: %.2f ms \n", (tt / getTickFrequency()) * 1000);
	
	// segmentation and display   分割并显示
	const int rows = score.size[2];
	const int cols = score.size[3];
	const int chns = score.size[1];
	Mat maxCl(rows, cols, CV_8UC1);
	Mat maxVal(rows, cols, CV_32FC1);

	// setup LUT  LUT查找
	for (int c = 0; c < chns; c++) {
		for (int row = 0; row < rows; row++) {
			const float *ptrScore = score.ptr<float>(0, c, row);
			uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
			float *ptrMaxVal = maxVal.ptr<float>(row);
			for (int col = 0; col < cols; col++) {
				if(ptrScore[col] > ptrMaxVal[col]) {
					ptrMaxVal[col] = ptrScore[col];
					ptrMaxCl[col] = (uchar)c;
				}
			}
		}
	}

	// look up colors 找到对应颜色
	Mat result = Mat::zeros(rows, cols, CV_8UC3);
	for (int row = 0; row < rows; row++) {
		const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
		Vec3b *ptrColor = result.ptr<Vec3b>(row);
		for (int col = 0; col < cols; col++) {
			ptrColor[col] = colors[ptrMaxCl[col]];
		}
	}
	Mat dst;
	imshow("FCN-demo1", result);
	addWeighted(frame, 0.3, result, 0.7, 0, dst);//增加宽度
	imshow("FCN-demo", dst);

	waitKey(0);
	return 0;
}

// Reads one colour per class from labelFile. Each non-blank line is expected
// to hold a class name followed by three integers, stored in that order as the
// three components of a Vec3b. Components that are missing or unparsable are 0.
// Terminates the program with exit(-1) when the file cannot be opened.
vector<Vec3b> readColors() {
	vector<Vec3b> colors;
	ifstream fp(labelFile);
	if (!fp.is_open()) {
		printf("could not open the file...\n");
		exit(-1);
	}
	string line;
	// Loop on getline's own result rather than eof(), so a failed stream
	// cannot spin forever.
	while (getline(fp, line)) {
		stringstream ss(line);
		string name;
		// Skip lines without any token (empty, whitespace-only or a lone '\r'):
		// they carry no class and must not shift the class indices.
		if (!(ss >> name)) {
			continue;
		}
		// Zero-initialised so a short or malformed line never yields an
		// indeterminate value.
		int components[3] = { 0, 0, 0 };
		for (int i = 0; i < 3; i++) {
			ss >> components[i];
		}
		colors.push_back(Vec3b((uchar)components[0], (uchar)components[1], (uchar)components[2]));
	}
	return colors;
}

                                   

 

 pascal可实现分割的种类和显示颜色(BGR)可见 pascal-classes.txt文件

                       

 

6 CNN模型预测性别与年龄

 age_net.caffemodel
 deploy_age.prototxt
 gender_net.caffemodel
 deploy_gender.prototxt

6.1 级联分类器人脸检测

 HAAR数据
 人脸检测

6.2 使用模型

 编码处理
- 加载Caffe模型
- 使用模型预测

实例6:CNN模型预测性别与年龄

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;
// Haar cascade file used for face detection
String haar_file = "D:/opencv3.3/opencv/build/etc/haarcascades/haarcascade_frontalface_alt_tree.xml";
// age prediction model (weights)
String age_model = "D:/opencv3.3/opencv/sources/samples/data/dnn/age_net.caffemodel";
// age network description file
String age_text = "D:/opencv3.3/opencv/sources/samples/data/dnn/deploy_age.prototxt";

// gender prediction model (weights)
String gender_model = "D:/opencv3.3/opencv/sources/samples/data/dnn/gender_net.caffemodel";
// gender network description file
String gender_text = "D:/opencv3.3/opencv/sources/samples/data/dnn/deploy_gender.prototxt";

void predict_age(Net &net, Mat &image);// predicts the age group and draws it on image
void predict_gender(Net &net, Mat &image);// predicts the gender and draws it on image
int main(int argc, char** argv) {
	Mat src = imread("star_lady.png");
	if (src.empty()) {
		printf("could not load image...\n");
		return -1;
	}
	namedWindow("input", CV_WINDOW_AUTOSIZE);
	imshow("input", src);
	CascadeClassifier detector;
	detector.load(haar_file);//人脸检测
	vector<Rect> faces;
	Mat gray;
	cvtColor(src, gray, COLOR_BGR2GRAY);
	detector.detectMultiScale(gray, faces, 1.02, 1, 0, Size(40, 40), Size(200, 200));
	//加载网络
	Net age_net = readNetFromCaffe(age_text, age_model);
	Net gender_net = readNetFromCaffe(gender_text, gender_model);

	for (size_t t= 0; t < faces.size(); t++) {
		rectangle(src, faces[t], Scalar(30, 255, 30), 2, 8, 0);
		//年龄、性别预测
		Mat face = src(faces[t]);//自己加的,不加会报错,提示类型错误
		predict_age(age_net, face);
		predict_gender(age_net, face);
	}
	imshow("age-gender-prediction-demo", src);

	waitKey(0);
	return 0;
}

// Returns the eight age-group captions, in the order predict_age() indexes
// them with the winning class index.
vector<String> ageLabels() {
	static const char* const kAgeGroups[] = {
		"0-2",
		"4 - 6",
		"8 - 13",
		"15 - 20",
		"25 - 32",
		"38 - 43",
		"48 - 53",
		"60-"
	};
	const size_t groupCount = sizeof(kAgeGroups) / sizeof(kAgeGroups[0]);
	return vector<String>(kAgeGroups, kAgeGroups + groupCount);
}

// Runs `net` on `image` (converted to a 227x227 blob), picks the class with
// the highest "prob" score and draws "age:<group>" on `image` in red.
void predict_age(Net &net, Mat &image) {
	// Feed the network.
	net.setInput(blobFromImage(image, 1.0, Size(227, 227)), "data");

	// Fetch the scores as a single row so the x coordinate of the maximum is
	// the class index.
	const Mat scores = net.forward("prob").reshape(1, 1);
	double bestScore;
	Point bestLocation;
	const vector<String> groupNames = ageLabels();
	minMaxLoc(scores, NULL, &bestScore, NULL, &bestLocation);

	const String caption = format("age:%s", groupNames.at(bestLocation.x).c_str());
	putText(image, caption, Point(2, 10), FONT_HERSHEY_PLAIN, 0.8, Scalar(0, 0, 255), 1);
}

// Runs `net` on `image` (converted to a 227x227 blob) and draws "gender:M"
// when the first "prob" score is greater than the second, otherwise
// "gender:F", on `image` in red.
void predict_gender(Net &net, Mat &image) {
	// Feed the network.
	Mat inputBlob = blobFromImage(image, 1.0, Size(227, 227));
	net.setInput(inputBlob, "data");

	// Fetch the two scores as a single row.
	Mat scores = net.forward("prob").reshape(1, 1);
	const float firstScore = scores.at<float>(0, 0);
	const float secondScore = scores.at<float>(0, 1);

	const char* genderTag = "F";
	if (firstScore > secondScore) {
		genderTag = "M";
	}
	putText(image, format("gender:%s", genderTag),
		Point(2, 20), FONT_HERSHEY_PLAIN, 0.8, Scalar(0, 0, 255), 1);
}

             

 

7 GOTURN模型实现视频对象跟踪

 GOTURN(Generic Object Tracking Using
Regression Networks)介绍
 下载模型与使用

7.1 GOTURN算法介绍

7.2 使用模型实现对象跟踪

 编码处理
- 加载Caffe模型
- 使用模型预测

模型下载地址

实例7:GOTURN模型实现视频对象跟踪

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// GOTURN model (weights) file and network description file.
String  goturn_model = "D:/opencv3.3/opencv/sources/samples/data/dnn/goturn.caffemodel";
String goturn_prototxt = "D:/opencv3.3/opencv/sources/samples/data/dnn/goturn.prototxt";

// Network shared by initGoturn() (which fills it) and trackObjects() (which runs it).
Net net;
void initGoturn();
Rect trackObjects(Mat& frame, Mat& prevFrame);
// Current and previous video frame.
Mat frame, prevFrame;
// Bounding box of the tracked object in the previous frame; read by trackObjects().
Rect prevBB;
int main(int argc, char** argv) {
	initGoturn();
	VideoCapture capture;
	capture.open("01.mp4");
	capture.read(frame);
	frame.copyTo(prevFrame);
	prevBB = selectROI(frame, true, true);
	namedWindow("frame", CV_WINDOW_AUTOSIZE);
	while (capture.read(frame)) {
		Rect currentBB = trackObjects(frame, prevFrame);
		rectangle(frame, currentBB, Scalar(0, 0, 255), 2, 8, 0);

		// ready for next frame
		frame.copyTo(prevFrame);
		prevBB.x = currentBB.x;
		prevBB.y = currentBB.y;
		prevBB.width = currentBB.width;
		prevBB.height = currentBB.height;

		imshow("frame", frame);
		char c = waitKey(50);
		if (c == 27) {
			break;
		}
	}
}

void initGoturn() {
	Ptr<Importer> importer;
	importer = createCaffeImporter(goturn_prototxt, goturn_model);
	importer->populateNet(net);
	importer.release();
}

// Estimates the bounding box of the tracked object in `frame`, given the
// previous frame `prevFrame` and the global `prevBB` (the box in the previous
// frame), by running the global GOTURN `net`.
// Returns `prevBB` unchanged when it is empty, because no patch can be cut.
Rect trackObjects(Mat& frame, Mat& prevFrame) {
	const int INPUT_SIZE = 227;// side length both patches are resized to
	const float padTargetPatch = 2.0f;// the patch is this many times the box size

	if (prevBB.width <= 0 || prevBB.height <= 0) {
		return prevBB;
	}

	Point2f prevCenter;
	prevCenter.x = (float)(prevBB.x + prevBB.width / 2);
	prevCenter.y = (float)(prevBB.y + prevBB.height / 2);

	// Patch centred on the previous box. x and y are shifted by the patch size
	// because both frames are padded by exactly that amount below.
	Rect2f targetPatchRect;
	targetPatchRect.width = (float)(prevBB.width * padTargetPatch);
	targetPatchRect.height = (float)(prevBB.height * padTargetPatch);
	targetPatchRect.x = (float)(prevCenter.x - prevBB.width * padTargetPatch / 2.0 + targetPatchRect.width);
	targetPatchRect.y = (float)(prevCenter.y - prevBB.height * padTargetPatch / 2.0 + targetPatchRect.height);

	// Cut the same region from the previous frame (target) and from the
	// current frame (search). copyMakeBorder writes to a separate output, so no
	// copy of the current frame is needed first.
	Mat prevFramePadded, curFramePadded;
	copyMakeBorder(prevFrame, prevFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
	Mat targetPatch = prevFramePadded(targetPatchRect).clone();

	copyMakeBorder(frame, curFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
	Mat searchPatch = curFramePadded(targetPatchRect).clone();

	//Preprocess
	//Resize
	resize(targetPatch, targetPatch, Size(INPUT_SIZE, INPUT_SIZE));
	resize(searchPatch, searchPatch, Size(INPUT_SIZE, INPUT_SIZE));

	//Convert to Float type FIRST: subtracting on 8-bit data saturates every
	//value below 128 to 0.
	targetPatch.convertTo(targetPatch, CV_32F);
	searchPatch.convertTo(searchPatch, CV_32F);

	//Mean Subtract on every channel: a bare `- 128` becomes
	//Scalar(128, 0, 0, 0) and would only touch the first channel.
	subtract(targetPatch, Scalar::all(128), targetPatch);
	subtract(searchPatch, Scalar::all(128), searchPatch);

	Mat targetBlob = blobFromImage(targetPatch);
	Mat searchBlob = blobFromImage(searchPatch);

	net.setInput(targetBlob, ".data1");
	net.setInput(searchBlob, ".data2");

	Mat res = net.forward("scale");
	Mat resMat = res.reshape(1, 1);

	// The four outputs are used as (x1, y1, x2, y2) in INPUT_SIZE patch units;
	// map them back to frame coordinates and undo the padding offset.
	Rect2d curBB;
	curBB.x = targetPatchRect.x + (resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - targetPatchRect.width;
	curBB.y = targetPatchRect.y + (resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - targetPatchRect.height;
	curBB.width = (resMat.at<float>(2) - resMat.at<float>(0)) * targetPatchRect.width / INPUT_SIZE;
	curBB.height = (resMat.at<float>(3) - resMat.at<float>(1)) * targetPatchRect.height / INPUT_SIZE;

	//Predicted BB
	Rect boundingBox = curBB;
	return boundingBox;
}

     

  • 3
    点赞
  • 32
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

亦我飞也

你的鼓励将是我创作的最大动力!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值