关于GOTURN的来源,可以查看论文原文:http://davheld.github.io/GOTURN/GOTURN.pdf
在作者文章中提到,利用GOTURN进行神经网络训练,对跟踪的对象能够实时进行跟踪,且FPS能够达到100,但是只限于在GPU上,对于CPU,还是有点不理想。
下图中,我们先框定前一帧的对象,即下图中的“what to track”,然后在当前帧中搜索可能的对象,即“search region”,经过对比,两张图像并不是完全一致,将二者都经过卷积层,通过全连接层进行回归,框定最终对象,然后再将框定的对象作为前一帧,不断循环,直到视频播放结束为止。
首先需要下载模型。GOTURN 模型由两个文件组成:goturn.prototxt(网络结构)与 goturn.caffemodel(权重)。prototxt 可在 opencv_extra 仓库的 testdata/dnn 目录获取;caffemodel 体积较大,在 opencv_extra 中被拆分为多个分卷(goturn.caffemodel.zip.001 等),下载后需合并解压。
实例:
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
using namespace cv;
using namespace cv::dnn;
using namespace std;
String goturn_model = "D:/test/dnn/goturn/goturn.caffemodel";
String goturn_prototxt = "D:/test/dnn/goturn/goturn.prototxt";
Net net;
Rect trackObjects(Mat &frame, Mat &prevFrame);
Mat frame, prevFrame;
Rect prevBB;
int main(int argc, char** argv) {
net = readNetFromCaffe(goturn_prototxt, goturn_model);
VideoCapture capture;
capture.open("D:/test/dog.avi");
capture.read(frame);
frame.copyTo(prevFrame);
prevBB = selectROI(frame, true, true);
namedWindow("frame", WINDOW_AUTOSIZE);
while (capture.read(frame)) {
Rect currentBB = trackObjects(frame, prevFrame);
rectangle(frame, currentBB, Scalar(0, 0, 255), 2, 8, 0);
// ready for next frame
frame.copyTo(prevFrame);
prevBB.x = currentBB.x;
prevBB.y = currentBB.y;
prevBB.width = currentBB.width;
prevBB.height = currentBB.height;
imshow("frame", frame);
char c = waitKey(10);
if (c == 27) {
break;
}
}
}
Rect trackObjects(Mat& frame, Mat &prevFrame) {
Rect rect;
int INPUT_SIZE = 227;
//Using prevFrame & prevBB from model and curFrame GOTURN calculating curBB
Mat curFrame = frame.clone();
Rect2d curBB;
float padTargetPatch = 2.0;
Rect2f searchPatchRect, targetPatchRect;
Point2f currCenter, prevCenter;
Mat prevFramePadded, curFramePadded;
Mat searchPatch, targetPatch;
prevCenter.x = (float)(prevBB.x + prevBB.width / 2);
prevCenter.y = (float)(prevBB.y + prevBB.height / 2);
targetPatchRect.width = (float)(prevBB.width*padTargetPatch);
targetPatchRect.height = (float)(prevBB.height*padTargetPatch);
targetPatchRect.x = (float)(prevCenter.x - prevBB.width*padTargetPatch / 2.0 + targetPatchRect.width);
targetPatchRect.y = (float)(prevCenter.y - prevBB.height*padTargetPatch / 2.0 + targetPatchRect.height);
copyMakeBorder(prevFrame, prevFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
targetPatch = prevFramePadded(targetPatchRect).clone();
copyMakeBorder(curFrame, curFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
searchPatch = curFramePadded(targetPatchRect).clone();
//Preprocess
//Resize
resize(targetPatch, targetPatch, Size(INPUT_SIZE, INPUT_SIZE));
resize(searchPatch, searchPatch, Size(INPUT_SIZE, INPUT_SIZE));
//Mean Subtract
targetPatch = targetPatch - 128;
searchPatch = searchPatch - 128;
//Convert to Float type
targetPatch.convertTo(targetPatch, CV_32F);
searchPatch.convertTo(searchPatch, CV_32F);
Mat targetBlob = blobFromImage(targetPatch);
Mat searchBlob = blobFromImage(searchPatch);
net.setInput(targetBlob, "data1");
net.setInput(searchBlob, "data2");
Mat res = net.forward("scale");
Mat resMat = res.reshape(1, 1);
//printf("width : %d, height : %d\n", (resMat.at<float>(2) - resMat.at<float>(0)), (resMat.at<float>(3) - resMat.at<float>(1)));
curBB.x = targetPatchRect.x + (resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - targetPatchRect.width;
curBB.y = targetPatchRect.y + (resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - targetPatchRect.height;
curBB.width = (resMat.at<float>(2) - resMat.at<float>(0)) * targetPatchRect.width / INPUT_SIZE;
curBB.height = (resMat.at<float>(3) - resMat.at<float>(1)) * targetPatchRect.height / INPUT_SIZE;
//Predicted BB
Rect boundingBox = curBB;
return boundingBox;
}