这篇文章是在那篇伟大的 benchmark 论文里看到的,文中对它的评价非常高,所以就找来读了。说实在的,已经读了两遍,还是没有看懂……于是转去看代码。平时看的都是 MATLAB 代码,而作者提供的是 C++ 实现,并且要用到 OpenCV;OpenCV 的环境我每次配置好之后,下次想跑又不行了,所以真的不太喜欢折腾它。不过,OpenCV 又确实非常好用。
下面是跟踪程序的主函数(main),先贴出来:
/*
* Struck: Structured Output Tracking with Kernels
*
* Code to accompany the paper:
* Struck: Structured Output Tracking with Kernels
* Sam Hare, Amir Saffari, Philip H. S. Torr
* International Conference on Computer Vision (ICCV), 2011
*
* Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK
*
* This file is part of Struck.
*
* Struck is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Struck is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Struck. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "Tracker.h"
#include "Config.h"
#include <iostream>
#include <fstream>
#include <opencv/cv.h>
#include <opencv/highgui.h>
using namespace std;
using namespace cv;
// Width and height of the initial bounding box used in camera mode. main()
// centres a box of this size in the conf.frameWidth x conf.frameHeight frame
// and shows it until the user initialises the tracker.
static const int kLiveBoxWidth = 80;
static const int kLiveBoxHeight = 80;
// Draws the outline of rRect onto rMat in colour rColour.
// The float rectangle is first converted to an IntRect, and its min/max
// corners are then handed to OpenCV's two-point rectangle overload.
void rectangle(Mat& rMat, const FloatRect& rRect, const Scalar& rColour)
{
    IntRect box(rRect);
    const Point minCorner(box.XMin(), box.YMin());
    const Point maxCorner(box.XMax(), box.YMax());
    cv::rectangle(rMat, minCorner, maxCorner, rColour);
}
int main(int argc, char* argv[])
{
// read config file
string configPath = "config.txt";
if (argc > 1)
{
configPath = argv[1];
}
Config conf(configPath);
cout << conf << endl;
if (conf.features.size() == 0)
{
cout << "error: no features specified in config" << endl;
return EXIT_FAILURE;
}
ofstream outFile;
if (conf.resultsPath != "")
{
outFile.open(conf.resultsPath.c_str(), ios::out);
if (!outFile)
{
cout << "error: could not open results file: " << conf.resultsPath << endl;
return EXIT_FAILURE;
}
}
// if no sequence specified then use the camera
bool useCamera = (conf.sequenceName == "");
VideoCapture cap;
int startFrame = -1;
int endFrame = -1;
FloatRect initBB;
string imgFormat;
float scaleW = 1.f;
float scaleH = 1.f;
if (useCamera)
{
if (!cap.open(0))
{
cout << "error: could not start camera capture" << endl;
return EXIT_FAILURE;
}
startFrame = 0;
endFrame = INT_MAX;
Mat tmp;
cap >> tmp;
scaleW = (float)conf.frameWidth/tmp.cols;
scaleH = (float)conf.frameHeight/tmp.rows;
// conf.frameWidth = 320
// conf.frameHeight = 240
initBB = IntRect(conf.frameWidth/2-kLiveBoxWidth/2, conf.frameHeight/2-kLiveBoxHeight/2, kLiveBoxWidth, kLiveBoxHeight);
cout << "press 'i' to initialise tracker" << endl;
}
else
{
// parse frames file
string framesFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_frames.txt";
ifstream framesFile(framesFilePath.c_str(), ios::in);
if (!framesFile)
{
cout << "error: could not open sequence frames file: " << framesFilePath << endl;
return EXIT_FAILURE;
}
string framesLine;
getline(framesFile, framesLine);
sscanf(framesLine.c_str(), "%d,%d", &startFrame, &endFrame);
if (framesFile.fail() || startFrame == -1 || endFrame == -1)
{
cout << "error: could not parse sequence frames file" << endl;
return EXIT_FAILURE;
}
imgFormat = conf.sequenceBasePath+"/"+conf.sequenceName+"/imgs/img%05d.png";
// read first frame to get size
char imgPath[256];
sprintf(imgPath, imgFormat.c_str(), startFrame);
Mat tmp = cv::imread(imgPath, 0);
scaleW = (float)conf.frameWidth/tmp.cols;
scaleH = (float)conf.frameHeight/tmp.rows;
// read init box from ground truth file
string gtFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_gt.txt";
ifstream gtFile(gtFilePath.c_str(), ios::in);
if (!gtFile)
{
cout << "error: could not open sequence gt file: " << gtFilePath << endl;
return EXIT_FAILURE;
}
string gtLine;
getline(gtFile, gtLine);
float xmin = -1.f;
float ymin = -1.f;
float width = -1.f;
float height = -1.f;
sscanf(gtLine.c_str(), "%f,%f,%f,%f", &xmin, &ymin, &width, &height);
if (gtFile.fail() || xmin < 0.f || ymin < 0.f || width < 0.f || height < 0.f)
{
cout << "error: could not parse sequence gt file" << endl;
return EXIT_FAILURE;
}
initBB = FloatRect(xmin*scaleW, ymin*scaleH, width*scaleW, height*scaleH);
}
Tracker tracker(conf);
if (!conf.quietMode)
{
namedWindow("result");
}
Mat result(conf.frameHeight, conf.frameWidth, CV_8UC3);
bool paused = false;
bool doInitialise = false;
srand(conf.seed);
for (int frameInd = startFrame; frameInd <= endFrame; ++frameInd)
{
Mat frame;
if (useCamera)
{
Mat frameOrig;
cap >> frameOrig;
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
flip(frame, frame, 1);
frame.copyTo(result);
if (doInitialise)
{
if (tracker.IsInitialised())
{
tracker.Reset();
}
else
{
tracker.Initialise(frame, initBB);
}
doInitialise = false;
}
else if (!tracker.IsInitialised())
{
rectangle(result, initBB, CV_RGB(255, 255, 255));
}
}
else
{
char imgPath[256];
sprintf(imgPath, imgFormat.c_str(), frameInd);
Mat frameOrig = cv::imread(imgPath, 0);
if (frameOrig.empty())
{
cout << "error: could not read frame: " << imgPath << endl;
return EXIT_FAILURE;
}
resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
cvtColor(frame, result, CV_GRAY2RGB);
if (frameInd == startFrame)
{
tracker.Initialise(frame, initBB);
}
}
if (tracker.IsInitialised())
{
tracker.Track(frame);
if (!conf.quietMode && conf.debugMode)
{
tracker.Debug();
}
rectangle(result, tracker.GetBB(), CV_RGB(0, 255, 0));
if (outFile)
{
const FloatRect& bb = tracker.GetBB();
outFile << bb.XMin()/scaleW << "," << bb.YMin()/scaleH << "," << bb.Width()/scaleW << "," << bb.Height()/scaleH << endl;
}
}
if (!conf.quietMode)
{
imshow("result", result);
int key = waitKey(paused ? 0 : 1);
if (key != -1)
{
if (key == 27 || key == 113) // esc q
{
break;
}
else if (key == 112) // p
{
paused = !paused;
}
else if (key == 105 && useCamera)
{
doInitialise = true;
}
}
if (conf.debugMode && frameInd == endFrame)
{
cout << "\n\nend of sequence, press any key to exit" << endl;
waitKey();
}
}
}
if (outFile.is_open())
{
outFile.close();
}
return EXIT_SUCCESS;
}
最核心的逻辑在 Tracker 类里面,因此把它的实现也贴在下面:
/*
* Struck: Structured Output Tracking with Kernels
*
* Code to accompany the paper:
* Struck: Structured Output Tracking with Kernels
* Sam Hare, Amir Saffari, Philip H. S. Torr
* International Conference on Computer Vision (ICCV), 2011
*
* Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK
*
* This file is part of Struck.
*
* Struck is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Struck is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Struck. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "Tracker.h"
#include "Config.h"
#include "ImageRep.h"
#include "Sampler.h"
#include "Sample.h"
#include "GraphUtils/GraphUtils.h"
#include "HaarFeatures.h"
#include "RawFeatures.h"
#include "HistogramFeatures.h"
#include "MultiFeatures.h"
#include "Kernels.h"
#include "LaRank.h"
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <Eigen/Core>
#include <vector>
#include <algorithm>
using namespace cv;
using namespace std;
using namespace Eigen;
// Constructs a tracker from the given configuration. The tracker starts
// uninitialised with no learner; Reset() then builds the features, kernels
// and learner that the configuration asks for.
Tracker::Tracker(const Config& conf) :
m_config(conf),
m_initialised(false),
m_pLearner(0),
// square float image with side 2*searchRadius+1; UpdateDebugImage() indexes
// it at (searchRadius + y offset, searchRadius + x offset)
m_debugImage(2*conf.searchRadius+1, 2*conf.searchRadius+1, CV_32FC1),
m_needsIntegralImage(false)
{
// m_needsIntegralHist is not in the initialiser list; Reset() assigns it
Reset();
}
// Releases the learner and every feature and kernel object owned by the tracker.
Tracker::~Tracker()
{
    delete m_pLearner;

    // Walk each container by its own size: indexing m_kernels with a bound
    // taken from m_features would read out of range (or leak) if the two
    // vectors ever ended up with different lengths.
    for (size_t i = 0; i < m_features.size(); ++i)
    {
        delete m_features[i];
    }
    for (size_t i = 0; i < m_kernels.size(); ++i)
    {
        delete m_kernels[i];
    }
}
// Returns the tracker to its uninitialised state and rebuilds the learner.
//
// Destroys the current learner, features and kernels, then creates one feature
// object and one kernel object per entry of m_config.features. When more than
// one feature is configured, a combined MultiFeatures / MultiKernel pair is
// appended and used by the learner; otherwise the single feature/kernel is
// used directly. Also recomputes which integral representations the
// configured features require.
void Tracker::Reset()
{
    m_initialised = false;
    m_debugImage.setTo(0);

    // Clear the pointer right after deleting it: if anything below throws, the
    // destructor must not delete the old learner a second time.
    delete m_pLearner;
    m_pLearner = 0;

    // Each container is walked by its own size so that a length mismatch can
    // never cause an out-of-range access.
    for (size_t i = 0; i < m_features.size(); ++i)
    {
        delete m_features[i];
    }
    for (size_t i = 0; i < m_kernels.size(); ++i)
    {
        delete m_kernels[i];
    }
    m_features.clear();
    m_kernels.clear();

    m_needsIntegralImage = false;
    m_needsIntegralHist = false;

    const int numFeatures = (int)m_config.features.size();
    vector<int> featureCounts;
    featureCounts.reserve(m_config.features.size());
    for (int i = 0; i < numFeatures; ++i)
    {
        switch (m_config.features[i].feature)
        {
        case Config::kFeatureTypeHaar:
            m_features.push_back(new HaarFeatures(m_config));
            m_needsIntegralImage = true;
            break;
        case Config::kFeatureTypeRaw:
            m_features.push_back(new RawFeatures(m_config));
            break;
        case Config::kFeatureTypeHistogram:
            m_features.push_back(new HistogramFeatures(m_config));
            m_needsIntegralHist = true;
            break;
        }
        // a feature type not handled above would leave nothing to query here
        assert(!m_features.empty());
        featureCounts.push_back(m_features.back()->GetCount());

        switch (m_config.features[i].kernel)
        {
        case Config::kKernelTypeLinear:
            m_kernels.push_back(new LinearKernel());
            break;
        case Config::kKernelTypeGaussian:
            // params[0] is passed as the Gaussian kernel's only constructor argument
            m_kernels.push_back(new GaussianKernel(m_config.features[i].params[0]));
            break;
        case Config::kKernelTypeIntersection:
            m_kernels.push_back(new IntersectionKernel());
            break;
        case Config::kKernelTypeChi2:
            m_kernels.push_back(new Chi2Kernel());
            break;
        }
    }

    if (numFeatures > 1)
    {
        // combine the individual features/kernels; the combined objects go
        // last so that back() below selects them
        MultiFeatures* f = new MultiFeatures(m_features);
        m_features.push_back(f);

        MultiKernel* k = new MultiKernel(m_kernels, featureCounts);
        m_kernels.push_back(k);
    }

    // back() on an empty vector is undefined; this fires in debug builds when
    // the configuration yields no feature or no kernel
    assert(!m_features.empty() && !m_kernels.empty());
    m_pLearner = new LaRank(m_config, *m_features.back(), *m_kernels.back());
}
// Starts tracking from bounding box bb in the given frame.
// The box is stored after conversion through IntRect, the learner is updated
// on this first frame, and the tracker is then marked as initialised.
void Tracker::Initialise(const cv::Mat& frame, FloatRect bb)
{
    // number of learner updates performed on the first frame
    const int kInitialUpdates = 1;

    m_bb = IntRect(bb);
    ImageRep firstImage(frame, m_needsIntegralImage, m_needsIntegralHist);

    int pass = 0;
    while (pass < kInitialUpdates)
    {
        UpdateLearner(firstImage);
        ++pass;
    }

    m_initialised = true;
}
// Advances the tracker by one frame.
//
// Candidate boxes are produced by Sampler::PixelSamples from the current box
// and m_config.searchRadius; those lying inside the image are scored by the
// learner. The highest-scoring candidate becomes the new bounding box and the
// learner is updated at that position. If no candidate lies inside the image,
// the bounding box and learner are left unchanged.
//
// Precondition: Initialise() has been called (asserted).
void Tracker::Track(const cv::Mat& frame)
{
    assert(m_initialised);

    ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist);

    vector<FloatRect> rects = Sampler::PixelSamples(m_bb, m_config.searchRadius);

    // discard candidates that are not fully inside the image
    vector<FloatRect> keptRects;
    keptRects.reserve(rects.size());
    for (int i = 0; i < (int)rects.size(); ++i)
    {
        if (rects[i].IsInside(image.GetRect()))
        {
            keptRects.push_back(rects[i]);
        }
    }

    if (keptRects.empty())
    {
        // Nothing to evaluate: with no scores, UpdateDebugImage() would take
        // the address of scores[0] on an empty vector. Blank the debug view
        // and keep the previous estimate.
        m_debugImage.setTo(0);
        return;
    }

    MultiSample sample(image, keptRects);

    vector<double> scores;
    m_pLearner->Eval(sample, scores);
    // the loop below reads one score per kept candidate
    assert(scores.size() >= keptRects.size());

    // first candidate with the strictly highest score wins
    double bestScore = -DBL_MAX;
    int bestInd = -1;
    for (int i = 0; i < (int)keptRects.size(); ++i)
    {
        if (scores[i] > bestScore)
        {
            bestScore = scores[i];
            bestInd = i;
        }
    }

    UpdateDebugImage(keptRects, m_bb, scores);

    if (bestInd != -1)
    {
        m_bb = keptRects[bestInd];
        UpdateLearner(image);
#if VERBOSE
        cout << "track score: " << bestScore << endl;
#endif
    }
}
void Tracker::UpdateDebugImage(const vector<FloatRect>& samples, const FloatRect& centre, const vector<double>& scores)
{
double mn = VectorXd::Map(&scores[0], scores.size()).minCoeff();
double mx = VectorXd::Map(&scores[0], scores.size()).maxCoeff();
m_debugImage.setTo(0);
for (int i = 0; i < (int)samples.size(); ++i)
{
int x = (int)(samples[i].XMin() - centre.XMin());
int y = (int)(samples[i].YMin() - centre.YMin());
m_debugImage.at<float>(m_config.searchRadius+y, m_config.searchRadius+x) = (float)((scores[i]-mn)/(mx-mn));
}
}
void Tracker::Debug()
{
imshow("tracker", m_debugImage);
m_pLearner->Debug();
}
void Tracker::UpdateLearner(const ImageRep& image)
{
// note these return the centre sample at index 0
vector<FloatRect> rects = Sampler::RadialSamples(m_bb, 2*m_config.searchRadius, 5, 16);
//vector<FloatRect> rects = Sampler::PixelSamples(m_bb, 2*m_config.searchRadius, true);
vector<FloatRect> keptRects;
keptRects.push_back(rects[0]); // the true sample
for (int i = 1; i < (int)rects.size(); ++i)
{
if (!rects[i].IsInside(image.GetRect())) continue;
keptRects.push_back(rects[i]);
}
#if VERBOSE
cout << keptRects.size() << " samples" << endl;
#endif
MultiSample sample(image, keptRects);
m_pLearner->Update(sample, 0);
}