因为项目的原因接触了一下KCF跟踪程序
KCF跟踪网上已经被各路人等吹来吹去快上天了,而且又被一堆人拿来水论文用来毕业
所以KCF及其变种还是很多的
在此分析一种多尺度的KCF跟踪程序,废话不多说,上代码
其中多尺度的逻辑主要实现在kcftracker.cpp文件中的KCFTracker::update函数里面:除原尺度外,再分别在略小(1/scale_step)和略大(scale_step)两个尺度上各检测一次,其他尺度的峰值先乘以scale_weight做衰减,然后比较三个峰值的结果来进行多尺度的适应,简单,高效,改动少。
tracker.h:定义Tracker类
#pragma once
#include <opencv2/opencv.hpp>
#include <string>
// Abstract tracker interface: implementations are primed with a first frame
// and a target box via init(), then queried once per frame via update().
class Tracker
{
public:
Tracker() {}
virtual ~Tracker() { }
// Start tracking: roi is the target box, image is the frame it refers to.
virtual void init(const cv::Rect &roi, cv::Mat image) = 0;
// Process a new frame and return a box (KCFTracker returns its updated _roi).
virtual cv::Rect update( cv::Mat image)=0;
protected:
// Target box kept with float coordinates.
cv::Rect_<float> _roi;
};
kcftracker.hpp:继承Tracker定义KCFTracker
#pragma once
#include "tracker.h"
#ifndef _OPENCV_KCFTRACKER_HPP_
#define _OPENCV_KCFTRACKER_HPP_
#endif
// KCF tracker implementing the Tracker interface, with optional HOG / Lab
// features and an optional three-scale search performed in update().
class KCFTracker : public Tracker
{
public:
// Constructor
// Builds the tracker and selects its parameter set from the four flags.
KCFTracker(bool hog = true, // use HOG features
bool fixed_window = true, // use a fixed window (template) size
bool multiscale = true, // use multi-scale estimation
bool lab = true); // use Lab colour-space features
// Initialize tracker
// roi is the initial target box, image is the first frame to track on
virtual void init(const cv::Rect &roi, cv::Mat image);
// Update position based on the new frame
// image is the new frame
virtual cv::Rect update(cv::Mat image);
float interp_factor; // linear interpolation factor for adaptation
// (the constructor picks a different value depending on hog / lab)
float sigma; // gaussian kernel bandwidth
// (the constructor picks a different value depending on hog / lab)
float lambda; // regularization
// (the constructor sets 0.0001)
int cell_size; // HOG cell size
// (the constructor sets 4 with HOG, 1 with raw pixels)
int cell_sizeQ; // cell size^2, to avoid repeated operations
// (number of pixels in one cell, 16 when cell_size is 4)
float padding; // extra area surrounding the target
// (the constructor sets 2.5)
float output_sigma_factor; // bandwidth of gaussian target
// (the constructor picks a different value depending on hog / lab)
int template_size; // template size
// (when > 1, getFeatures() rescales the padded ROI so that its larger
// side equals this value and the smaller side shrinks proportionally)
float scale_step; // scale step for multi-scale estimation
// (ratio between neighbouring scales; 1 disables the scale search)
float scale_weight; // to downweight detection scores of other scales for added stability
// (the constructor sets 0.95 when multiscale is enabled)
protected:
// Detect object in the current frame.
// z is the template from the previous training step (or from init),
// x is the feature map of the current frame at the tested scale,
// peak_value receives the peak value of the response.
cv::Point2f detect(cv::Mat z, cv::Mat x, float &peak_value);
// train tracker with a single image
// x is the feature map of the current frame at the current scale,
// train_interp_factor is the blending weight (update() passes interp_factor)
void train(cv::Mat x, float train_interp_factor);
// Evaluates a Gaussian kernel with bandwidth SIGMA for all relative shifts between input images X and Y,
// which must both be MxN. They must also be periodic (ie., pre-processed with a cosine window).
cv::Mat gaussianCorrelation(cv::Mat x1, cv::Mat x2);
// Create Gaussian Peak. Function called only in the first frame.
cv::Mat createGaussianPeak(int sizey, int sizex);
// Obtain sub-window from image, with replication-padding and extract features
cv::Mat getFeatures(const cv::Mat & image, bool inithann, float scale_adjust = 1.0f);
// Initialize Hanning window. Function called only in the first frame.
void createHanningMats();
// Calculate sub-pixel peak for one dimension
float subPixelPeak(float left, float center, float right);
cv::Mat _alphaf; // filter coefficients set by init() / train(), used by detect()
cv::Mat _prob; // Gaussian peak created once in init(), used by train()
cv::Mat _tmpl; // template produced by init() / train(), passed as z to detect()
cv::Mat _num; // only referenced from commented-out code in the visible source
cv::Mat _den; // only referenced from commented-out code in the visible source
cv::Mat _labCentroids; // table of Lab centroids (one row per centroid)
private:
int size_patch[3]; // feature map sizeY, sizeX, numFeatures
cv::Mat hann; // window computed by createHanningMats()
cv::Size _tmpl_sz; // size the extracted patch is resized to before feature extraction
float _scale; // ratio between the padded ROI and the template size
int _gaussian_size; // not referenced in the visible source
bool _hogfeatures; // true when HOG features are used
bool _labfeatures; // true when Lab features are used
};
kcftracker.cpp:KCFTracker类中函数的定义
#ifndef _KCFTRACKER_HEADERS
#include "kcftracker.hpp"
#include "ffttools.hpp"
#include "recttools.hpp"
#include "fhog.hpp"
#include "labdata.hpp"
#endif
// Constructor
// Selects the KCF parameter set for the requested configuration.
//   hog          - use HOG features (otherwise raw gray pixels are used)
//   fixed_window - fit the padded window to a fixed template size
//   multiscale   - probe a smaller and a larger scale on every update;
//                  this always uses the fixed template size
//   lab          - append Lab colour features (honoured only together with hog)
// Every data member with a plain type is given a defined value here so that
// no configuration leaves a member indeterminate.
KCFTracker::KCFTracker(bool hog, bool fixed_window, bool multiscale, bool lab)
{
    // Parameters equal in all cases
    lambda = 0.0001;
    padding = 2.5;
    //output_sigma_factor = 0.1;
    output_sigma_factor = 0.125;

    // Safe defaults; the branches below overwrite them where appropriate.
    _labfeatures = false;
    scale_weight = 0.95;
    _scale = 1;
    _gaussian_size = 0;
    size_patch[0] = 0;
    size_patch[1] = 0;
    size_patch[2] = 0;

    if (hog) {    // HOG
        // VOT
        interp_factor = 0.012;
        sigma = 0.6;
        // TPAMI
        //interp_factor = 0.02;
        //sigma = 0.5;
        cell_size = 4;
        _hogfeatures = true;

        if (lab) {
            interp_factor = 0.005;
            sigma = 0.4;
            //output_sigma_factor = 0.025;
            output_sigma_factor = 0.1;

            _labfeatures = true;
            _labCentroids = cv::Mat(nClusters, 3, CV_32FC1, &data);
        }
    }
    else {   // RAW
        interp_factor = 0.075;
        sigma = 0.2;
        cell_size = 1;
        _hogfeatures = false;

        if (lab) {
            printf("Lab features are only used with HOG features.\n");
        }
    }

    // Pixels per cell; computed for every configuration, not only HOG + Lab.
    cell_sizeQ = cell_size * cell_size;

    if (multiscale) { // multiscale
        // Multiscale does not support a non-fixed window, so the fixed
        // template size is used regardless of fixed_window.
        template_size = 96;
        //template_size = 100;
        scale_step = 1.20;//1.05;
        scale_weight = 0.95;
    }
    else if (fixed_window) {  // fit correction without multiscale
        template_size = 96;
        //template_size = 100;
        scale_step = 1;
    }
    else {
        template_size = 1;
        scale_step = 1;
    }
}
// Initialize tracker
// Primes the tracker from the first frame and its target box: extracts the
// initial template, builds the Gaussian target and trains on the first frame.
void KCFTracker::init(const cv::Rect &roi, cv::Mat image)
{
    _roi = roi;
    assert(roi.width >= 0 && roi.height >= 0);

    // First-frame features; passing true also sets up the Hanning window
    // and the template size inside getFeatures().
    const bool build_window = true;
    _tmpl = getFeatures(image, build_window);

    // getFeatures() has filled size_patch with the feature map dimensions.
    const int patch_rows = size_patch[0];
    const int patch_cols = size_patch[1];

    // The Gaussian target is created once here; train() only reads it.
    _prob = createGaussianPeak(patch_rows, patch_cols);

    // Filter coefficients start at zero and are updated by every train() call.
    _alphaf = cv::Mat(patch_rows, patch_cols, CV_32FC2, float(0));

    //_num = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));
    //_den = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0));

    // train with initial frame (interpolation factor 1.0)
    train(_tmpl, 1.0);
}
// Update position based on the new frame
// Detects the target in `image` at the current scale and, when scale_step
// is not 1, also at a smaller and a larger scale; moves/resizes _roi to the
// best response, retrains on the new position and returns _roi.
cv::Rect KCFTracker::update(cv::Mat image)
{
// Pull the box back if it has drifted outside the image
if (_roi.x + _roi.width <= 0) _roi.x = -_roi.width + 1;
if (_roi.y + _roi.height <= 0) _roi.y = -_roi.height + 1;
if (_roi.x >= image.cols - 1) _roi.x = image.cols - 2;
if (_roi.y >= image.rows - 1) _roi.y = image.rows - 2;
// Centre of the current box
float cx = _roi.x + _roi.width / 2.0f;
float cy = _roi.y + _roi.height / 2.0f;
// Response peak at the unchanged scale
float peak_value;
cv::Point2f res = detect(_tmpl, getFeatures(image, 0, 1.0f), peak_value);
// Probe a slightly smaller and a slightly larger scale
if (scale_step != 1) {
// Test at a smaller _scale
float new_peak_value;
cv::Point2f new_res = detect(_tmpl, getFeatures(image, 0, 1.0f / scale_step), new_peak_value);
// Accept the other scale only if its peak still wins after being damped by scale_weight
if (scale_weight * new_peak_value > peak_value) {
res = new_res;
peak_value = new_peak_value;
_scale /= scale_step;
_roi.width /= scale_step;
_roi.height /= scale_step;
}
// Test at a bigger _scale
// If the smaller scale was accepted above, _scale, _roi and peak_value have
// already been changed, so this probe and comparison use the updated values.
new_res = detect(_tmpl, getFeatures(image, 0, scale_step), new_peak_value);
if (scale_weight * new_peak_value > peak_value) {
res = new_res;
peak_value = new_peak_value;
_scale *= scale_step;
_roi.width *= scale_step;
_roi.height *= scale_step;
}
}
// Adjust by cell size and _scale
// detect() only returns a displacement of the centre (in feature cells);
// convert it to pixels and rebuild the box around the old centre.
_roi.x = cx - _roi.width / 2.0f + ((float) res.x * cell_size * _scale);
_roi.y = cy - _roi.height / 2.0f + ((float) res.y * cell_size * _scale);
// Keep the moved box at least partly inside the image
if (_roi.x >= image.cols - 1) _roi.x = image.cols - 1;
if (_roi.y >= image.rows - 1) _roi.y = image.rows - 1;
if (_roi.x + _roi.width <= 0) _roi.x = -_roi.width + 2;
if (_roi.y + _roi.height <= 0) _roi.y = -_roi.height + 2;
assert(_roi.width >= 0 && _roi.height >= 0);
// Retrain the model with features taken at the newly detected box
cv::Mat x = getFeatures(image, 0);
train(x, interp_factor);
return _roi; // the updated target box
}
// Detect object in the current frame.
// z          - template from the previous frame(s)
// x          - features of the current frame
// peak_value - output: maximum of the response map
// Returns the peak position relative to the centre of the response map,
// refined to sub-pixel precision when the peak is not on the border.
cv::Point2f KCFTracker::detect(cv::Mat z, cv::Mat x, float &peak_value)
{
    using namespace FFTTools;

    // Response map: real part of the inverse transform of alphaf * FFT(k)
    cv::Mat kernel = gaussianCorrelation(x, z);
    cv::Mat response = (real(fftd(complexMultiplication(_alphaf, fftd(kernel)), true)));

    // minMaxLoc only accepts doubles for the peak, and integer points for the coordinates
    cv::Point2i peak_loc;
    double peak;
    cv::minMaxLoc(response, NULL, &peak, NULL, &peak_loc);
    peak_value = (float) peak;

    // subpixel peak estimation, coordinates will be non-integer
    cv::Point2f shift((float)peak_loc.x, (float)peak_loc.y);

    const bool has_horizontal_neighbours = peak_loc.x > 0 && peak_loc.x < response.cols-1;
    if (has_horizontal_neighbours) {
        const float left_val  = response.at<float>(peak_loc.y, peak_loc.x-1);
        const float right_val = response.at<float>(peak_loc.y, peak_loc.x+1);
        shift.x += subPixelPeak(left_val, peak_value, right_val);
    }

    const bool has_vertical_neighbours = peak_loc.y > 0 && peak_loc.y < response.rows-1;
    if (has_vertical_neighbours) {
        const float upper_val = response.at<float>(peak_loc.y-1, peak_loc.x);
        const float lower_val = response.at<float>(peak_loc.y+1, peak_loc.x);
        shift.y += subPixelPeak(upper_val, peak_value, lower_val);
    }

    // Express the peak relative to the centre of the response map
    shift.x -= (response.cols) / 2;
    shift.y -= (response.rows) / 2;

    return shift;
}
// train tracker with a single image
// Blends the features x into the template _tmpl and the coefficients
// computed from x into _alphaf, using train_interp_factor as the weight of
// the new data and (1 - train_interp_factor) as the weight of the old model.
void KCFTracker::train(cv::Mat x, float train_interp_factor)
{
    using namespace FFTTools;

    // Coefficients for this frame alone: _prob / (FFT(k(x, x)) + lambda)
    cv::Mat self_kernel = gaussianCorrelation(x, x);
    cv::Mat frame_alphaf = complexDivision(_prob, (fftd(self_kernel) + lambda));

    // Linear interpolation between the previous model and the new frame
    const float keep_weight = 1 - train_interp_factor;
    _tmpl = keep_weight * _tmpl + (train_interp_factor) * x;
    _alphaf = keep_weight * _alphaf + (train_interp_factor) * frame_alphaf;

    // An alternative update that interpolates a separate numerator (_num) and
    // denominator (_den) and divides them afterwards is not used here.
}
// Evaluates a Gaussian kernel with bandwidth SIGMA for all relative shifts between input images X and Y,
// which must both be MxN. They must also be periodic (ie., pre-processed with a cosine window).
// The dimensions are taken from size_patch (rows, cols, number of features).
cv::Mat KCFTracker::gaussianCorrelation(cv::Mat x1, cv::Mat x2)
{
    using namespace FFTTools;

    const int patch_rows = size_patch[0];
    const int patch_cols = size_patch[1];
    const int n_features = size_patch[2];

    // Accumulated cross-correlation over all feature channels
    cv::Mat xcorr = cv::Mat( cv::Size(patch_cols, patch_rows), CV_32F, cv::Scalar(0) );

    if (!_hogfeatures) {
        // Gray features: a single plane, correlate directly
        cv::mulSpectrums(fftd(x1), fftd(x2), xcorr, 0, true);
        xcorr = fftd(xcorr, true);
        rearrange(xcorr);
        xcorr = real(xcorr);
    }
    else {
        // HOG features: every row of x1 / x2 holds one feature channel
        for (int ch = 0; ch < n_features; ch++) {
            // Procedure do deal with cv::Mat multichannel bug
            cv::Mat plane1 = x1.row(ch);
            plane1 = plane1.reshape(1, patch_rows);
            cv::Mat plane2 = x2.row(ch).reshape(1, patch_rows);

            cv::Mat channel_corr;
            cv::mulSpectrums(fftd(plane1), fftd(plane2), channel_corr, 0, true);
            channel_corr = fftd(channel_corr, true);
            rearrange(channel_corr);
            channel_corr.convertTo(channel_corr,CV_32F);
            xcorr = xcorr + real(channel_corr);
        }
    }

    // Squared distance for every shift, normalised by the element count and
    // clamped at zero before the exponential.
    const double energy = cv::sum(x1.mul(x1))[0] + cv::sum(x2.mul(x2))[0];
    const int n_elements = patch_rows*patch_cols*n_features;
    cv::Mat dist;
    cv::max(( energy - 2. * xcorr) / n_elements , 0, dist);

    cv::Mat kernel;
    cv::exp((-dist / (sigma * sigma)), kernel);
    return kernel;
}
// Create Gaussian Peak. Function called only in the first frame.
// Fills a sizey x sizex float matrix with a Gaussian centred at
// (sizey / 2, sizex / 2) and returns its transform via FFTTools::fftd.
cv::Mat KCFTracker::createGaussianPeak(int sizey, int sizex)
{
    // Width of the Gaussian, derived from the patch area, padding and
    // output_sigma_factor
    const float output_sigma = std::sqrt((float) sizex * sizey) / padding * output_sigma_factor;
    const float mult = -0.5 / (output_sigma * output_sigma);

    const int centre_row = (sizey) / 2;
    const int centre_col = (sizex) / 2;

    cv::Mat_<float> peak(sizey, sizex);
    for (int row = 0; row < sizey; row++) {
        const int dy = row - centre_row;
        const int dy_sq = dy * dy;
        for (int col = 0; col < sizex; col++) {
            const int dx = col - centre_col;
            peak(row, col) = std::exp(mult * (float) (dy_sq + dx * dx));
        }
    }
    return FFTTools::fftd(peak);
}
// Obtain sub-window from image, with replication-padding and extract features
// image        - frame to sample from
// inithann     - when true (first frame), also compute _scale, _tmpl_sz and
//                the Hanning window
// scale_adjust - extra factor applied to the sampled window size
// Returns the feature matrix multiplied element-wise by the Hanning window;
// size_patch is updated with the feature map dimensions as a side effect.
cv::Mat KCFTracker::getFeatures(const cv::Mat & image, bool inithann, float scale_adjust)
{
cv::Rect extracted_roi;
float cx = _roi.x + _roi.width / 2;
float cy = _roi.y + _roi.height / 2;
// First-frame setup: update() always passes inithann = 0, init() passes 1
if (inithann) {
int padded_w = _roi.width * padding;
int padded_h = _roi.height * padding;
// Rescale keeping the aspect ratio so that the larger side equals template_size
if (template_size > 1) { // Fit largest dimension to the given template size
if (padded_w >= padded_h) //fit to width
_scale = padded_w / (float) template_size;
else
_scale = padded_h / (float) template_size;
_tmpl_sz.width = padded_w / _scale;
_tmpl_sz.height = padded_h / _scale;
}
else { //No template size given, use ROI size
_tmpl_sz.width = padded_w;
_tmpl_sz.height = padded_h;
_scale = 1;
// original code from paper:
/*if (sqrt(padded_w * padded_h) >= 100) { //Normal size
_tmpl_sz.width = padded_w;
_tmpl_sz.height = padded_h;
_scale = 1;
}
else { //ROI is too big, track at half size
_tmpl_sz.width = padded_w / 2;
_tmpl_sz.height = padded_h / 2;
_scale = 2;
}*/
}
// Snap _tmpl_sz to a multiple of 2*cell_size: round down, then add one
// extra 2*cell_size (e.g. 96 becomes 104 when cell_size is 4)
if (_hogfeatures) {
// Round to cell size and also make it even
_tmpl_sz.width = ( ( (int)(_tmpl_sz.width / (2 * cell_size)) ) * 2 * cell_size ) + cell_size*2;
_tmpl_sz.height = ( ( (int)(_tmpl_sz.height / (2 * cell_size)) ) * 2 * cell_size ) + cell_size*2;
}
else { //Make number of pixels even (helps with some logic involving half-dimensions)
_tmpl_sz.width = (_tmpl_sz.width / 2) * 2;
_tmpl_sz.height = (_tmpl_sz.height / 2) * 2;
}
}
// Size of the window to sample from the image
extracted_roi.width = scale_adjust * _scale * _tmpl_sz.width;
extracted_roi.height = scale_adjust * _scale * _tmpl_sz.height;
// center roi with new size
// (top-left corner of the sampled window)
extracted_roi.x = cx - extracted_roi.width / 2;
extracted_roi.y = cy - extracted_roi.height / 2;
// Cut the window out of the image; parts outside the image are padded
// (BORDER_REPLICATE is passed to the helper)
cv::Mat FeaturesMap;
cv::Mat z = RectTools::subwindow(image, extracted_roi, cv::BORDER_REPLICATE);
// Resize the patch to the template size when it differs
if (z.cols != _tmpl_sz.width || z.rows != _tmpl_sz.height) {
cv::resize(z, z, _tmpl_sz);
}
// HOG features
if (_hogfeatures) {
IplImage z_ipl = z;
CvLSVMFeatureMapCaskade *map; // allocated by getFeatureMaps, released below
getFeatureMaps(&z_ipl, cell_size, &map); // build the raw per-cell histograms
normalizeAndTruncate(map,0.2f); // normalise and clip at 0.2
PCAFeatureMaps(map); // reduce each cell's feature vector
size_patch[0] = map->sizeY;
size_patch[1] = map->sizeX;
size_patch[2] = map->numFeatures;
FeaturesMap = cv::Mat(cv::Size(map->numFeatures,map->sizeX*map->sizeY), CV_32F, map->map); // Procedure do deal with cv::Mat multichannel bug
FeaturesMap = FeaturesMap.t();
freeFeatureMapObject(&map);
// Lab features
// Author's note: in my tests, adding Lab features did not work well in
// some tracking situations
if (_labfeatures) {
cv::Mat imgLab;
cvtColor(z, imgLab, CV_BGR2Lab);
unsigned char *input = (unsigned char*)(imgLab.data);
// Sparse output vector
cv::Mat outputLab = cv::Mat(_labCentroids.rows, size_patch[0]*size_patch[1], CV_32F, float(0));
int cntCell = 0;
// Iterate through each cell
// (the outermost ring of cells is skipped)
for (int cY = cell_size; cY < z.rows-cell_size; cY+=cell_size){
for (int cX = cell_size; cX < z.cols-cell_size; cX+=cell_size){
// Iterate through each pixel of cell (cX,cY)
for(int y = cY; y < cY+cell_size; ++y){
for(int x = cX; x < cX+cell_size; ++x){
// Lab components for each pixel
// NOTE(review): indexing assumes imgLab is stored as a packed
// 3-byte-per-pixel buffer without row padding - confirm
float l = (float)input[(z.cols * y + x) * 3];
float a = (float)input[(z.cols * y + x) * 3 + 1];
float b = (float)input[(z.cols * y + x) * 3 + 2];
// Iterate trough each centroid
float minDist = FLT_MAX;
int minIdx = 0;
float *inputCentroid = (float*)(_labCentroids.data);
for(int k = 0; k < _labCentroids.rows; ++k){
float dist = ( (l - inputCentroid[3*k]) * (l - inputCentroid[3*k]) )
+ ( (a - inputCentroid[3*k+1]) * (a - inputCentroid[3*k+1]) )
+ ( (b - inputCentroid[3*k+2]) * (b - inputCentroid[3*k+2]) );
if(dist < minDist){
minDist = dist;
minIdx = k;
}
}
// Store result at output
// (each pixel adds 1 / cell_sizeQ to the bin of its nearest centroid)
outputLab.at<float>(minIdx, cntCell) += 1.0 / cell_sizeQ;
//((float*) outputLab.data)[minIdx * (size_patch[0]*size_patch[1]) + cntCell] += 1.0 / cell_sizeQ;
}
}
cntCell++;
}
}
// Update size_patch[2] and add features to FeaturesMap
size_patch[2] += _labCentroids.rows;
FeaturesMap.push_back(outputLab);
}
}
else {
// Raw pixels: gray image shifted by -0.5
FeaturesMap = RectTools::getGrayImage(z);
FeaturesMap -= (float) 0.5; // In Paper;
size_patch[0] = z.rows;
size_patch[1] = z.cols;
size_patch[2] = 1;
}
// The Hanning window depends on size_patch, so it is built after the
// features of the first frame are known
if (inithann) {
createHanningMats();
}
FeaturesMap = hann.mul(FeaturesMap);
return FeaturesMap;
}
// Initialize Hanning window. Function called only in the first frame.
// 初始化hanning窗,只执行一次,使用opencv函数做的
void KCFTracker::createHanningMats()
{
cv::Mat hann1t = cv::Mat(cv::Size(size_patch[1],1), CV_32F, cv::Scalar(0));
cv::Mat hann2t = cv::Mat(cv::Size(1,size_patch[0]), CV_32F, cv::Scalar(0));
for (int i = 0; i < hann1t.cols; i++)
hann1t.at<float > (0, i) = 0.5 * (1 - std::cos(2 * 3.14159265358979323846 * i / (hann1t.cols - 1)));
for (int i = 0; i < hann2t.rows; i++)
hann2t.at<float > (i, 0) = 0.5 * (1 - std::cos(2 * 3.14159265358979323846 * i / (hann2t.rows - 1)));
cv::Mat hann2d = hann2t * hann1t;
// HOG features
if (_hogfeatures) {
cv::Mat hann1d = hann2d.reshape(1,1); // Procedure do deal with cv::Mat multichannel bug
hann = cv::Mat(cv::Size(size_patch[0]*size_patch[1], size_patch[2]), CV_32F, cv::Scalar(0));
for (int i = 0; i < size_patch[2]; i++) {
for (int j = 0; j<size_patch[0]*size_patch[1]; j++) {
hann.at<float>(i,j) = hann1d.at<float>(0,j);
}
}
}
// Gray features
else {
hann = hann2d;
}
}
// Calculate sub-pixel peak for one dimension
// From the values left of, at, and right of the peak, returns the offset to
// add to the integer peak position; 0 when the divisor is zero.
float KCFTracker::subPixelPeak(float left, float center, float right)
{
    const float divisor = 2 * center - right - left;

    if (divisor != 0) {
        return 0.5 * (right - left) / divisor;
    }
    return 0;
}
fhog.hpp:hog的相关函数
#ifndef _FHOG_H_
#define _FHOG_H_
#include <stdio.h>
//#include "_lsvmc_types.h"
//#include "_lsvmc_error.h"
//#include "_lsvmc_routine.h"
//#include "opencv2/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
//modified from "_lsvmc_types.h"
// DataType: STRUCT featureMap
// FEATURE MAP DESCRIPTION
// Rectangular map (sizeX x sizeY),
// every cell stores feature vector (dimension = numFeatures)
// map - matrix of feature vectors
// to set and get feature vectors (i,j)
// used formula map[(j * sizeX + i) * p + k], where
// k - component of feature vector in cell (i, j)
// p - length of one feature vector (numFeatures)
typedef struct{
int sizeX; // number of cells along x
int sizeY; // number of cells along y
int numFeatures; // length of the feature vector stored per cell
float *map; // sizeX * sizeY * numFeatures floats, heap allocated
} CvLSVMFeatureMapCaskade;
#include "float.h"
// Numeric helper constants
#define PI CV_PI
#define EPS 0.000001
#define F_MAX FLT_MAX
#define F_MIN -FLT_MAX
// The number of elements in bin
// The number of sectors in gradient histogram building
#define NUM_SECTOR 9
// The number of levels in image resize procedure
// We need Lambda levels to resize image twice
#define LAMBDA 10
// Block size. Used in feature pyramid building procedure
#define SIDE_LENGTH 8
// Truncation threshold constant (the tracker passes 0.2f to
// normalizeAndTruncate explicitly)
#define VAL_OF_TRUNCATE 0.2f
//modified from "_lsvm_error.h"
// Status / error codes. The functions visible in this file only return
// LATENT_SVM_OK and LATENT_SVM_MEM_NULL.
#define LATENT_SVM_OK 0
#define LATENT_SVM_MEM_NULL 2
#define DISTANCE_TRANSFORM_OK 1
#define DISTANCE_TRANSFORM_GET_INTERSECTION_ERROR -1
#define DISTANCE_TRANSFORM_ERROR -2
#define DISTANCE_TRANSFORM_EQUAL_POINTS -3
#define LATENT_SVM_GET_FEATURE_PYRAMID_FAILED -4
#define LATENT_SVM_SEARCH_OBJECT_FAILED -5
#define LATENT_SVM_FAILED_SUPERPOSITION -6
#define FILTER_OUT_OF_BOUNDARIES -7
#define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8
#define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9
#define FFT_OK 2
#define FFT_ERROR -10
#define LSVM_PARSER_FILE_NOT_FOUND -11
/*
// Getting feature map for the selected subimage
//
// API
// int getFeatureMaps(const IplImage * image, const int k, CvLSVMFeatureMapCaskade **map);
// INPUT
// image - selected subimage
// k - size of cells
// OUTPUT
// map - feature map (allocated by the function; release it with
//       freeFeatureMapObject)
// RESULT
// Error status
*/
int getFeatureMaps(const IplImage * image, const int k, CvLSVMFeatureMapCaskade **map);
/*
// Feature map Normalization and Truncation
//
// API
// int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa);
// INPUT
// map - feature map
// alfa - truncation threshold
// OUTPUT
// map - truncated and normalized feature map
// RESULT
// Error status
*/
int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa);
/*
// Feature map reduction
// In each cell we reduce dimension of the feature vector
// according to original paper special procedure
//
// API
// int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
// INPUT
// map - feature map
// OUTPUT
// map - feature map
// RESULT
// Error status
*/
int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map);
//modified from "lsvmc_routine.h"
// Allocate a zero-filled feature map of sizeX x sizeY cells with p values
// per cell and store it in *obj.
int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX, const int sizeY,
const int p);
// Free the feature map referenced by *obj and set *obj to NULL.
int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj);
#endif
fhog.cpp:fhog的相关函数定义
#include "fhog.hpp"
#ifdef HAVE_TBB
#include <tbb/tbb.h>
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#endif
// Fallback max / min macros, defined only when no macro of that name exists.
// Note: each argument is evaluated twice, so avoid arguments with side effects.
#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif
/*
// Getting feature map for the selected subimage
//
// Computes image gradients, picks for every interior pixel the channel with
// the largest gradient magnitude, assigns the gradient to one of NUM_SECTOR
// unsigned and 2 * NUM_SECTOR signed orientation bins, and accumulates the
// magnitudes into per-cell histograms (3 * NUM_SECTOR values per cell),
// sharing every pixel's vote with the neighbouring cells.
//
// API
// int getFeatureMaps(const IplImage * image, const int k, CvLSVMFeatureMapCaskade **map);
// INPUT
// image - selected subimage
// k - size of cells
// OUTPUT
// map - feature map (allocated here through allocFeatureMapObject)
// RESULT
// Error status (always LATENT_SVM_OK in this implementation)
*/
int getFeatureMaps(const IplImage* image, const int k, CvLSVMFeatureMapCaskade **map)
{
int sizeX, sizeY;
int p, px, stringSize;
int height, width, numChannels;
int i, j, kk, c, ii, jj, d;
float * datadx, * datady;
int ch;
float magnitude, x, y, tx, ty;
IplImage * dx, * dy;
int *nearest;
float *w, a_x, b_x;
// Derivative kernel {-1, 0, 1}, used once as a row and once as a column
float kernel[3] = {-1.f, 0.f, 1.f};
CvMat kernel_dx = cvMat(1, 3, CV_32F, kernel); // 1x3 matrix
CvMat kernel_dy = cvMat(3, 1, CV_32F, kernel); // 3x1 matrix
float * r;
int * alfa;
float boundary_x[NUM_SECTOR + 1]; // NUM_SECTOR + 1 = 10 entries
float boundary_y[NUM_SECTOR + 1];
float max, dotProd;
int maxi;
height = image->height;
width = image->width ;
numChannels = image->nChannels;
// Float gradient images with the same size as the input
// NOTE(review): they are created with 3 channels regardless of
// image->nChannels, while the loops below index them with numChannels -
// this looks like it assumes a 3-channel input; confirm before passing
// images with a different channel count
dx = cvCreateImage(cvSize(image->width, image->height),
IPL_DEPTH_32F, 3);
dy = cvCreateImage(cvSize(image->width, image->height),
IPL_DEPTH_32F, 3);
// Number of whole cells per direction (integer division by k = cell size)
sizeX = width / k;
sizeY = height / k;
px = 3 * NUM_SECTOR; // px = 3 * 9 = 27
p = px;
stringSize = sizeX * p; // floats per row of cells = 27 * sizeX
allocFeatureMapObject(map, sizeX, sizeY, p);
// cvFilter2D(src, dst, kernel, anchor): linear filtering of the image.
// image is the input, dx / dy are the outputs, kernel_dx / kernel_dy are
// the single-channel float kernels.
// NOTE(review): the anchors are passed as cvPoint(-1, 0) and cvPoint(0, -1);
// presumably -1 selects the kernel centre along that axis - confirm against
// the OpenCV documentation
cvFilter2D(image, dx, &kernel_dx, cvPoint(-1, 0)); // derivative along x
cvFilter2D(image, dy, &kernel_dy, cvPoint(0, -1)); // derivative along y
// Precompute cos / sin of the sector boundary angles i * PI / NUM_SECTOR
float arg_vector;
for(i = 0; i <= NUM_SECTOR; i++)
{
arg_vector = ( (float) i ) * ( (float)(PI) / (float)(NUM_SECTOR) );
boundary_x[i] = cosf(arg_vector);
boundary_y[i] = sinf(arg_vector);
}/*for(i = 0; i <= NUM_SECTOR; i++) */
// r: gradient magnitude per pixel; alfa: two orientation bin indices per pixel
// (border pixels are never written here and never read below)
r = (float *)malloc( sizeof(float) * (width * height));
alfa = (int *)malloc( sizeof(int ) * (width * height * 2));
for(j = 1; j < height - 1; j++)
{
// Start of row j in the gradient images
datadx = (float*)(dx->imageData + dx->widthStep * j);
datady = (float*)(dy->imageData + dy->widthStep * j);
// Visit every interior pixel of the row
for(i = 1; i < width - 1; i++)
{
// Start with the first colour channel
c = 0;
x = (datadx[i * numChannels + c]);
y = (datady[i * numChannels + c]);
r[j * width + i] =sqrtf(x * x + y * y);
// Keep the channel whose gradient magnitude is the largest
for(ch = 1; ch < numChannels; ch++)
{
tx = (datadx[i * numChannels + ch]);
ty = (datady[i * numChannels + ch]);
magnitude = sqrtf(tx * tx + ty * ty);
if(magnitude > r[j * width + i])
{
r[j * width + i] = magnitude;
c = ch;
x = tx;
y = ty;
}
}/*for(ch = 1; ch < numChannels; ch++)*/
// Find the sector direction with the largest projection of (x, y);
// a larger negative projection selects the opposite direction
max = boundary_x[0] * x + boundary_y[0] * y; // = 1 * x + 0 * y
maxi = 0;
for (kk = 0; kk < NUM_SECTOR; kk++)
{
dotProd = boundary_x[kk] * x + boundary_y[kk] * y;
if (dotProd > max)
{
max = dotProd;
maxi = kk;
}
else
{
if (-dotProd > max)
{
max = -dotProd;
maxi = kk + NUM_SECTOR; // opposite direction: shift by NUM_SECTOR
}
}
}
// Store the bin index folded into [0, NUM_SECTOR) and the full index
// in [0, 2 * NUM_SECTOR)
alfa[j * width * 2 + i * 2 ] = maxi % NUM_SECTOR;
alfa[j * width * 2 + i * 2 + 1] = maxi;
}/*for(i = 1; i < width - 1; i++)*/
}/*for(j = 1; j < height - 1; j++)*/
nearest = (int *)malloc(sizeof(int ) * k);
w = (float*)malloc(sizeof(float) * (k * 2));
// nearest[pos]: offset (-1 or +1) of the neighbouring cell that shares the
// vote of a pixel at position pos inside a cell; e.g. {-1,-1,1,1} for k = 4
for(i = 0; i < k / 2; i++)
{
nearest[i] = -1;
}/*for(i = 0; i < k / 2; i++)*/
for(i = k / 2; i < k; i++)
{
nearest[i] = 1;
}/*for(i = k / 2; i < k; i++)*/
// Interpolation weights for a pixel at position j inside a cell:
//              1/a                       1/b
// w[2j]   = _________       w[2j+1] = _________
//           1/a + 1/b                 1/a + 1/b
// w[2j] weights the pixel's own cell, w[2j+1] the neighbouring cell
// selected by nearest[j]
for(j = 0; j < k / 2; j++)
{
b_x = k / 2 + j + 0.5f;
a_x = k / 2 - j - 0.5f;
w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
}/*for(j = 0; j < k / 2; j++)*/
for(j = k / 2; j < k; j++)
{
a_x = j - k / 2 + 0.5f;
b_x =-j + k / 2 - 0.5f + k;
w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
}/*for(j = k / 2; j < k; j++)*/
// Accumulate every interior pixel into the histogram of its own cell and,
// where they exist, of the vertical, horizontal and diagonal neighbour cells
for(i = 0; i < sizeY; i++)
{
for(j = 0; j < sizeX; j++)
{
for(ii = 0; ii < k; ii++)
{
for(jj = 0; jj < k; jj++)
{
if ((i * k + ii > 0) &&
(i * k + ii < height - 1) &&
(j * k + jj > 0) &&
(j * k + jj < width - 1))
{
d = (k * i + ii) * width + (j * k + jj);
(*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 ]] +=
r[d] * w[ii * 2] * w[jj * 2];
(*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2] * w[jj * 2];
if ((i + nearest[ii] >= 0) &&
(i + nearest[ii] <= sizeY - 1))
{
(*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 ];
(*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 ];
}
if ((j + nearest[jj] >= 0) &&
(j + nearest[jj] <= sizeX - 1))
{
(*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] +=
r[d] * w[ii * 2] * w[jj * 2 + 1];
(*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2] * w[jj * 2 + 1];
}
if ((i + nearest[ii] >= 0) &&
(i + nearest[ii] <= sizeY - 1) &&
(j + nearest[jj] >= 0) &&
(j + nearest[jj] <= sizeX - 1))
{
(*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
(*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
}
}
}/*for(jj = 0; jj < k; jj++)*/
}/*for(ii = 0; ii < k; ii++)*/
}/*for(j = 0; j < sizeX; j++)*/
}/*for(i = 0; i < sizeY; i++)*/
// Release the temporary buffers
cvReleaseImage(&dx);
cvReleaseImage(&dy);
free(w);
free(nearest);
free(r);
free(alfa);
return LATENT_SVM_OK;
}
/*
// Feature map Normalization and Truncation
//
// Drops the outermost ring of cells and, for every remaining cell, writes
// its features normalised by four different 2x2 cell neighbourhoods
// (12 * NUM_SECTOR values per cell); values above alfa are clipped to alfa.
// The map's buffer is replaced and sizeX, sizeY, numFeatures are updated.
//
// API
// int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa);
// INPUT
// map - feature map
// alfa - truncation threshold
// OUTPUT
// map - truncated and normalized feature map
// RESULT
// Error status (always LATENT_SVM_OK in this implementation)
*/
//
int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa)
{
int i,j, ii;
int sizeX, sizeY, p, pos, pp, xp, pos1, pos2;
float * partOfNorm; // norm of C(i, j)
float * newData;
float valOfNorm;
sizeX = map->sizeX;
sizeY = map->sizeY;
partOfNorm = (float *)malloc (sizeof(float) * (sizeX * sizeY));
p = NUM_SECTOR; // length of the first feature group
xp = NUM_SECTOR * 3; // stride per cell in the input (indexing assumes numFeatures == 27)
pp = NUM_SECTOR * 12; // features per cell in the output
// Per cell: sum of squares of its first p features
for(i = 0; i < sizeX * sizeY; i++)
{
valOfNorm = 0.0f;
pos = i * map->numFeatures;
for(j = 0; j < p; j++)
{
valOfNorm += map->map[pos + j] * map->map[pos + j];
}/*for(j = 0; j < p; j++)*/
partOfNorm[i] = valOfNorm;
}/*for(i = 0; i < sizeX * sizeY; i++)*/
// The output has no border cells: one cell is removed on every side
sizeX -= 2;
sizeY -= 2;
newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
//normalization
// i, j index the input map (which is sizeX + 2 cells wide); each of the four
// sections below uses a different 2x2 neighbourhood around cell (i, j)
for(i = 1; i <= sizeY; i++)
{
for(j = 1; j <= sizeX; j++)
{
// Neighbourhood: rows i, i + 1 and columns j, j + 1
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j + 1)] +
partOfNorm[(i + 1)*(sizeX + 2) + (j )] +
partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON;
pos1 = (i ) * (sizeX + 2) * xp + (j ) * xp; // cell start in the input
pos2 = (i-1) * (sizeX ) * pp + (j-1) * pp; // cell start in the output
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii ] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 4] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
// Neighbourhood: rows i - 1, i and columns j, j + 1
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j + 1)] +
partOfNorm[(i - 1)*(sizeX + 2) + (j )] +
partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii + p ] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 6] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
// Neighbourhood: rows i, i + 1 and columns j - 1, j
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j - 1)] +
partOfNorm[(i + 1)*(sizeX + 2) + (j )] +
partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii + p * 2] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 8] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
// Neighbourhood: rows i - 1, i and columns j - 1, j
valOfNorm = sqrtf(
partOfNorm[(i )*(sizeX + 2) + (j )] +
partOfNorm[(i )*(sizeX + 2) + (j - 1)] +
partOfNorm[(i - 1)*(sizeX + 2) + (j )] +
partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON;
for(ii = 0; ii < p; ii++)
{
newData[pos2 + ii + p * 3 ] = map->map[pos1 + ii ] / valOfNorm;
}/*for(ii = 0; ii < p; ii++)*/
for(ii = 0; ii < 2 * p; ii++)
{
newData[pos2 + ii + p * 10] = map->map[pos1 + ii + p] / valOfNorm;
}/*for(ii = 0; ii < 2 * p; ii++)*/
}/*for(j = 1; j <= sizeX; j++)*/
}/*for(i = 1; i <= sizeY; i++)*/
//truncation
for(i = 0; i < sizeX * sizeY * pp; i++)
{
if(newData [i] > alfa) newData [i] = alfa;
}/*for(i = 0; i < sizeX * sizeY * pp; i++)*/
//swap data: the map now owns the new buffer and reports the new dimensions
map->numFeatures = pp;
map->sizeX = sizeX;
map->sizeY = sizeY;
free (map->map);
free (partOfNorm);
map->map = newData;
return LATENT_SVM_OK;
}
/*
// Feature map reduction
// In each cell we reduce dimension of the feature vector
// according to original paper special procedure
//
// Every cell's vector is replaced by NUM_SECTOR * 3 + 4 values built from
// sums over the input vector; the map's buffer and numFeatures are updated.
//
// API
// int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
// INPUT
// map - feature map
// OUTPUT
// map - feature map
// RESULT
// Error status (always LATENT_SVM_OK in this implementation)
*/
int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
{
int i,j, ii, jj, k;
int sizeX, sizeY, p, pp, xp, yp, pos1, pos2;
float * newData;
float val;
float nx, ny;
// Set up the dimensions used by the reduction
sizeX = map->sizeX;
sizeY = map->sizeY;
p = map->numFeatures; // input features per cell
pp = NUM_SECTOR * 3 + 4; // output features per cell: 9 * 3 + 4
yp = 4; // number of groups summed over / iterated below
xp = NUM_SECTOR;
nx = 1.0f / sqrtf((float)(xp * 2));
ny = 1.0f / sqrtf((float)(yp ));
// Buffer that will replace map->map
newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
for(i = 0; i < sizeY; i++)
{
for(j = 0; j < sizeX; j++)
{
pos1 = ((i)*sizeX + j)*p; // cell start in the input
pos2 = ((i)*sizeX + j)*pp; // cell start in the output
k = 0;
// 2 * xp outputs: for each jj, sum over the yp groups of 2 * xp values
// that start at offset yp * xp, scaled by ny
for(jj = 0; jj < xp * 2; jj++)
{
val = 0;
for(ii = 0; ii < yp; ii++)
{
val += map->map[pos1 + yp * xp + ii * xp * 2 + jj];
}/*for(ii = 0; ii < yp; ii++)*/
newData[pos2 + k] = val * ny;
k++;
}/*for(jj = 0; jj < xp * 2; jj++)*/
// xp outputs: for each jj, sum over the yp groups of xp values at the
// start of the vector, scaled by ny
for(jj = 0; jj < xp; jj++)
{
val = 0;
for(ii = 0; ii < yp; ii++)
{
val += map->map[pos1 + ii * xp + jj];
}/*for(ii = 0; ii < yp; ii++)*/
newData[pos2 + k] = val * ny;
k++;
}/*for(jj = 0; jj < xp; jj++)*/
// yp outputs: for each group ii, sum of its 2 * xp values, scaled by nx
for(ii = 0; ii < yp; ii++)
{
val = 0;
for(jj = 0; jj < 2 * xp; jj++)
{
val += map->map[pos1 + yp * xp + ii * xp * 2 + jj];
}/*for(jj = 0; jj < 2 * xp; jj++)*/
newData[pos2 + k] = val * nx;
k++;
} /*for(ii = 0; ii < yp; ii++)*/
}/*for(j = 0; j < sizeX; j++)*/
}/*for(i = 0; i < sizeY; i++)*/
//swap data
// The map takes ownership of the new buffer; the old one is freed
map->numFeatures = pp;
free (map->map);
map->map = newData;
return LATENT_SVM_OK; // LATENT_SVM_OK is 0
}
//modified from "lsvmc_routine.cpp"
// Allocates a feature map header plus a zero-filled buffer of
// sizeX * sizeY * numFeatures floats and stores the header in *obj.
// Always returns LATENT_SVM_OK; allocation results are not checked.
int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX,
                          const int sizeY, const int numFeatures)
{
    (*obj) = (CvLSVMFeatureMapCaskade *)malloc(sizeof(CvLSVMFeatureMapCaskade));
    CvLSVMFeatureMapCaskade *featureMap = *obj;

    featureMap->sizeX       = sizeX;
    featureMap->sizeY       = sizeY;
    featureMap->numFeatures = numFeatures;

    const int totalValues = sizeX * sizeY * numFeatures;
    featureMap->map = (float *) malloc(sizeof (float) * (totalValues));

    // Clear the buffer so that histogram accumulation starts from zero
    for (int remaining = totalValues; remaining > 0; remaining--)
    {
        featureMap->map[remaining - 1] = 0.0f;
    }
    return LATENT_SVM_OK;
}
// Releases a feature map created by allocFeatureMapObject: frees the value
// buffer and the header, then sets *obj to NULL.
// Returns LATENT_SVM_MEM_NULL when *obj is already NULL, LATENT_SVM_OK otherwise.
int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj)
{
    CvLSVMFeatureMapCaskade *featureMap = *obj;
    if (featureMap != NULL)
    {
        free(featureMap->map);
        free(featureMap);
        (*obj) = NULL;
        return LATENT_SVM_OK;
    }
    return LATENT_SVM_MEM_NULL;
}
ffttools.hpp
#pragma once
//#include <cv.h>
#ifndef _OPENCV_FFTTOOLS_HPP_
#define _OPENCV_FFTTOOLS_HPP_
#endif
//NOTE: FFTW support is still shaky, disabled for now.
/*#ifdef USE_FFTW
#include <fftw3.h>
#endif*/
namespace FFTTools
{
// Previous declarations, to avoid warnings
// (fftd's default argument is given here, so the definition omits it)
cv::Mat fftd(cv::Mat img, bool backwards = false); // forward DFT, or inverse when backwards is true
cv::Mat real(cv::Mat img); // first channel after cv::split
cv::Mat imag(cv::Mat img); // second channel after cv::split
cv::Mat magnitude(cv::Mat img); // magnitude of a 1- or 2-channel matrix
cv::Mat complexMultiplication(cv::Mat a, cv::Mat b);
cv::Mat complexDivision(cv::Mat a, cv::Mat b);
void rearrange(cv::Mat &img);
void normalizedLogTransform(cv::Mat &img);
// Computes the 2-D discrete Fourier transform of img with cv::dft.
//
// img        single-channel (real) or multi-channel matrix. A single-channel
//            input is converted to float and paired with an all-zero
//            imaginary plane before the transform. Any other input is handed
//            to cv::dft unchanged (presumably two-channel complex data —
//            TODO confirm against callers).
// backwards  false: forward transform (no flags);
//            true:  inverse transform with scaling
//                   (cv::DFT_INVERSE | cv::DFT_SCALE).
//
// Returns the transform in a matrix that does not alias the caller's data.
// cv::Mat copies share their pixel buffer, so running cv::dft in place on a
// multi-channel argument would overwrite the caller's matrix; the result is
// therefore written to a separate output matrix.
//
// NOTE: the commented-out experimental FFTW code path that used to live in
// this function has been removed; only the OpenCV implementation remains.
cv::Mat fftd(cv::Mat img, bool backwards)
{
    const int flags = backwards ? (cv::DFT_INVERSE | cv::DFT_SCALE) : 0;

    if (img.channels() == 1)
    {
        cv::Mat planes[] = {cv::Mat_<float> (img), cv::Mat_<float>::zeros(img.size())};
        cv::Mat complexImg;
        cv::merge(planes, 2, complexImg);
        // complexImg is private to this call, so transforming in place is safe.
        cv::dft(complexImg, complexImg, flags);
        return complexImg;
    }

    cv::Mat spectrum;
    cv::dft(img, spectrum, flags);
    return spectrum;
}
// Returns the real part of img, i.e. channel 0 after splitting img into
// its separate channels.
cv::Mat real(cv::Mat img)
{
    std::vector<cv::Mat> channels;
    cv::split(img, channels);
    return channels.front();
}
// Returns the imaginary part of img, i.e. channel 1 after splitting img into
// its separate channels.
// When img has fewer than two channels (a purely real or empty matrix) there
// is no imaginary plane to read, so an all-zero matrix with the same size and
// type as img is returned instead of indexing past the end of the channel list.
cv::Mat imag(cv::Mat img)
{
    std::vector<cv::Mat> channels;
    cv::split(img, channels);
    if (channels.size() < 2)
    {
        return cv::Mat::zeros(img.size(), img.type());
    }
    return channels[1];
}
// Returns the element-wise magnitude of img.
//   1 channel : absolute value of img
//   2 channels: sqrt(re^2 + im^2), with channel 0 as re and channel 1 as im
//   otherwise : assert(0); an empty matrix is returned if assertions are off
cv::Mat magnitude(cv::Mat img)
{
    std::vector<cv::Mat> channels;
    cv::split(img, channels); // channels[0] = real part, channels[1] = imaginary part

    cv::Mat result;
    switch (channels.size())
    {
    case 1:
        result = cv::abs(img);
        break;
    case 2:
        cv::magnitude(channels[0], channels[1], result);
        break;
    default:
        assert(0);
        break;
    }
    return result;
}
// Element-wise complex product of a and b. Channel 0 of each matrix is used
// as the real part and channel 1 as the imaginary part:
//   re = a_re * b_re - a_im * b_im
//   im = a_re * b_im + a_im * b_re
// Returns a two-channel matrix (re, im).
cv::Mat complexMultiplication(cv::Mat a, cv::Mat b)
{
    std::vector<cv::Mat> lhs;
    std::vector<cv::Mat> rhs;
    cv::split(a, lhs);
    cv::split(b, rhs);

    const cv::Mat realPart = lhs[0].mul(rhs[0]) - lhs[1].mul(rhs[1]);
    const cv::Mat imagPart = lhs[0].mul(rhs[1]) + lhs[1].mul(rhs[0]);

    std::vector<cv::Mat> product;
    product.push_back(realPart);
    product.push_back(imagPart);

    cv::Mat res;
    cv::merge(product, res);
    return res;
}
// Element-wise complex quotient a / b. Channel 0 of each matrix is used as
// the real part and channel 1 as the imaginary part:
//   a / b = a * conj(b) / |b|^2
//   re = (a_re * b_re + a_im * b_im) / (b_re^2 + b_im^2)
//   im = (a_im * b_re - a_re * b_im) / (b_re^2 + b_im^2)
// Returns a two-channel matrix (re, im).
cv::Mat complexDivision(cv::Mat a, cv::Mat b)
{
    std::vector<cv::Mat> pa;
    std::vector<cv::Mat> pb;
    cv::split(a, pa);
    cv::split(b, pb);

    // Reciprocal of the squared modulus of b.
    cv::Mat divisor = 1. / (pb[0].mul(pb[0]) + pb[1].mul(pb[1]));

    std::vector<cv::Mat> pres;
    pres.push_back((pa[0].mul(pb[0]) + pa[1].mul(pb[1])).mul(divisor));
    // The cross terms are subtracted: multiplying by conj(b) flips the sign
    // of b's imaginary part.
    pres.push_back((pa[1].mul(pb[0]) - pa[0].mul(pb[1])).mul(divisor));

    cv::Mat res;
    cv::merge(pres, res);
    return res;
}
// Swaps the quadrants of img diagonally, in place:
//   0 1   becomes   3 2
//   2 3             1 0
// Each quadrant is (cols / 2) x (rows / 2), so for odd dimensions the last
// row and/or column is left untouched.
void rearrange(cv::Mat &img)
{
    const int halfW = img.cols / 2;
    const int halfH = img.rows / 2;

    // Views into img, one per quadrant; no pixel data is copied here.
    cv::Mat topLeft     = img(cv::Rect(0,     0,     halfW, halfH));
    cv::Mat topRight    = img(cv::Rect(halfW, 0,     halfW, halfH));
    cv::Mat bottomLeft  = img(cv::Rect(0,     halfH, halfW, halfH));
    cv::Mat bottomRight = img(cv::Rect(halfW, halfH, halfW, halfH));

    // Top-right <-> bottom-left.
    cv::Mat savedTopRight = topRight.clone();
    bottomLeft.copyTo(topRight);
    savedTopRight.copyTo(bottomLeft);

    // Top-left <-> bottom-right.
    cv::Mat savedTopLeft = topLeft.clone();
    bottomRight.copyTo(topLeft);
    savedTopLeft.copyTo(bottomRight);
}
/*
template < typename type>
cv::Mat fouriertransFull(const cv::Mat & in)
{
return fftd(in);
cv::Mat planes[] = {cv::Mat_<type > (in), cv::Mat_<type>::zeros(in.size())};
cv::Mat t;
assert(planes[0].depth() == planes[1].depth());
assert(planes[0].size == planes[1].size);
cv::merge(planes, 2, t);
cv::dft(t, t);
//cv::normalize(a, a, 0, 1, CV_MINMAX);
//cv::normalize(t, t, 0, 1, CV_MINMAX);
// cv::imshow("a",real(a));
// cv::imshow("b",real(t));
// cv::waitKey(0);
return t;
}*/
// Replaces img with log(1 + |img|), computed element-wise.
// The [0, 1] normalisation step is commented out, so despite the function
// name the result is not normalised.
void normalizedLogTransform(cv::Mat &img)
{
    img = cv::abs(img);
    cv::add(img, cv::Scalar::all(1), img);
    cv::log(img, img);
    // cv::normalize(img, img, 0, 1, CV_MINMAX);
}
}
recttools.hpp:矩形框相关的工具函数
#pragma once
//#include <cv.h>
#include <math.h>
#ifndef _OPENCV_RECTTOOLS_HPP_
#define _OPENCV_RECTTOOLS_HPP_
#endif
namespace RectTools
{
// Returns the centre of rect as (x + width / 2, y + height / 2).
template <typename t>
inline cv::Vec<t, 2 > center(const cv::Rect_<t> &rect)
{
    cv::Vec<t, 2 > middle(rect.x + rect.width / (t) 2,
                          rect.y + rect.height / (t) 2);
    return middle;
}
// Returns the x coordinate of the right edge of rect (x + width).
template <typename t>
inline t x2(const cv::Rect_<t> &rect)
{
    const t rightEdge = rect.x + rect.width;
    return rightEdge;
}
// Returns the y coordinate of the bottom edge of rect (y + height).
template <typename t>
inline t y2(const cv::Rect_<t> &rect)
{
    const t bottomEdge = rect.y + rect.height;
    return bottomEdge;
}
// Scales rect about its centre, in place.
//   scalex  horizontal scale factor
//   scaley  vertical scale factor; 0 (the default) means "same as scalex"
template <typename t>
inline void resize(cv::Rect_<t> &rect, float scalex, float scaley = 0)
{
    const float factorY = scaley ? scaley : scalex;

    // Shift the origin by half of the size change so the centre stays put,
    // then apply the new size. The shift must be computed from the old size.
    rect.x -= rect.width * (scalex - 1.f) / 2.f;
    rect.y -= rect.height * (factorY - 1.f) / 2.f;
    rect.width *= scalex;
    rect.height *= factorY;
}
// Clips rect, in place, to the area covered by limit.
// The right/bottom edge is clipped first, then the left/top edge; a width or
// height that ends up negative (no overlap on that axis) is clamped to 0.
template <typename t>
inline void limit(cv::Rect_<t> &rect, cv::Rect_<t> limit)
{
    // Horizontal axis.
    if (rect.x + rect.width > limit.x + limit.width)
    {
        rect.width = (limit.x + limit.width - rect.x);
    }
    if (rect.x < limit.x)
    {
        rect.width -= (limit.x - rect.x);
        rect.x = limit.x;
    }
    if (rect.width < 0)
    {
        rect.width = 0;
    }

    // Vertical axis.
    if (rect.y + rect.height > limit.y + limit.height)
    {
        rect.height = (limit.y + limit.height - rect.y);
    }
    if (rect.y < limit.y)
    {
        rect.height -= (limit.y - rect.y);
        rect.y = limit.y;
    }
    if (rect.height < 0)
    {
        rect.height = 0;
    }
}
// Convenience overload: clips rect to the rectangle (x, y, width, height),
// with the origin defaulting to (0, 0).
template <typename t>
inline void limit(cv::Rect_<t> &rect, t width, t height, t x = 0, t y = 0)
{
    const cv::Rect_<t > bounds(x, y, width, height);
    limit(rect, bounds);
}
// Computes how far original sticks out beyond limited on each side.
// The margins are packed into the returned rectangle:
//   x = left margin, y = top margin, width = right margin, height = bottom margin
// Asserts that all four margins are non-negative.
template <typename t>
inline cv::Rect getBorder(const cv::Rect_<t > &original, cv::Rect_<t > & limited)
{
    cv::Rect_<t > margins(limited.x - original.x,
                          limited.y - original.y,
                          x2(original) - x2(limited),
                          y2(original) - y2(limited));
    assert(margins.x >= 0 && margins.y >= 0 && margins.width >= 0 && margins.height >= 0);
    return margins;
}
// Extracts the part of `in` covered by `window`.
// The window is first clipped to the image; any part of it that lies outside
// the image is filled in with cv::copyMakeBorder using borderType.
// Asserts that the clipped window is non-empty.
// When no padding is needed, the returned matrix is the ROI view in(window).
inline cv::Mat subwindow(const cv::Mat &in, const cv::Rect & window, int borderType = cv::BORDER_CONSTANT)
{
    cv::Rect visible = window;
    RectTools::limit(visible, in.cols, in.rows);
    assert(visible.height > 0 && visible.width > 0);

    // Margins by which the requested window exceeds the image on each side.
    const cv::Rect margins = RectTools::getBorder(window, visible);
    cv::Mat patch = in(visible);

    const bool needsPadding = margins.x != 0 || margins.y != 0 ||
                              margins.width != 0 || margins.height != 0;
    if (needsPadding)
    {
        // Argument order is top, bottom, left, right.
        cv::copyMakeBorder(patch, patch, margins.y, margins.height,
                           margins.x, margins.width, borderType);
    }
    return patch;
}
// Converts a BGR image to gray scale and returns it as a CV_32F matrix with
// every value multiplied by 1/255.
inline cv::Mat getGrayImage(cv::Mat img)
{
    cv::Mat gray;
    cv::cvtColor(img, gray, CV_BGR2GRAY);

    cv::Mat scaled;
    gray.convertTo(scaled, CV_32F, 1 / 255.f);
    return scaled;
}
}
runtracker.cpp:跟踪主程序
#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "kcftracker.hpp"
#include <dirent.h>
#include <sys/time.h>
using namespace std;
using namespace cv;
int main(int argc, char* argv[]){
struct timeval tv, tz,tv0, tz0;
if (argc > 5) return -1; // 输入大于5个参数
bool HOG = true; // 是否使用hog特征
bool FIXEDWINDOW = false; // 是否使用修正窗口
bool MULTISCALE = true; // 是否使用多尺度
bool SILENT = true; // 是否不做显示
bool LAB = false; // 是否使用LAB颜色
for(int i = 0; i < argc; i++){
if ( strcmp (argv[i], "hog") == 0 )
HOG = true;
if ( strcmp (argv[i], "fixed_window") == 0 )
FIXEDWINDOW = true;
if ( strcmp (argv[i], "singlescale") == 0 )
MULTISCALE = false;
if ( strcmp (argv[i], "show") == 0 )
SILENT = false;
if ( strcmp (argv[i], "lab") == 0 ){
LAB = true;
HOG = true;
}
if ( strcmp (argv[i], "gray") == 0 )
HOG = false;
}
// Create KCFTracker object
// 创建KCF跟踪器
KCFTracker tracker(HOG, FIXEDWINDOW, MULTISCALE, LAB);
// Frame readed
// 当前帧
Mat frame;
// Tracker results
// 跟踪结果目标框
Rect result;
// Path to list.txt
// images.txt的路径,用于读取图像
ifstream listFile;
string fileName = "images.txt";
listFile.open(fileName);
// Read groundtruth for the 1st frame
// 读取第一帧的目标区域
ifstream groundtruthFile;
string groundtruth = "region.txt";
groundtruthFile.open(groundtruth);
string firstLine;
getline(groundtruthFile, firstLine);
groundtruthFile.close();
istringstream ss(firstLine);
// Read groundtruth like a dumb
// 从给定的第一帧目标框读入四个顶点的坐标
float x1, y1, x2, y2, x3, y3, x4, y4;
char ch;
ss >> x1;
ss >> ch;
ss >> y1;
ss >> ch;
ss >> x2;
ss >> ch;
ss >> y2;
ss >> ch;
ss >> x3;
ss >> ch;
ss >> y3;
ss >> ch;
ss >> x4;
ss >> ch;
ss >> y4;
// Using min and max of X and Y for groundtruth rectangle
// 使用四个顶点计算出目标框
float xMin = min(x1, min(x2, min(x3, x4)));
float yMin = min(y1, min(y2, min(y3, y4)));
float width = max(x1, max(x2, max(x3, x4))) - xMin;
float height = max(y1, max(y2, max(y3, y4))) - yMin;
// Read Images
// 读图像
ifstream listFramesFile;
string listFrames = "images.txt";
listFramesFile.open(listFrames);
string frameName;
// Write Results
// 将结果写入output.txt
ofstream resultsFile;
string resultsPath = "output.txt";
resultsFile.open(resultsPath);
// Frame counter
// 帧号计数
int nFrames = 0;
char name_write[15] = {};
while ( getline(listFramesFile, frameName) ){
frameName = frameName;
// Read each frame from the list
// 读取列表上面的帧
frame = imread(frameName, CV_LOAD_IMAGE_COLOR);
// First frame, give the groundtruth to the tracker
// 使用第一帧和目标框来初始化跟踪器
if (nFrames == 0) {
tracker.init( Rect(xMin, yMin, width, height), frame );
rectangle( frame, Point( xMin, yMin ), Point( xMin+width, yMin+height), Scalar( 0, 255, 255 ), 1, 8 );
resultsFile << xMin << "," << yMin << "," << width << "," << height << endl;
}
// Update
// 更新当前帧的结果
else{
result = tracker.update(frame);
rectangle( frame, Point( result.x, result.y ), Point( result.x+result.width, result.y+result.height), Scalar( 0, 255, 255 ), 1, 8 );
resultsFile << result.x << "," << result.y << "," << result.width << "," << result.height << endl;
}
nFrames++;
// 显示并保存
if (!SILENT){
imshow("Image", frame);
waitKey(1);
sprintf(name_write, "%04d.jpg", nFrames);
imwrite(name_write, frame);
}
}
// 关闭文件
resultsFile.close();
listFile.close();
}