版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/jacke121/article/details/54315050
原文: http://blog.csdn.net/bisheng250/article/details/53672247?_t_t_t=0.27866495959460735
其中多尺度的处理主要实现在 kcftracker.cpp 文件的 KCFTracker::update 函数里面:在当前尺度之外,再分别用一个略大和一个略小的尺度各检测一次,比较三个响应峰值的结果来选择尺度,从而实现多尺度自适应。这种做法简单、高效、改动少。
tracker.h:定义Tracker类
- #pragma once
- #include <opencv2/opencv.hpp>
- #include <string>
- // Abstract tracker interface: a concrete tracker is initialised with a target box on the
- // first frame and then asked for the new box on every following frame.
- class Tracker
- {
- public:
-     Tracker() {}
-     virtual ~Tracker() {}
-     // Start tracking the target enclosed by `roi` in `image`.
-     virtual void init(const cv::Rect &roi, cv::Mat image) = 0;
-     // Process the next frame and return the target rectangle found in it.
-     virtual cv::Rect update(cv::Mat image) = 0;
- protected:
-     // Target rectangle maintained by the derived tracker, stored with float precision.
-     cv::Rect_<float> _roi;
- };
kcftracker.hpp:继承Tracker定义KCFTracker
- #pragma once
- #include "tracker.h"
- #ifndef _OPENCV_KCFTRACKER_HPP_
- #define _OPENCV_KCFTRACKER_HPP_
- #endif
- // KCF (Kernelized Correlation Filter) tracker declared on top of the abstract Tracker interface.
- // The public data members are tuning parameters that the constructor fills in.
- class KCFTracker : public Tracker
- {
- public:
- // Constructor: selects the feature type and the scale handling of the tracker
- KCFTracker(bool hog = true, // use HOG features
- bool fixed_window = true, // use a fixed window (template) size
- bool multiscale = true, // use multi-scale estimation
- bool lab = true); // use Lab color-space features
- // Initialize tracker
- // roi is the initial target box, image is the first frame of the sequence
- virtual void init(const cv::Rect &roi, cv::Mat image);
- // Update position based on the new frame
- // image is the new frame; the updated target box is returned
- virtual cv::Rect update(cv::Mat image);
- float interp_factor; // linear interpolation factor for adaptation
- // (the constructor picks a different value depending on hog/lab)
- float sigma; // gaussian kernel bandwidth
- // (the constructor picks a different value depending on hog/lab)
- float lambda; // regularization
- // (set to 0.0001 by the constructor)
- int cell_size; // HOG cell size
- // (4 with HOG features, 1 with raw pixels)
- int cell_sizeQ; // cell size^2, to avoid repeated operations
- // (number of pixels in one cell; only assigned by the constructor when lab is used)
- float padding; // extra area surrounding the target
- // (set to 2.5 by the constructor)
- float output_sigma_factor; // bandwidth of gaussian target
- // (the constructor picks a different value depending on hog/lab)
- int template_size; // template size
- // (when > 1, getFeatures scales the padded box so that its larger
- // side becomes template_size and the other side shrinks proportionally)
- float scale_step; // scale step for multi-scale estimation
- // (1 disables the multi-scale search in update)
- float scale_weight; // to downweight detection scores of other scales for added stability
- // (0.95 when multiscale is enabled; the peak of a neighbouring scale is multiplied by it)
- protected:
- // Detect object in the current frame.
- // z is the template learned so far (from init / previous frames), x holds the features of the
- // current frame at the scale being tested, peak_value receives the peak of the response
- cv::Point2f detect(cv::Mat z, cv::Mat x, float &peak_value);
- // train tracker with a single image
- // x holds the features of the current frame at the current scale,
- // train_interp_factor is the blending weight (update passes interp_factor, init passes 1.0)
- void train(cv::Mat x, float train_interp_factor);
- // Evaluates a Gaussian kernel with bandwidth SIGMA for all relative shifts between input images X and Y,
- // which must both be MxN. They must also be periodic (ie., pre-processed with a cosine window).
- cv::Mat gaussianCorrelation(cv::Mat x1, cv::Mat x2);
- // Create Gaussian Peak. Function called only in the first frame.
- cv::Mat createGaussianPeak(int sizey, int sizex);
- // Obtain sub-window from image, with replication-padding and extract features
- cv::Mat getFeatures(const cv::Mat & image, bool inithann, float scale_adjust = 1.0f);
- // Initialize Hanning window. Function called only in the first frame.
- void createHanningMats();
- // Calculate sub-pixel peak for one dimension
- float subPixelPeak(float left, float center, float right);
- cv::Mat _alphaf; // filter coefficients produced by init/train, used by detect
- cv::Mat _prob; // Gaussian target created once in init, used by train and never changed afterwards
- cv::Mat _tmpl; // template produced by init/train, passed to detect as z
- cv::Mat _num; // only referenced from commented-out code in the visible sources
- cv::Mat _den; // only referenced from commented-out code in the visible sources
- cv::Mat _labCentroids; // Lab centroid table (one centroid per row)
- private:
- int size_patch[3]; // feature map sizeY, sizeX, numFeatures
- cv::Mat hann; // window computed by createHanningMats()
- cv::Size _tmpl_sz; // template size in pixels (rounded to the cell grid when HOG is used)
- float _scale; // ratio between the sampled image region and _tmpl_sz
- int _gaussian_size; // not referenced in the visible sources
- bool _hogfeatures; // true when HOG features are used
- bool _labfeatures; // true when Lab features are used
- };
kcftracker.cpp:KCFTracker类中函数的定义
- #ifndef _KCFTRACKER_HEADERS
- #include "kcftracker.hpp"
- #include "ffttools.hpp"
- #include "recttools.hpp"
- #include "fhog.hpp"
- #include "labdata.hpp"
- #endif
- // Constructor
- // Chooses the tracker parameters for the requested configuration.
- //   hog          - use HOG features (otherwise raw gray-level pixels)
- //   fixed_window - fit the padded target to a fixed template size
- //   multiscale   - additionally probe one smaller and one larger scale in update()
- //                  (implies a fixed window)
- //   lab          - add Lab color features; only honoured together with hog
- // Every data member is given a defined value here; the original left several of them
- // (_labfeatures, cell_sizeQ, scale_weight, _scale, _gaussian_size, size_patch)
- // uninitialized on some paths.
- KCFTracker::KCFTracker(bool hog, bool fixed_window, bool multiscale, bool lab)
- {
-     // Parameters equal in all cases
-     lambda = 0.0001;
-     padding = 2.5;
-     //output_sigma_factor = 0.1;
-     output_sigma_factor = 0.125;
-     scale_weight = 0.95;   // only consulted by update() when scale_step != 1
-     // State that is really computed later by getFeatures(); start from defined values
-     _scale = 1.0f;
-     _gaussian_size = 0;
-     size_patch[0] = 0;
-     size_patch[1] = 0;
-     size_patch[2] = 0;
-     _labfeatures = false;  // switched on below only for hog && lab
-     if (hog) { // HOG
-         // VOT
-         interp_factor = 0.012;
-         sigma = 0.6;
-         // TPAMI
-         //interp_factor = 0.02;
-         //sigma = 0.5;
-         cell_size = 4;
-         _hogfeatures = true;
-         if (lab) {
-             interp_factor = 0.005;
-             sigma = 0.4;
-             //output_sigma_factor = 0.025;
-             output_sigma_factor = 0.1;
-             _labfeatures = true;
-             _labCentroids = cv::Mat(nClusters, 3, CV_32FC1, &data);
-         }
-     }
-     else { // RAW
-         interp_factor = 0.075;
-         sigma = 0.2;
-         cell_size = 1;
-         _hogfeatures = false;
-         if (lab) {
-             printf("Lab features are only used with HOG features.\n");
-         }
-     }
-     // Pixels per cell; previously assigned on the lab path only
-     cell_sizeQ = cell_size * cell_size;
-     if (multiscale) { // multiscale
-         template_size = 96;
-         //template_size = 100;
-         scale_step = 1.20;//1.05;
-         // Multiscale does not support a non-fixed window: the fixed template size is
-         // used here whatever fixed_window says.
-     }
-     else if (fixed_window) { // fit correction without multiscale
-         template_size = 96;
-         //template_size = 100;
-         scale_step = 1;
-     }
-     else {
-         template_size = 1;
-         scale_step = 1;
-     }
- }
- // Initialize tracker
- // Remembers the initial box, builds the first template from the first frame and trains the
- // filter on it with full weight.
- void KCFTracker::init(const cv::Rect &roi, cv::Mat image)
- {
-     _roi = roi;
-     assert(roi.width >= 0 && roi.height >= 0);
-     // First feature extraction: inithann is set, so getFeatures also fixes the template
-     // size and creates the Hanning window; it fills size_patch as a side effect.
-     _tmpl = getFeatures(image, true);
-     const int patch_rows = size_patch[0];
-     const int patch_cols = size_patch[1];
-     // Regression target, created once and left untouched afterwards
-     _prob = createGaussianPeak(patch_rows, patch_cols);
-     // Filter coefficients start at zero and are overwritten by the first train() call
-     _alphaf = cv::Mat(patch_rows, patch_cols, CV_32FC2, float(0));
-     // train with initial frame
-     train(_tmpl, 1.0);
- }
- // Update position based on the new frame
- // Detects the target in `image` at the current scale and, when scale_step != 1, also at one
- // smaller and one larger scale. The best response (neighbouring scales are down-weighted by
- // scale_weight) moves and resizes _roi; the model is then retrained at the new location.
- // Returns the updated target box.
- cv::Rect KCFTracker::update(cv::Mat image)
- {
-     // Pull the box back so that it still overlaps the image before sampling
-     if (_roi.x + _roi.width <= 0) _roi.x = -_roi.width + 1;
-     if (_roi.y + _roi.height <= 0) _roi.y = -_roi.height + 1;
-     if (_roi.x >= image.cols - 1) _roi.x = image.cols - 2;
-     if (_roi.y >= image.rows - 1) _roi.y = image.rows - 2;
-     // Box center before any scale change
-     float cx = _roi.x + _roi.width / 2.0f;
-     float cy = _roi.y + _roi.height / 2.0f;
-     // Response at the unchanged scale
-     float peak_value;
-     cv::Point2f res = detect(_tmpl, getFeatures(image, 0, 1.0f), peak_value);
-     if (scale_step != 1) {
-         // getFeatures() derives the sampling center and size from _roi and _scale, so both
-         // neighbouring scales are probed BEFORE either member is modified. The previous
-         // code shrank _roi/_scale right after the first probe, which made the "bigger"
-         // probe sample the original scale around a shifted center.
-         float smaller_peak;
-         const cv::Point2f smaller_res =
-             detect(_tmpl, getFeatures(image, 0, 1.0f / scale_step), smaller_peak);
-         float bigger_peak;
-         const cv::Point2f bigger_res =
-             detect(_tmpl, getFeatures(image, 0, scale_step), bigger_peak);
-         // -1: shrink, 0: keep, +1: grow
-         int scale_dir = 0;
-         // A neighbouring scale wins only if its down-weighted peak beats the current best
-         if (scale_weight * smaller_peak > peak_value) {
-             res = smaller_res;
-             peak_value = smaller_peak;
-             scale_dir = -1;
-         }
-         if (scale_weight * bigger_peak > peak_value) {
-             res = bigger_res;
-             peak_value = bigger_peak;
-             scale_dir = 1;
-         }
-         if (scale_dir < 0) {
-             _scale /= scale_step;
-             _roi.width /= scale_step;
-             _roi.height /= scale_step;
-         }
-         else if (scale_dir > 0) {
-             _scale *= scale_step;
-             _roi.width *= scale_step;
-             _roi.height *= scale_step;
-         }
-     }
-     // Adjust by cell size and _scale
-     // detect() returns a displacement in feature cells relative to the center, so the box
-     // is re-centered on (cx, cy) and shifted by that displacement converted to pixels.
-     _roi.x = cx - _roi.width / 2.0f + ((float) res.x * cell_size * _scale);
-     _roi.y = cy - _roi.height / 2.0f + ((float) res.y * cell_size * _scale);
-     if (_roi.x >= image.cols - 1) _roi.x = image.cols - 1;
-     if (_roi.y >= image.rows - 1) _roi.y = image.rows - 1;
-     if (_roi.x + _roi.width <= 0) _roi.x = -_roi.width + 2;
-     if (_roi.y + _roi.height <= 0) _roi.y = -_roi.height + 2;
-     assert(_roi.width >= 0 && _roi.height >= 0);
-     // Retrain on the features sampled at the new box
-     cv::Mat x = getFeatures(image, 0);
-     train(x, interp_factor);
-     return _roi;
- }
- // Detect object in the current frame.
- //   z          - template learned from the previous frame(s)
- //   x          - features of the current frame
- //   peak_value - receives the maximum of the response
- // Returns the peak position relative to the center of the response, with sub-pixel
- // refinement where the peak does not lie on the border.
- cv::Point2f KCFTracker::detect(cv::Mat z, cv::Mat x, float &peak_value)
- {
-     using namespace FFTTools;
-     // Kernel correlation between the new features and the template, then the response
-     cv::Mat kernel = gaussianCorrelation(x, z);
-     cv::Mat response = (real(fftd(complexMultiplication(_alphaf, fftd(kernel)), true)));
-     // minMaxLoc only accepts doubles for the peak, and integer points for the coordinates
-     cv::Point2i peak_loc;
-     double peak;
-     cv::minMaxLoc(response, NULL, &peak, NULL, &peak_loc);
-     peak_value = (float) peak;
-     // subpixel peak estimation, coordinates will be non-integer
-     cv::Point2f p((float)peak_loc.x, (float)peak_loc.y);
-     const bool has_horizontal_neighbours = peak_loc.x > 0 && peak_loc.x < response.cols-1;
-     if (has_horizontal_neighbours) {
-         const float left = response.at<float>(peak_loc.y, peak_loc.x-1);
-         const float right = response.at<float>(peak_loc.y, peak_loc.x+1);
-         p.x += subPixelPeak(left, peak_value, right);
-     }
-     const bool has_vertical_neighbours = peak_loc.y > 0 && peak_loc.y < response.rows-1;
-     if (has_vertical_neighbours) {
-         const float above = response.at<float>(peak_loc.y-1, peak_loc.x);
-         const float below = response.at<float>(peak_loc.y+1, peak_loc.x);
-         p.y += subPixelPeak(above, peak_value, below);
-     }
-     // Express the peak relative to the center of the response
-     p.x -= (response.cols) / 2;
-     p.y -= (response.rows) / 2;
-     return p;
- }
- // train tracker with a single image
- // Computes filter coefficients for the features x and blends both the template (_tmpl) and
- // the coefficients (_alphaf) into the stored model with weight train_interp_factor.
- void KCFTracker::train(cv::Mat x, float train_interp_factor)
- {
-     using namespace FFTTools;
-     // Auto-correlation of the new sample
-     cv::Mat kxx = gaussianCorrelation(x, x);
-     // Coefficients for this sample alone: target / (FFT(kxx) + lambda)
-     cv::Mat sample_alphaf = complexDivision(_prob, (fftd(kxx) + lambda));
-     // Running linear interpolation between the old model and the new sample
-     const float keep = 1 - train_interp_factor;
-     _tmpl = keep * _tmpl + (train_interp_factor) * x;
-     _alphaf = keep * _alphaf + (train_interp_factor) * sample_alphaf;
- }
- // Evaluates a Gaussian kernel with bandwidth SIGMA for all relative shifts between input images X and Y,
- // which must both be MxN. They must also be periodic (ie., pre-processed with a cosine window).
- // The patch geometry is taken from size_patch (rows, cols, number of feature channels).
- cv::Mat KCFTracker::gaussianCorrelation(cv::Mat x1, cv::Mat x2)
- {
-     using namespace FFTTools;
-     const int rows = size_patch[0];
-     const int cols = size_patch[1];
-     const int channels = size_patch[2];
-     // Cross-correlation of x1 and x2, summed over all feature channels
-     cv::Mat c = cv::Mat( cv::Size(cols, rows), CV_32F, cv::Scalar(0) );
-     if (!_hogfeatures) {
-         // Gray features: a single channel that already has the patch shape
-         cv::mulSpectrums(fftd(x1), fftd(x2), c, 0, true);
-         c = fftd(c, true);
-         rearrange(c);
-         c = real(c);
-     }
-     else {
-         // HOG features: one channel per matrix row, reshaped to the patch geometry
-         // (procedure to deal with the cv::Mat multichannel bug)
-         cv::Mat caux;
-         for (int ch = 0; ch < channels; ch++) {
-             cv::Mat x1aux = x1.row(ch).reshape(1, rows);
-             cv::Mat x2aux = x2.row(ch).reshape(1, rows);
-             cv::mulSpectrums(fftd(x1aux), fftd(x2aux), caux, 0, true);
-             caux = fftd(caux, true);
-             rearrange(caux);
-             caux.convertTo(caux,CV_32F);
-             c = c + real(caux);
-         }
-     }
-     // Squared distance for every shift, normalised by the element count and clamped at 0
-     cv::Mat d;
-     cv::max(( (cv::sum(x1.mul(x1))[0] + cv::sum(x2.mul(x2))[0])- 2. * c) / (rows*cols*channels) , 0, d);
-     // Gaussian of that distance
-     cv::Mat k;
-     cv::exp((-d / (sigma * sigma)), k);
-     return k;
- }
- // Create Gaussian Peak. Function called only in the first frame.
- // Builds a sizey x sizex Gaussian centered at (sizey/2, sizex/2) whose width follows from
- // the patch size, padding and output_sigma_factor, and returns its FFT.
- cv::Mat KCFTracker::createGaussianPeak(int sizey, int sizex)
- {
-     cv::Mat_<float> res(sizey, sizex);
-     const int center_row = (sizey) / 2;
-     const int center_col = (sizex) / 2;
-     float output_sigma = std::sqrt((float) sizex * sizey) / padding * output_sigma_factor;
-     float mult = -0.5 / (output_sigma * output_sigma);
-     for (int row = 0; row < sizey; row++) {
-         const int dy = row - center_row;
-         const int dy_sq = dy * dy;
-         for (int col = 0; col < sizex; col++) {
-             const int dx = col - center_col;
-             res(row, col) = std::exp(mult * (float) (dy_sq + dx * dx));
-         }
-     }
-     return FFTTools::fftd(res);
- }
- // Obtain sub-window from image, with replication-padding and extract features
- //   image        - frame to sample from
- //   inithann     - true on the first frame only: also fixes _scale and _tmpl_sz from the
- //                  padded target box and creates the Hanning window
- //   scale_adjust - extra factor applied to the sampled region (used by the multi-scale search)
- // Side effects: writes size_patch on every call; writes _scale, _tmpl_sz and hann when
- // inithann is set.
- // Returns the feature matrix multiplied element-wise by the Hanning window.
- cv::Mat KCFTracker::getFeatures(const cv::Mat & image, bool inithann, float scale_adjust)
- {
- cv::Rect extracted_roi;
- // Center of the current target box
- float cx = _roi.x + _roi.width / 2;
- float cy = _roi.y + _roi.height / 2;
- // First-frame setup; callers pass inithann = 1 only from init()
- if (inithann) {
- int padded_w = _roi.width * padding;
- int padded_h = _roi.height * padding;
- // Keep the aspect ratio and scale the padded box so that its larger side equals template_size
- if (template_size > 1) { // Fit largest dimension to the given template size
- if (padded_w >= padded_h) //fit to width
- _scale = padded_w / (float) template_size;
- else
- _scale = padded_h / (float) template_size;
- _tmpl_sz.width = padded_w / _scale;
- _tmpl_sz.height = padded_h / _scale;
- }
- else { //No template size given, use ROI size
- _tmpl_sz.width = padded_w;
- _tmpl_sz.height = padded_h;
- _scale = 1;
- // original code from paper:
- /*if (sqrt(padded_w * padded_h) >= 100) { //Normal size
- _tmpl_sz.width = padded_w;
- _tmpl_sz.height = padded_h;
- _scale = 1;
- }
- else { //ROI is too big, track at half size
- _tmpl_sz.width = padded_w / 2;
- _tmpl_sz.height = padded_h / 2;
- _scale = 2;
- }*/
- }
- // Round _tmpl_sz down to a multiple of 2*cell_size and then add 2*cell_size
- // (with template_size 96 and cell_size 4 the larger side becomes 104)
- if (_hogfeatures) {
- // Round to cell size and also make it even
- _tmpl_sz.width = ( ( (int)(_tmpl_sz.width / (2 * cell_size)) ) * 2 * cell_size ) + cell_size*2;
- _tmpl_sz.height = ( ( (int)(_tmpl_sz.height / (2 * cell_size)) ) * 2 * cell_size ) + cell_size*2;
- }
- else { //Make number of pixels even (helps with some logic involving half-dimensions)
- _tmpl_sz.width = (_tmpl_sz.width / 2) * 2;
- _tmpl_sz.height = (_tmpl_sz.height / 2) * 2;
- }
- }
- // Size of the image region to sample
- extracted_roi.width = scale_adjust * _scale * _tmpl_sz.width;
- extracted_roi.height = scale_adjust * _scale * _tmpl_sz.height;
- // center roi with new size
- // (top-left corner of the sampled region)
- extracted_roi.x = cx - extracted_roi.width / 2;
- extracted_roi.y = cy - extracted_roi.height / 2;
- // Cut the region out of the image; parts outside the image are filled by border replication
- cv::Mat FeaturesMap;
- cv::Mat z = RectTools::subwindow(image, extracted_roi, cv::BORDER_REPLICATE);
- // Resize the sampled region to the template size when they differ
- if (z.cols != _tmpl_sz.width || z.rows != _tmpl_sz.height) {
- cv::resize(z, z, _tmpl_sz);
- }
- // HOG features
- if (_hogfeatures) {
- IplImage z_ipl = z;
- CvLSVMFeatureMapCaskade *map; // allocated by getFeatureMaps
- getFeatureMaps(&z_ipl, cell_size, &map); // per-cell gradient histograms
- normalizeAndTruncate(map,0.2f); // normalize and clip at 0.2
- PCAFeatureMaps(map); // reduce the feature dimension
- size_patch[0] = map->sizeY;
- size_patch[1] = map->sizeX;
- size_patch[2] = map->numFeatures;
- FeaturesMap = cv::Mat(cv::Size(map->numFeatures,map->sizeX*map->sizeY), CV_32F, map->map); // Procedure do deal with cv::Mat multichannel bug
- // The transpose produces a new matrix (one row per feature channel), so the buffer
- // owned by map can be released right after it
- FeaturesMap = FeaturesMap.t();
- freeFeatureMapObject(&map);
- // Lab features
- // (blog author's note: in my tests Lab features did not work well in some tracking scenarios)
- if (_labfeatures) {
- cv::Mat imgLab;
- cvtColor(z, imgLab, CV_BGR2Lab);
- unsigned char *input = (unsigned char*)(imgLab.data);
- // Sparse output vector
- cv::Mat outputLab = cv::Mat(_labCentroids.rows, size_patch[0]*size_patch[1], CV_32F, float(0));
- int cntCell = 0;
- // Iterate through each cell
- // (the outermost ring of cells is skipped, matching the border removed by normalizeAndTruncate)
- for (int cY = cell_size; cY < z.rows-cell_size; cY+=cell_size){
- for (int cX = cell_size; cX < z.cols-cell_size; cX+=cell_size){
- // Iterate through each pixel of cell (cX,cY)
- for(int y = cY; y < cY+cell_size; ++y){
- for(int x = cX; x < cX+cell_size; ++x){
- // Lab components for each pixel
- // NOTE(review): the index assumes imgLab is stored as packed 3-byte pixels without row padding - confirm
- float l = (float)input[(z.cols * y + x) * 3];
- float a = (float)input[(z.cols * y + x) * 3 + 1];
- float b = (float)input[(z.cols * y + x) * 3 + 2];
- // Iterate trough each centroid
- // (find the centroid with the smallest squared distance to this pixel)
- float minDist = FLT_MAX;
- int minIdx = 0;
- float *inputCentroid = (float*)(_labCentroids.data);
- for(int k = 0; k < _labCentroids.rows; ++k){
- float dist = ( (l - inputCentroid[3*k]) * (l - inputCentroid[3*k]) )
- + ( (a - inputCentroid[3*k+1]) * (a - inputCentroid[3*k+1]) )
- + ( (b - inputCentroid[3*k+2]) * (b - inputCentroid[3*k+2]) );
- if(dist < minDist){
- minDist = dist;
- minIdx = k;
- }
- }
- // Store result at output
- // (each pixel adds 1/cell_sizeQ to the bin of its nearest centroid for this cell)
- outputLab.at<float>(minIdx, cntCell) += 1.0 / cell_sizeQ;
- //((float*) outputLab.data)[minIdx * (size_patch[0]*size_patch[1]) + cntCell] += 1.0 / cell_sizeQ;
- }
- }
- cntCell++;
- }
- }
- // Update size_patch[2] and add features to FeaturesMap
- size_patch[2] += _labCentroids.rows;
- FeaturesMap.push_back(outputLab);
- }
- }
- else {
- // Raw pixels: gray image shifted by -0.5, a single feature channel
- FeaturesMap = RectTools::getGrayImage(z);
- FeaturesMap -= (float) 0.5; // In Paper;
- size_patch[0] = z.rows;
- size_patch[1] = z.cols;
- size_patch[2] = 1;
- }
- // The window depends on size_patch, so it is created after the first feature extraction
- if (inithann) {
- createHanningMats();
- }
- FeaturesMap = hann.mul(FeaturesMap);
- return FeaturesMap;
- }
- // Initialize Hanning window. Function called only in the first frame.
- // Builds a 2-D Hann window of size_patch[0] x size_patch[1] as the outer product of two
- // 1-D windows and stores it in `hann`. With HOG features the window is flattened to one
- // row and repeated once per feature channel, matching the layout of the feature matrix.
- void KCFTracker::createHanningMats()
- {
-     const int rows = size_patch[0];
-     const int cols = size_patch[1];
-     cv::Mat hann1t = cv::Mat(cv::Size(cols,1), CV_32F, cv::Scalar(0));
-     cv::Mat hann2t = cv::Mat(cv::Size(1,rows), CV_32F, cv::Scalar(0));
-     // Horizontal 1-D window (row vector)
-     for (int c = 0; c < hann1t.cols; c++) {
-         hann1t.at<float > (0, c) = 0.5 * (1 - std::cos(2 * 3.14159265358979323846 * c / (hann1t.cols - 1)));
-     }
-     // Vertical 1-D window (column vector)
-     for (int r = 0; r < hann2t.rows; r++) {
-         hann2t.at<float > (r, 0) = 0.5 * (1 - std::cos(2 * 3.14159265358979323846 * r / (hann2t.rows - 1)));
-     }
-     cv::Mat hann2d = hann2t * hann1t;
-     if (!_hogfeatures) {
-         // Gray features: the 2-D window is used as it is
-         hann = hann2d;
-         return;
-     }
-     // HOG features: one flattened copy of the window per feature channel
-     // (procedure to deal with the cv::Mat multichannel bug)
-     cv::Mat hann1d = hann2d.reshape(1,1);
-     const int cells = rows * cols;
-     hann = cv::Mat(cv::Size(cells, size_patch[2]), CV_32F, cv::Scalar(0));
-     for (int ch = 0; ch < size_patch[2]; ch++) {
-         for (int idx = 0; idx < cells; idx++) {
-             hann.at<float>(ch,idx) = hann1d.at<float>(0,idx);
-         }
-     }
- }
- // Calculate sub-pixel peak for one dimension
- // Given the response at the peak (center) and at its two neighbours, returns the offset
- // to add to the integer peak position; 0 when the three values give a zero divisor.
- float KCFTracker::subPixelPeak(float left, float center, float right)
- {
-     const float divisor = 2 * center - right - left;
-     if (divisor != 0) {
-         return 0.5 * (right - left) / divisor;
-     }
-     return 0;
- }
fhog.hpp:hog的相关函数
- #ifndef _FHOG_H_
- #define _FHOG_H_
- #include <stdio.h>
- //#include "_lsvmc_types.h"
- //#include "_lsvmc_error.h"
- //#include "_lsvmc_routine.h"
- //#include "opencv2/imgproc.hpp"
- #include "opencv2/imgproc/imgproc_c.h"
- //modified from "_lsvmc_types.h"
- // DataType: STRUCT featureMap
- // FEATURE MAP DESCRIPTION
- // Rectangular map (sizeX x sizeY),
- // every cell stores feature vector (dimension = numFeatures)
- // map - matrix of feature vectors
- // to set and get feature vectors (i,j)
- // used formula map[(j * sizeX + i) * p + k], where
- // k - component of feature vector in cell (i, j)
- // (i is the column, j the row and p equals numFeatures, so cells are stored row by row
- // with the numFeatures values of one cell next to each other)
- typedef struct{
- int sizeX; // number of cells per row
- int sizeY; // number of cell rows
- int numFeatures; // length of the feature vector stored for each cell
- float *map; // sizeX * sizeY * numFeatures values, allocated with malloc by the fhog routines
- } CvLSVMFeatureMapCaskade;
- #include "float.h"
- // Numeric helpers
- #define PI CV_PI
- #define EPS 0.000001
- #define F_MAX FLT_MAX
- #define F_MIN -FLT_MAX
- // The number of elements in bin
- // The number of sectors in gradient histogram building
- #define NUM_SECTOR 9
- // The number of levels in image resize procedure
- // We need Lambda levels to resize image twice
- #define LAMBDA 10
- // Block size. Used in feature pyramid building procedure
- #define SIDE_LENGTH 8
- // Truncation threshold constant (getFeatures passes 0.2f to normalizeAndTruncate explicitly)
- #define VAL_OF_TRUNCATE 0.2f
- //modified from "_lsvm_error.h"
- // Status codes; the fhog routines visible in fhog.cpp return LATENT_SVM_OK or LATENT_SVM_MEM_NULL
- #define LATENT_SVM_OK 0
- #define LATENT_SVM_MEM_NULL 2
- #define DISTANCE_TRANSFORM_OK 1
- #define DISTANCE_TRANSFORM_GET_INTERSECTION_ERROR -1
- #define DISTANCE_TRANSFORM_ERROR -2
- #define DISTANCE_TRANSFORM_EQUAL_POINTS -3
- #define LATENT_SVM_GET_FEATURE_PYRAMID_FAILED -4
- #define LATENT_SVM_SEARCH_OBJECT_FAILED -5
- #define LATENT_SVM_FAILED_SUPERPOSITION -6
- #define FILTER_OUT_OF_BOUNDARIES -7
- #define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8
- #define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9
- #define FFT_OK 2
- #define FFT_ERROR -10
- #define LSVM_PARSER_FILE_NOT_FOUND -11
- /*
- // Getting feature map for the selected subimage
- //
- // API
- // int getFeatureMaps(const IplImage * image, const int k, CvLSVMFeatureMapCaskade **map);
- // INPUT
- // image - selected subimage
- // k - size of cells
- // OUTPUT
- // map - feature map (allocated by the function, release with freeFeatureMapObject)
- // RESULT
- // Error status
- */
- int getFeatureMaps(const IplImage * image, const int k, CvLSVMFeatureMapCaskade **map);
- /*
- // Feature map Normalization and Truncation
- //
- // API
- // int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa);
- // INPUT
- // map - feature map
- // alfa - truncation threshold
- // OUTPUT
- // map - truncated and normalized feature map
- // RESULT
- // Error status
- */
- int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa);
- /*
- // Feature map reduction
- // In each cell we reduce dimension of the feature vector
- // according to original paper special procedure
- //
- // API
- // int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
- // INPUT
- // map - feature map
- // OUTPUT
- // map - feature map
- // RESULT
- // Error status
- */
- int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map);
- //modified from "lsvmc_routine.h"
- // Allocates a feature map of sizeX x sizeY cells with p values per cell, all set to 0
- int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX, const int sizeY,
- const int p);
- // Releases a feature map and resets *obj to NULL
- int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj);
- #endif
fhog.cpp:fhog的相关函数定义
- #include "fhog.hpp"
- #ifdef HAVE_TBB
- #include <tbb/tbb.h>
- #include "tbb/parallel_for.h"
- #include "tbb/blocked_range.h"
- #endif
- // Fallback max/min macros, defined only when no macro of that name exists yet.
- // Both arguments may be evaluated twice, so do not pass expressions with side effects.
- #ifndef max
- #define max(a,b) (((a) > (b)) ? (a) : (b))
- #endif
- #ifndef min
- #define min(a,b) (((a) < (b)) ? (a) : (b))
- #endif
- /*
- // Getting feature map for the selected subimage
- //
- // API
- // int getFeatureMaps(const IplImage * image, const int k, CvLSVMFeatureMapCaskade **map);
- // INPUT
- // image - selected subimage
- // k - size of cells
- // OUTPUT
- // map - feature map: (width / k) x (height / k) cells with 3 * NUM_SECTOR values each,
- // allocated here with allocFeatureMapObject
- // RESULT
- // Error status (always LATENT_SVM_OK in this implementation)
- */
- int getFeatureMaps(const IplImage* image, const int k, CvLSVMFeatureMapCaskade **map)
- {
- int sizeX, sizeY;
- int p, px, stringSize;
- int height, width, numChannels;
- int i, j, kk, c, ii, jj, d;
- float * datadx, * datady;
- int ch;
- float magnitude, x, y, tx, ty;
- IplImage * dx, * dy;
- int *nearest;
- float *w, a_x, b_x;
- // The same 3-tap kernel {-1,0,1} is used as a row vector and as a column vector
- float kernel[3] = {-1.f, 0.f, 1.f};
- CvMat kernel_dx = cvMat(1, 3, CV_32F, kernel); // 1x3 matrix
- CvMat kernel_dy = cvMat(3, 1, CV_32F, kernel); // 3x1 matrix
- float * r;
- int * alfa;
- float boundary_x[NUM_SECTOR + 1]; // NUM_SECTOR + 1 = 10 entries
- float boundary_y[NUM_SECTOR + 1];
- float max, dotProd;
- int maxi;
- height = image->height;
- width = image->width ;
- numChannels = image->nChannels;
- // Derivative images with the size of the input image
- // NOTE(review): dx/dy are always created with 3 channels while the loops below index them
- // with image->nChannels - confirm that callers only pass 3-channel images
- dx = cvCreateImage(cvSize(image->width, image->height),
- IPL_DEPTH_32F, 3);
- dy = cvCreateImage(cvSize(image->width, image->height),
- IPL_DEPTH_32F, 3);
- // Number of whole cells in each direction (rounded down), k = cell size
- sizeX = width / k;
- sizeY = height / k;
- px = 3 * NUM_SECTOR; // px = 3*9 = 27
- p = px;
- stringSize = sizeX * p; // number of floats in one row of cells
- // NOTE(review): the return value is not checked before *map is used
- allocFeatureMapObject(map, sizeX, sizeY, p);
- // cvFilter2D arguments: source image, destination image, single-channel float kernel,
- // anchor position inside the kernel.
- // The filtered images hold the horizontal (dx) and vertical (dy) derivative per channel.
- cvFilter2D(image, dx, &kernel_dx, cvPoint(-1, 0)); // filter along x
- cvFilter2D(image, dy, &kernel_dy, cvPoint(0, -1)); // filter along y
- // Unit vectors of the NUM_SECTOR + 1 sector directions i * PI / NUM_SECTOR
- float arg_vector;
- for(i = 0; i <= NUM_SECTOR; i++)
- {
- arg_vector = ( (float) i ) * ( (float)(PI) / (float)(NUM_SECTOR) );
- boundary_x[i] = cosf(arg_vector);
- boundary_y[i] = sinf(arg_vector);
- }/*for(i = 0; i <= NUM_SECTOR; i++) */
- // r: gradient magnitude per pixel; alfa: two orientation bins per pixel.
- // Neither buffer is cleared; only interior pixels are written here and read below.
- r = (float *)malloc( sizeof(float) * (width * height));
- alfa = (int *)malloc( sizeof(int ) * (width * height * 2));
- for(j = 1; j < height - 1; j++)
- {
- // Start of row j in the derivative images
- datadx = (float*)(dx->imageData + dx->widthStep * j);
- datady = (float*)(dy->imageData + dy->widthStep * j);
- // Visit every interior pixel of the row
- for(i = 1; i < width - 1; i++)
- {
- // Start with the first color channel
- c = 0;
- x = (datadx[i * numChannels + c]);
- y = (datady[i * numChannels + c]);
- r[j * width + i] =sqrtf(x * x + y * y);
- // Keep the gradient of the channel with the largest magnitude
- for(ch = 1; ch < numChannels; ch++)
- {
- tx = (datadx[i * numChannels + ch]);
- ty = (datady[i * numChannels + ch]);
- magnitude = sqrtf(tx * tx + ty * ty);
- if(magnitude > r[j * width + i])
- {
- r[j * width + i] = magnitude;
- c = ch;
- x = tx;
- y = ty;
- }
- }/*for(ch = 1; ch < numChannels; ch++)*/
- // Pick the sector whose direction has the largest projection of (x, y);
- // a larger projection on the opposite direction selects sector kk + NUM_SECTOR
- max = boundary_x[0] * x + boundary_y[0] * y; // max = 1*x+0*y;
- maxi = 0;
- for (kk = 0; kk < NUM_SECTOR; kk++)
- {
- dotProd = boundary_x[kk] * x + boundary_y[kk] * y;
- if (dotProd > max)
- {
- max = dotProd;
- maxi = kk;
- }
- else
- {
- if (-dotProd > max)
- {
- max = -dotProd;
- maxi = kk + NUM_SECTOR; // opposite direction: shift by NUM_SECTOR
- }
- }
- }
- // Store both bin indices: sign-insensitive (0..NUM_SECTOR-1) and
- // sign-sensitive (0..2*NUM_SECTOR-1)
- alfa[j * width * 2 + i * 2 ] = maxi % NUM_SECTOR;
- alfa[j * width * 2 + i * 2 + 1] = maxi;
- }/*for(i = 1; i < width - 1; i++)*/
- }/*for(j = 1; j < height - 1; j++)*/
- nearest = (int *)malloc(sizeof(int ) * k);
- w = (float*)malloc(sizeof(float) * (k * 2));
- // nearest[t]: offset (-1 or +1) of the neighbouring cell closest to position t inside a
- // cell; for k = 4 this is [-1,-1,1,1]
- for(i = 0; i < k / 2; i++)
- {
- nearest[i] = -1;
- }/*for(i = 0; i < k / 2; i++)*/
- for(i = k / 2; i < k; i++)
- {
- nearest[i] = 1;
- }/*for(i = k / 2; i < k; i++)*/
- // Linear interpolation weights for position j inside a cell:
- // w[j * 2] = (1/a) / (1/a + 1/b) = b / (a + b) weight given to the pixel's own cell
- // w[j * 2 + 1] = (1/b) / (1/a + 1/b) = a / (a + b) weight given to the nearest neighbouring cell
- for(j = 0; j < k / 2; j++)
- {
- b_x = k / 2 + j + 0.5f;
- a_x = k / 2 - j - 0.5f;
- w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
- w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
- }/*for(j = 0; j < k / 2; j++)*/
- for(j = k / 2; j < k; j++)
- {
- a_x = j - k / 2 + 0.5f;
- b_x =-j + k / 2 - 0.5f + k;
- w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x));
- w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x));
- }/*for(j = k / 2; j < k; j++)*/
- // Accumulate the histograms: every interior pixel adds its gradient magnitude to its own
- // cell and to the nearest vertical, horizontal and diagonal neighbour cells, weighted by
- // the interpolation weights. Bins [0, NUM_SECTOR) are sign-insensitive, bins
- // [NUM_SECTOR, 3 * NUM_SECTOR) are sign-sensitive.
- for(i = 0; i < sizeY; i++)
- {
- for(j = 0; j < sizeX; j++)
- {
- for(ii = 0; ii < k; ii++)
- {
- for(jj = 0; jj < k; jj++)
- {
- // Skip the one-pixel image border, where r and alfa were not computed
- if ((i * k + ii > 0) &&
- (i * k + ii < height - 1) &&
- (j * k + jj > 0) &&
- (j * k + jj < width - 1))
- {
- d = (k * i + ii) * width + (j * k + jj);
- // own cell
- (*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 ]] +=
- r[d] * w[ii * 2] * w[jj * 2];
- (*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
- r[d] * w[ii * 2] * w[jj * 2];
- // vertical neighbour cell, if it exists
- if ((i + nearest[ii] >= 0) &&
- (i + nearest[ii] <= sizeY - 1))
- {
- (*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 ] ] +=
- r[d] * w[ii * 2 + 1] * w[jj * 2 ];
- (*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
- r[d] * w[ii * 2 + 1] * w[jj * 2 ];
- }
- // horizontal neighbour cell, if it exists
- if ((j + nearest[jj] >= 0) &&
- (j + nearest[jj] <= sizeX - 1))
- {
- (*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] +=
- r[d] * w[ii * 2] * w[jj * 2 + 1];
- (*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
- r[d] * w[ii * 2] * w[jj * 2 + 1];
- }
- // diagonal neighbour cell, if it exists
- if ((i + nearest[ii] >= 0) &&
- (i + nearest[ii] <= sizeY - 1) &&
- (j + nearest[jj] >= 0) &&
- (j + nearest[jj] <= sizeX - 1))
- {
- (*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] +=
- r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
- (*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] +=
- r[d] * w[ii * 2 + 1] * w[jj * 2 + 1];
- }
- }
- }/*for(jj = 0; jj < k; jj++)*/
- }/*for(ii = 0; ii < k; ii++)*/
- }/*for(j = 0; j < sizeX; j++)*/
- }/*for(i = 0; i < sizeY; i++)*/
- // Release the temporary buffers
- cvReleaseImage(&dx);
- cvReleaseImage(&dy);
- free(w);
- free(nearest);
- free(r);
- free(alfa);
- return LATENT_SVM_OK;
- }
- /*
- // Feature map Normalization and Truncation
- //
- // API
- // int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa);
- // INPUT
- // map - feature map with 3 * NUM_SECTOR values per cell
- // alfa - truncation threshold
- // OUTPUT
- // map - truncated and normalized feature map: the outermost ring of cells is dropped
- // (sizeX and sizeY shrink by 2) and every cell holds 12 * NUM_SECTOR values
- // RESULT
- // Error status (always LATENT_SVM_OK in this implementation)
- */
- //
- int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa)
- {
- int i,j, ii;
- int sizeX, sizeY, p, pos, pp, xp, pos1, pos2;
- float * partOfNorm; // norm of C(i, j)
- float * newData;
- float valOfNorm;
- sizeX = map->sizeX;
- sizeY = map->sizeY;
- partOfNorm = (float *)malloc (sizeof(float) * (sizeX * sizeY));
- p = NUM_SECTOR; // number of sign-insensitive bins
- xp = NUM_SECTOR * 3; // values per cell in the input map
- pp = NUM_SECTOR * 12; // values per cell in the output map
- // Energy of each cell: sum of squares of its first p (sign-insensitive) bins
- for(i = 0; i < sizeX * sizeY; i++)
- {
- valOfNorm = 0.0f;
- pos = i * map->numFeatures;
- for(j = 0; j < p; j++)
- {
- valOfNorm += map->map[pos + j] * map->map[pos + j];
- }/*for(j = 0; j < p; j++)*/
- partOfNorm[i] = valOfNorm;
- }/*for(i = 0; i < sizeX * sizeY; i++)*/
- // The output keeps interior cells only, so that every cell has all four 2x2 neighbourhoods
- // NOTE(review): a map narrower or shorter than 3 cells makes these sizes <= 0 - confirm callers avoid that
- sizeX -= 2;
- sizeY -= 2;
- newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp));
- //normalization
- // Each interior cell is normalized four times, once per 2x2 block of cells containing it.
- // Output layout per cell: [0, 4p) the p sign-insensitive bins under the four norms,
- // [4p, 12p) the 2p sign-sensitive bins under the same four norms.
- for(i = 1; i <= sizeY; i++)
- {
- for(j = 1; j <= sizeX; j++)
- {
- // norm 1: block formed with the next row and the next column
- valOfNorm = sqrtf(
- partOfNorm[(i )*(sizeX + 2) + (j )] +
- partOfNorm[(i )*(sizeX + 2) + (j + 1)] +
- partOfNorm[(i + 1)*(sizeX + 2) + (j )] +
- partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON;
- pos1 = (i ) * (sizeX + 2) * xp + (j ) * xp; // cell (i, j) in the input map
- pos2 = (i-1) * (sizeX ) * pp + (j-1) * pp; // same cell in the output map
- for(ii = 0; ii < p; ii++)
- {
- newData[pos2 + ii ] = map->map[pos1 + ii ] / valOfNorm;
- }/*for(ii = 0; ii < p; ii++)*/
- for(ii = 0; ii < 2 * p; ii++)
- {
- newData[pos2 + ii + p * 4] = map->map[pos1 + ii + p] / valOfNorm;
- }/*for(ii = 0; ii < 2 * p; ii++)*/
- // norm 2: block formed with the previous row and the next column
- valOfNorm = sqrtf(
- partOfNorm[(i )*(sizeX + 2) + (j )] +
- partOfNorm[(i )*(sizeX + 2) + (j + 1)] +
- partOfNorm[(i - 1)*(sizeX + 2) + (j )] +
- partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON;
- for(ii = 0; ii < p; ii++)
- {
- newData[pos2 + ii + p ] = map->map[pos1 + ii ] / valOfNorm;
- }/*for(ii = 0; ii < p; ii++)*/
- for(ii = 0; ii < 2 * p; ii++)
- {
- newData[pos2 + ii + p * 6] = map->map[pos1 + ii + p] / valOfNorm;
- }/*for(ii = 0; ii < 2 * p; ii++)*/
- // norm 3: block formed with the next row and the previous column
- valOfNorm = sqrtf(
- partOfNorm[(i )*(sizeX + 2) + (j )] +
- partOfNorm[(i )*(sizeX + 2) + (j - 1)] +
- partOfNorm[(i + 1)*(sizeX + 2) + (j )] +
- partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON;
- for(ii = 0; ii < p; ii++)
- {
- newData[pos2 + ii + p * 2] = map->map[pos1 + ii ] / valOfNorm;
- }/*for(ii = 0; ii < p; ii++)*/
- for(ii = 0; ii < 2 * p; ii++)
- {
- newData[pos2 + ii + p * 8] = map->map[pos1 + ii + p] / valOfNorm;
- }/*for(ii = 0; ii < 2 * p; ii++)*/
- // norm 4: block formed with the previous row and the previous column
- valOfNorm = sqrtf(
- partOfNorm[(i )*(sizeX + 2) + (j )] +
- partOfNorm[(i )*(sizeX + 2) + (j - 1)] +
- partOfNorm[(i - 1)*(sizeX + 2) + (j )] +
- partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON;
- for(ii = 0; ii < p; ii++)
- {
- newData[pos2 + ii + p * 3 ] = map->map[pos1 + ii ] / valOfNorm;
- }/*for(ii = 0; ii < p; ii++)*/
- for(ii = 0; ii < 2 * p; ii++)
- {
- newData[pos2 + ii + p * 10] = map->map[pos1 + ii + p] / valOfNorm;
- }/*for(ii = 0; ii < 2 * p; ii++)*/
- }/*for(j = 1; j <= sizeX; j++)*/
- }/*for(i = 1; i <= sizeY; i++)*/
- //truncation
- // (clip every normalized value at alfa)
- for(i = 0; i < sizeX * sizeY * pp; i++)
- {
- if(newData [i] > alfa) newData [i] = alfa;
- }/*for(i = 0; i < sizeX * sizeY * pp; i++)*/
- //swop data
- // (the map takes ownership of the new buffer; the old buffer and the scratch array are freed)
- map->numFeatures = pp;
- map->sizeX = sizeX;
- map->sizeY = sizeY;
- free (map->map);
- free (partOfNorm);
- map->map = newData;
- return LATENT_SVM_OK;
- }
- /*
- // Feature map reduction
- // In each cell we reduce dimension of the feature vector
- // according to original paper special procedure
- //
- // API
- // int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
- // INPUT
- // map - feature map; the indexing below expects the layout written by
- // normalizeAndTruncate (4 * NUM_SECTOR values followed by 4 blocks of 2 * NUM_SECTOR)
- // OUTPUT
- // map - feature map with NUM_SECTOR * 3 + 4 values per cell; the old buffer is freed
- // RESULT
- // Error status (always LATENT_SVM_OK)
- */
- int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map)
- {
-     const int cellsX = map->sizeX;
-     const int cellsY = map->sizeY;
-     const int inLen = map->numFeatures;        // values per cell on entry
-     const int outLen = NUM_SECTOR * 3 + 4;     // values per cell on exit: 9*3+4
-     const int numNorms = 4;
-     const int numBins = NUM_SECTOR;
-     const float binSumScale = 1.0f / sqrtf((float)(numBins * 2));
-     const float normSumScale = 1.0f / sqrtf((float)(numNorms ));
-     float *reduced = (float *)malloc (sizeof(float) * (cellsX * cellsY * outLen));
-     for (int row = 0; row < cellsY; row++)
-     {
-         for (int col = 0; col < cellsX; col++)
-         {
-             const int src = ((row)*cellsX + col)*inLen;
-             const int dst = ((row)*cellsX + col)*outLen;
-             int out = 0;
-             // 2 * NUM_SECTOR values: each bin of the 2 * NUM_SECTOR-wide blocks summed
-             // over the four normalizations
-             for (int bin = 0; bin < numBins * 2; bin++)
-             {
-                 float acc = 0;
-                 for (int n = 0; n < numNorms; n++)
-                 {
-                     acc += map->map[src + numNorms * numBins + n * numBins * 2 + bin];
-                 }
-                 reduced[dst + out] = acc * normSumScale;
-                 out++;
-             }
-             // NUM_SECTOR values: each bin of the NUM_SECTOR-wide blocks summed over the
-             // four normalizations
-             for (int bin = 0; bin < numBins; bin++)
-             {
-                 float acc = 0;
-                 for (int n = 0; n < numNorms; n++)
-                 {
-                     acc += map->map[src + n * numBins + bin];
-                 }
-                 reduced[dst + out] = acc * normSumScale;
-                 out++;
-             }
-             // 4 values: for each normalization, the sum over all bins of its
-             // 2 * NUM_SECTOR-wide block
-             for (int n = 0; n < numNorms; n++)
-             {
-                 float acc = 0;
-                 for (int bin = 0; bin < 2 * numBins; bin++)
-                 {
-                     acc += map->map[src + numNorms * numBins + n * numBins * 2 + bin];
-                 }
-                 reduced[dst + out] = acc * binSumScale;
-                 out++;
-             }
-         }
-     }
-     // Swap in the reduced data: the map now owns the new buffer
-     map->numFeatures = outLen;
-     free (map->map);
-     map->map = reduced;
-     return LATENT_SVM_OK;
- }
- // Modified from "lsvmc_routine.cpp".
- // Allocates a CvLSVMFeatureMapCaskade, stores it in *obj and gives it a
- // zero-filled float buffer of sizeX * sizeY * numFeatures entries.
- // Neither malloc result is checked. Always returns LATENT_SVM_OK.
- int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX,
-                           const int sizeY, const int numFeatures)
- {
-     const int total = sizeX * sizeY * numFeatures;
-     CvLSVMFeatureMapCaskade *fresh =
-         (CvLSVMFeatureMapCaskade *)malloc(sizeof(CvLSVMFeatureMapCaskade));
-     (*obj) = fresh;
-     fresh->sizeX       = sizeX;
-     fresh->sizeY       = sizeY;
-     fresh->numFeatures = numFeatures;
-     fresh->map = (float *) malloc(sizeof (float) * (total));
-     // Start from an all-zero feature map.
-     for(int idx = 0; idx < total; idx++)
-     {
-         fresh->map[idx] = 0.0f;
-     }
-     return LATENT_SVM_OK;
- }
- // Releases a CvLSVMFeatureMapCaskade created by this module: frees its
- // data buffer and the struct itself, then resets *obj to NULL.
- // Returns LATENT_SVM_MEM_NULL when *obj is already NULL, LATENT_SVM_OK otherwise.
- int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj)
- {
-     CvLSVMFeatureMapCaskade *doomed = *obj;
-     if(doomed == NULL)
-     {
-         return LATENT_SVM_MEM_NULL;
-     }
-     free(doomed->map);
-     free(doomed);
-     (*obj) = NULL;
-     return LATENT_SVM_OK;
- }
ffttools.hpp
- #pragma once
- //#include <cv.h>
- #ifndef _OPENCV_FFTTOOLS_HPP_
- #define _OPENCV_FFTTOOLS_HPP_
- #endif
- //NOTE: FFTW support is still shaky, disabled for now.
- /*#ifdef USE_FFTW
- #include <fftw3.h>
- #endif*/
- // Helpers for complex-valued images stored as 2-channel cv::Mat
- // (channel 0 = real part, channel 1 = imaginary part).
- // All functions are defined in this header, so they are marked `inline`
- // to keep the header safe to include from more than one translation unit.
- namespace FFTTools
- {
- // Previous declarations, to avoid warnings
- inline cv::Mat fftd(cv::Mat img, bool backwards = false);
- inline cv::Mat real(cv::Mat img);
- inline cv::Mat imag(cv::Mat img);
- inline cv::Mat magnitude(cv::Mat img);
- inline cv::Mat complexMultiplication(cv::Mat a, cv::Mat b);
- inline cv::Mat complexDivision(cv::Mat a, cv::Mat b);
- inline void rearrange(cv::Mat &img);
- inline void normalizedLogTransform(cv::Mat &img);
- // Discrete Fourier transform of `img`.
- // A single-channel input is converted to float and given a zero imaginary
- // plane first. With `backwards` set, the inverse transform is computed and
- // scaled (DFT_INVERSE | DFT_SCALE). Returns the 2-channel result.
- // NOTE(review): for multi-channel input cv::dft runs with src == dst on the
- // by-value `img` header; whether that writes into the caller's buffer
- // depends on cv::Mat copy semantics and cv::dft in-place behaviour - confirm.
- inline cv::Mat fftd(cv::Mat img, bool backwards)
- {
- /*
- #ifdef USE_FFTW
- fftw_complex * fm = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * img.cols * img.rows);
- fftw_plan p = fftw_plan_dft_2d(img.rows, img.cols, fm, fm, backwards ? 1 : -1, 0 * FFTW_ESTIMATE);
- if (img.channels() == 1)
- {
- for (int i = 0; i < img.rows; i++)
- for (int j = 0; j < img.cols; j++)
- {
- fm[i * img.cols + j][0] = img.at<float>(i, j);
- fm[i * img.cols + j][1] = 0;
- }
- }
- else
- {
- assert(img.channels() == 2);
- for (int i = 0; i < img.rows; i++)
- for (int j = 0; j < img.cols; j++)
- {
- fm[i * img.cols + j][0] = img.at<cv::Vec2d > (i, j)[0];
- fm[i * img.cols + j][1] = img.at<cv::Vec2d > (i, j)[1];
- }
- }
- fftw_execute(p);
- cv::Mat res(img.rows, img.cols, CV_64FC2);
- for (int i = 0; i < img.rows; i++)
- for (int j = 0; j < img.cols; j++)
- {
- res.at<cv::Vec2d > (i, j)[0] = fm[i * img.cols + j][0];
- res.at<cv::Vec2d > (i, j)[1] = fm[i * img.cols + j][1];
- // _iout(fm[i * img.cols + j][0]);
- }
- if (backwards)res *= 1.d / (float) (res.cols * res.rows);
- fftw_free(p);
- fftw_free(fm);
- return res;
- #else
- */
-     if (img.channels() == 1)
-     {
-         // Promote a real image to complex: (float data, zero imaginary part).
-         cv::Mat planes[] = {cv::Mat_<float> (img), cv::Mat_<float>::zeros(img.size())};
-         //cv::Mat planes[] = {cv::Mat_<double> (img), cv::Mat_<double>::zeros(img.size())};
-         cv::merge(planes, 2, img);
-     }
-     cv::dft(img, img, backwards ? (cv::DFT_INVERSE | cv::DFT_SCALE) : 0 );
-     return img;
- /*#endif*/
- }
- // Real part (channel 0) of a complex image.
- inline cv::Mat real(cv::Mat img)
- {
-     std::vector<cv::Mat> planes;
-     cv::split(img, planes);
-     return planes[0];
- }
- // Imaginary part (channel 1) of a complex image.
- inline cv::Mat imag(cv::Mat img)
- {
-     std::vector<cv::Mat> planes;
-     cv::split(img, planes);
-     return planes[1];
- }
- // Element-wise modulus.
- // One channel: absolute value. Two channels: sqrt(re^2 + im^2).
- // Any other channel count triggers assert(0).
- inline cv::Mat magnitude(cv::Mat img)
- {
-     cv::Mat res;
-     std::vector<cv::Mat> planes;
-     cv::split(img, planes); // planes[0] = Re(DFT(I)), planes[1] = Im(DFT(I))
-     if (planes.size() == 1) res = cv::abs(img);
-     else if (planes.size() == 2) cv::magnitude(planes[0], planes[1], res); // sqrt(x(I)^2 + y(I)^2)
-     else assert(0);
-     return res;
- }
- // Element-wise complex product a * b of two 2-channel images:
- // (ar + i*ai)(br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br).
- inline cv::Mat complexMultiplication(cv::Mat a, cv::Mat b)
- {
-     std::vector<cv::Mat> pa;
-     std::vector<cv::Mat> pb;
-     cv::split(a, pa); // split into real / imaginary planes
-     cv::split(b, pb);
-     std::vector<cv::Mat> pres;
-     pres.push_back(pa[0].mul(pb[0]) - pa[1].mul(pb[1]));
-     pres.push_back(pa[0].mul(pb[1]) + pa[1].mul(pb[0]));
-     cv::Mat res;
-     cv::merge(pres, res); // recombine the planes
-     return res;
- }
- // Element-wise complex quotient a / b of two 2-channel images:
- // (ar + i*ai)/(br + i*bi) = [(ar*br + ai*bi) + i*(ai*br - ar*bi)] / (br^2 + bi^2).
- inline cv::Mat complexDivision(cv::Mat a, cv::Mat b)
- {
-     std::vector<cv::Mat> pa;
-     std::vector<cv::Mat> pb;
-     cv::split(a, pa); // split into real / imaginary planes
-     cv::split(b, pb);
-     // Reciprocal of |b|^2 = br^2 + bi^2.
-     cv::Mat divisor = 1. / (pb[0].mul(pb[0]) + pb[1].mul(pb[1]));
-     std::vector<cv::Mat> pres;
-     pres.push_back((pa[0].mul(pb[0]) + pa[1].mul(pb[1])).mul(divisor));
-     // Imaginary part needs a minus sign (multiplication by the conjugate of b);
-     // the earlier '+' was only correct when b had no imaginary component.
-     pres.push_back((pa[1].mul(pb[0]) - pa[0].mul(pb[1])).mul(divisor));
-     cv::Mat res;
-     cv::merge(pres, res); // recombine the planes
-     return res;
- }
- // Swaps diagonally opposite quadrants in place:
- // 0 1   becomes ->   3 2
- // 2 3                1 0
- // Quadrants are cols/2 x rows/2, so with an odd size the last row/column
- // is left untouched.
- inline void rearrange(cv::Mat &img)
- {
-     // img = img(cv::Rect(0, 0, img.cols & -2, img.rows & -2));
-     int cx = img.cols / 2;
-     int cy = img.rows / 2;
-     cv::Mat q0(img, cv::Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
-     cv::Mat q1(img, cv::Rect(cx, 0, cx, cy)); // Top-Right
-     cv::Mat q2(img, cv::Rect(0, cy, cx, cy)); // Bottom-Left
-     cv::Mat q3(img, cv::Rect(cx, cy, cx, cy)); // Bottom-Right
-     cv::Mat tmp; // swap quadrants (Top-Left with Bottom-Right)
-     q0.copyTo(tmp);
-     q3.copyTo(q0);
-     tmp.copyTo(q3);
-     q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
-     q2.copyTo(q1);
-     tmp.copyTo(q2);
- }
- /*
- template < typename type>
- cv::Mat fouriertransFull(const cv::Mat & in)
- {
- return fftd(in);
- cv::Mat planes[] = {cv::Mat_<type > (in), cv::Mat_<type>::zeros(in.size())};
- cv::Mat t;
- assert(planes[0].depth() == planes[1].depth());
- assert(planes[0].size == planes[1].size);
- cv::merge(planes, 2, t);
- cv::dft(t, t);
- //cv::normalize(a, a, 0, 1, CV_MINMAX);
- //cv::normalize(t, t, 0, 1, CV_MINMAX);
- // cv::imshow("a",real(a));
- // cv::imshow("b",real(t));
- // cv::waitKey(0);
- return t;
- }*/
- // In-place log transform: img = log(1 + |img|).
- // Despite the name, the final normalisation step is commented out.
- inline void normalizedLogTransform(cv::Mat &img)
- {
-     img = cv::abs(img);
-     img += cv::Scalar::all(1);
-     cv::log(img, img);
-     // cv::normalize(img, img, 0, 1, CV_MINMAX);
- }
- }
recttools.hpp
- #pragma once
- //#include <cv.h>
- #include <math.h>
- #ifndef _OPENCV_RECTTOOLS_HPP_
- #define _OPENCV_RECTTOOLS_HPP_
- #endif
- // Geometry helpers for cv::Rect_ plus two small image utilities.
- namespace RectTools
- {
- // Centre point of `rect` as (x, y).
- template <typename t>
- inline cv::Vec<t, 2 > center(const cv::Rect_<t> &rect)
- {
-     const t midX = rect.x + rect.width / (t) 2;
-     const t midY = rect.y + rect.height / (t) 2;
-     return cv::Vec<t, 2 > (midX, midY);
- }
- // X coordinate of the right edge (x + width).
- template <typename t>
- inline t x2(const cv::Rect_<t> &rect)
- {
-     return rect.x + rect.width;
- }
- // Y coordinate of the bottom edge (y + height).
- template <typename t>
- inline t y2(const cv::Rect_<t> &rect)
- {
-     return rect.y + rect.height;
- }
- // Scales `rect` about its centre by `scalex` horizontally and `scaley`
- // vertically; a zero `scaley` means "use scalex for both axes".
- template <typename t>
- inline void resize(cv::Rect_<t> &rect, float scalex, float scaley = 0)
- {
-     if (scaley == 0)
-     {
-         scaley = scalex;
-     }
-     // Shift the origin by half of the growth first (uses the old size),
-     // then apply the scale to the size itself.
-     rect.x -= rect.width * (scalex - 1.f) / 2.f;
-     rect.width *= scalex;
-     rect.y -= rect.height * (scaley - 1.f) / 2.f;
-     rect.height *= scaley;
- }
- // Clips `rect` so that it lies inside `limit`; a rectangle that falls
- // completely outside ends up with zero width and/or height.
- template <typename t>
- inline void limit(cv::Rect_<t> &rect, cv::Rect_<t> limit)
- {
-     // Horizontal axis: trim the right side, then the left side.
-     if (rect.x + rect.width > limit.x + limit.width)
-     {
-         rect.width = (limit.x + limit.width - rect.x);
-     }
-     if (rect.x < limit.x)
-     {
-         rect.width -= (limit.x - rect.x);
-         rect.x = limit.x;
-     }
-     if (rect.width < 0)
-     {
-         rect.width = 0;
-     }
-     // Vertical axis: trim the bottom side, then the top side.
-     if (rect.y + rect.height > limit.y + limit.height)
-     {
-         rect.height = (limit.y + limit.height - rect.y);
-     }
-     if (rect.y < limit.y)
-     {
-         rect.height -= (limit.y - rect.y);
-         rect.y = limit.y;
-     }
-     if (rect.height < 0)
-     {
-         rect.height = 0;
-     }
- }
- // Convenience overload: clips `rect` to the rectangle (x, y, width, height).
- template <typename t>
- inline void limit(cv::Rect_<t> &rect, t width, t height, t x = 0, t y = 0)
- {
-     const cv::Rect_<t > bounds(x, y, width, height);
-     limit(rect, bounds);
- }
- // Per-side amount by which `original` sticks out of `limited`:
- // x = left, y = top, width = right, height = bottom. `limited` is expected
- // to lie inside `original` (asserted).
- template <typename t>
- inline cv::Rect getBorder(const cv::Rect_<t > &original, cv::Rect_<t > & limited)
- {
-     const cv::Rect_<t > margins(limited.x - original.x,
-                                 limited.y - original.y,
-                                 x2(original) - x2(limited),
-                                 y2(original) - y2(limited));
-     assert(margins.x >= 0 && margins.y >= 0 && margins.width >= 0 && margins.height >= 0);
-     return margins;
- }
- // Extracts `window` from `in`. Parts of the window that fall outside the
- // image are filled in with cv::copyMakeBorder using `borderType`.
- inline cv::Mat subwindow(const cv::Mat &in, const cv::Rect & window, int borderType = cv::BORDER_CONSTANT)
- {
-     // Portion of the window that actually overlaps the image.
-     cv::Rect cutWindow = window;
-     RectTools::limit(cutWindow, in.cols, in.rows);
-     const bool noOverlap = (cutWindow.height <= 0 || cutWindow.width <= 0);
-     if (noOverlap)assert(0); //return cv::Mat(window.height,window.width,in.type(),0) ;
-     cv::Rect border = RectTools::getBorder(window, cutWindow);
-     cv::Mat res = in(cutWindow);
-     if (border == cv::Rect(0, 0, 0, 0))
-     {
-         return res; // window fully inside the image, nothing to pad
-     }
-     // Let OpenCV copy the patch and pad the missing sides.
-     cv::copyMakeBorder(res, res, border.y, border.height, border.x, border.width, borderType);
-     return res;
- }
- // Converts a BGR image to single-channel float with values scaled by 1/255.
- inline cv::Mat getGrayImage(cv::Mat img)
- {
-     cv::Mat gray;
-     cv::cvtColor(img, gray, CV_BGR2GRAY);
-     gray.convertTo(gray, CV_32F, 1 / 255.f);
-     return gray;
- }
- }
runtracker.cpp:跟踪主程序
- #include <dirent.h>
- #include <sys/time.h>
- #include <algorithm>
- #include <cstdio>
- #include <cstring>
- #include <fstream>
- #include <iostream>
- #include <sstream>
- #include <opencv2/core/core.hpp>
- #include <opencv2/highgui/highgui.hpp>
- #include "kcftracker.hpp"
- using namespace std;
- using namespace cv;
- // KCF tracking demo.
- //
- // Reads the initial target polygon (x1,y1,...,x4,y4) from the first line of
- // "region.txt", the list of frame paths from "images.txt", runs the tracker
- // over all frames and writes one "x,y,width,height" line per frame to
- // "output.txt".
- //
- // Optional command-line words (at most four):
- //   hog           use HOG features (default)
- //   gray          use raw gray-scale features instead of HOG
- //   lab           add Lab colour features (implies hog)
- //   fixed_window  keep the window size fixed
- //   singlescale   disable multi-scale estimation
- //   show          display every frame and save it as NNNN.jpg
- //
- // Returns 0 on success, -1 on bad usage, unreadable input files or frames.
- int main(int argc, char* argv[]){
-     if (argc > 5) return -1; // more than four option words
-     bool HOG = true;          // use HOG features
-     bool FIXEDWINDOW = false; // use a fixed window size
-     bool MULTISCALE = true;   // use multi-scale estimation
-     bool SILENT = true;       // do not display / save frames
-     bool LAB = false;         // use Lab colour features
-     // argv[0] is the program name, so options start at index 1.
-     for(int i = 1; i < argc; i++){
-         if ( strcmp (argv[i], "hog") == 0 )
-             HOG = true;
-         if ( strcmp (argv[i], "fixed_window") == 0 )
-             FIXEDWINDOW = true;
-         if ( strcmp (argv[i], "singlescale") == 0 )
-             MULTISCALE = false;
-         if ( strcmp (argv[i], "show") == 0 )
-             SILENT = false;
-         if ( strcmp (argv[i], "lab") == 0 ){
-             LAB = true;
-             HOG = true;
-         }
-         if ( strcmp (argv[i], "gray") == 0 )
-             HOG = false;
-     }
-     // Create KCFTracker object
-     KCFTracker tracker(HOG, FIXEDWINDOW, MULTISCALE, LAB);
-     // Current frame
-     Mat frame;
-     // Tracker result (target bounding box)
-     Rect result;
-     // Read groundtruth for the 1st frame
-     ifstream groundtruthFile;
-     string groundtruth = "region.txt";
-     groundtruthFile.open(groundtruth);
-     if (!groundtruthFile.is_open()){
-         cerr << "Cannot open " << groundtruth << endl;
-         return -1;
-     }
-     string firstLine;
-     getline(groundtruthFile, firstLine);
-     groundtruthFile.close();
-     istringstream ss(firstLine);
-     // Four corner points separated by single-character delimiters.
-     // Refuse to continue on a malformed line instead of tracking garbage.
-     float x1, y1, x2, y2, x3, y3, x4, y4;
-     char ch;
-     if (!(ss >> x1 >> ch >> y1 >> ch >> x2 >> ch >> y2 >> ch
-              >> x3 >> ch >> y3 >> ch >> x4 >> ch >> y4)){
-         cerr << "Cannot parse 8 coordinates from " << groundtruth << endl;
-         return -1;
-     }
-     // Using min and max of X and Y for groundtruth rectangle
-     float xMin = min(x1, min(x2, min(x3, x4)));
-     float yMin = min(y1, min(y2, min(y3, y4)));
-     float width = max(x1, max(x2, max(x3, x4))) - xMin;
-     float height = max(y1, max(y2, max(y3, y4))) - yMin;
-     // List of frames to process
-     ifstream listFramesFile;
-     string listFrames = "images.txt";
-     listFramesFile.open(listFrames);
-     if (!listFramesFile.is_open()){
-         cerr << "Cannot open " << listFrames << endl;
-         return -1;
-     }
-     string frameName;
-     // Results go to output.txt
-     ofstream resultsFile;
-     string resultsPath = "output.txt";
-     resultsFile.open(resultsPath);
-     if (!resultsFile.is_open()){
-         cerr << "Cannot open " << resultsPath << endl;
-         return -1;
-     }
-     // Frame counter
-     int nFrames = 0;
-     char name_write[15] = {};
-     int status = 0;
-     while ( getline(listFramesFile, frameName) ){
-         // Read each frame from the list
-         frame = imread(frameName, CV_LOAD_IMAGE_COLOR);
-         if (frame.empty()){
-             // An unreadable frame would crash the tracker; stop cleanly.
-             cerr << "Cannot read frame " << frameName << endl;
-             status = -1;
-             break;
-         }
-         // First frame, give the groundtruth to the tracker
-         if (nFrames == 0) {
-             tracker.init( Rect(xMin, yMin, width, height), frame );
-             rectangle( frame, Point( xMin, yMin ), Point( xMin+width, yMin+height), Scalar( 0, 255, 255 ), 1, 8 );
-             resultsFile << xMin << "," << yMin << "," << width << "," << height << endl;
-         }
-         // Every later frame: update the tracker and record its estimate
-         else{
-             result = tracker.update(frame);
-             rectangle( frame, Point( result.x, result.y ), Point( result.x+result.width, result.y+result.height), Scalar( 0, 255, 255 ), 1, 8 );
-             resultsFile << result.x << "," << result.y << "," << result.width << "," << result.height << endl;
-         }
-         nFrames++;
-         // Show and save the annotated frame
-         if (!SILENT){
-             imshow("Image", frame);
-             waitKey(1);
-             snprintf(name_write, sizeof(name_write), "%04d.jpg", nFrames);
-             imwrite(name_write, frame);
-         }
-     }
-     // Close files
-     resultsFile.close();
-     listFramesFile.close();
-     return status;
- }