附上本篇论文作者主页:http://www.cvl.isy.liu.se/research/objrec/visualtracking/colvistrack/index.html (有论文原文和matlab代码)
我从github上找的c++版本代码:https://github.com/mostafaizz/ColorTracker
本论文是目标跟踪领域的大神 Martin Danelljan(MD)于2014年在CVPR上发表的论文。该论文利用Color Names提取目标特征图,并在CSK跟踪模型的基础上进行目标跟踪;CSK模型是大名鼎鼎的相关滤波算法KCF(2015年 TPAMI)的前作。本篇博客简要介绍其原理,重点介绍其C++实现代码。
CSK论文:http://fcv2011.ulsan.ac.kr/files/announcement/455/csk_tracker_eccv2012.pdf
1.背景介绍
1.1 CSK模型
CSK模型可参考这篇博客:https://blog.csdn.net/carrierlxksuper/article/details/50328437
KCF模型(CSK的后续工作)推荐这篇博客:https://www.cnblogs.com/YiXiaoZhou/p/5925019.html ,讲得非常好。
1.2颜色跟踪模型
这里所说的Color Names(颜色名)特征来自之前发表的一篇论文《Learning color names for real-world applications》(2009)。它可以为图像提取11维颜色特征,再加上图像的灰度特征,一起作为CSK跟踪模型的特征图(feature map)x。传统CSK模型的特征图是一维的,本文在这一点上做了改进;后续的KCF模型则是提取图像的HOG特征作为特征图x。
图1
图2
原理自行阅读论文,这些公式在之后的代码中是有所体现的。
1.3降维
作者为了减少特征维度,采用PCA降维方法,具体公式为(6)和(7),然后作者给出了算法流程图。
图3 algorithm流程图
图4 对x求高斯变换,再作相关操作
图5 CSK论文中的CSK跟踪模型
具体不解释了,自行理解,具体代码有所体现。
2.代码分析
我贴的为c++代码,下载请从开头给出的github网址下载即可。
2.1主函数
void test_ball1()
{
ColorTrackerParameters params; //定义一个ColorTrackerParameters的类对象params,包括了很多对象成员变量;
params.visualization = 1; //就是设置显示实时跟踪结果图
cv::Point pos = cv::Point(492, 417);//初始跟踪矩形目标框的左上角坐标(x,y)
cv::Size target_sz = cv::Size(47, 46);//矩形目标框的大小位置
params.init_pos.x = (int)(floor(pos.x) + floor(target_sz.width / 2));//矩形目标中心位置坐标x
params.init_pos.y = (int)(floor(pos.y) + floor(target_sz.height / 2));//矩形目标中心位置坐标y
params.wsize = cv::Size((int)floor(target_sz.width), (int)floor(target_sz.height));
ColorTracker tracker(params);//初始化跟踪器参数,可自行打开阅读
tracker.init_tracking();//初始化跟踪器
for (int frame_index = 1; frame_index <= 105; frame_index++) //ball1总共有105帧
{
ostringstream ostr;
ostr << "sequences/ball1/imgs/";
ostr << setfill('0') << setw(8) << frame_index << ".jpg";
cv::Mat current_frame = cv::imread(ostr.str()); //opencv函数读取图片,图片返回作为当前帧current_frame
tracker.track_frame(current_frame);//跟踪当前帧
}
cv::waitKey(0);
}
// Program entry point: runs one of the tracking demos and exits with status 0.
int main(int argc, char** argv)
{
    // The command-line arguments are not used.
    (void)argc;
    (void)argv;

    // Other demos, currently disabled:
    //test_Scate();
    //test_soccer();
    //test_video();

    // Track the ball in the "ball1" sequence (a VOT2015 data set).
    test_ball1();

    return 0;
}
2.2 tracker.init_tracking()函数
void ColorTracker::init_tracking()
{
// use_dimensionality_reduction
use_dimensionality_reduction = (params.compressed_features.size() != 0);
target_sz = cv::Size(params.wsize.width, params.wsize.height);
pos = cv::Point(params.init_pos.x, params.init_pos.y);
// window size, taking padding into account
sz_with_padding = cv::Size((int)floor(target_sz.width * (1 + params.padding)), (int)floor(target_sz.height * (1 + params.padding)));
//扩选上一帧目标区域的大小,这里params.padding为1,基本就是将目标区域扩大一倍。
// desired output(gaussian shaped), bandwidth proportional to target size
double output_sigma = sqrt(target_sz.width * target_sz.height) * params.output_sigma_factor;
Mat y = Mat::zeros(sz_with_padding, CV_64FC1);
double output_sigma_square = output_sigma * output_sigma;
int ind = 0;
double *data = ((double*)(y.data));
for (int i = 1; i <= sz_with_padding.height; i++)
{
for (int j = 1; j <= sz_with_padding.width; j++)
{
int tmpCs = j - sz_with_padding.width / 2;
int tmpRs = i - sz_with_padding.height / 2;
data[ind++] = exp(-0.5 / output_sigma_square * (tmpRs * tmpRs + tmpCs * tmpCs));
}
}
cv::dft(y, yf, DFT_COMPLEX_OUTPUT);
//以上操作:初始帧的扩选区域为sz_with_padding ,以扩选区域中心位置做高斯衰减得到y,对y做DFT变换,得到yf.
// store pre - computed cosine window
Mat hann_1 = Mat::zeros(cv::Size(1, sz_with_padding.height), CV_64FC1);//hann窗
ind = 0;
data = ((double*)hann_1.data);
for (int i = 0; i < sz_with_padding.height; i++)
{
data[ind++] = (0.5 * (1 - cos(2 * 3.14159265359 * i / (sz_with_padding.height - 1))));
}
Mat hann_2 = Mat::zeros(cv::Size(1, sz_with_padding.width), CV_64FC1);
ind = 0;
data = ((double*)hann_2.data);
for (int i = 0; i < sz_with_padding.width; i++)
{
data[ind++] = (0.5 * (1 - cos(2 * 3.14159265359 * i / (sz_with_padding.width - 1))));
}
cos_window = hann_1 * hann_2.t(); //con_window是根据sz_with_padding制作的hann窗
frame_index = 1;
}
2.3ColorTracker::track_frame()函数
// Processes one frame: locates the target (for every frame after the first),
// then updates the appearance model, the PCA projection and the classifier.
//
// @param current_frame  The frame to process. When params.visualization == 1
//                       the tracked rectangle is drawn onto this image and it
//                       is displayed.
// @return               The value of get_position() after the update.
//
// Side effects: updates pos, the learnt appearance (z_npca / z_pca), the
// projection matrix, the classifier coefficients (alphaf_num / alphaf_den),
// appends to positions and increments frame_index.
cv::Rect ColorTracker::track_frame(Mat &current_frame)
{
    if (frame_index > 1) // every frame after the first: detect the target
    {
        // compute the compressed learnt appearance
        vector<Mat> zp = feature_projection(z_npca, z_pca, projection_matrix, cos_window);

        // extract the feature map of the local image patch around the
        // previous position (kept in locals; the members xo_npca / xo_pca
        // are filled further below, at the updated position)
        Mat candidate_npca, candidate_pca;
        get_subwindow(current_frame, pos, sz_with_padding, params.non_compressed_features,
            params.compressed_features, w2c, candidate_npca, candidate_pca);

        // do the dimensionality reduction and windowing
        vector<Mat> x = feature_projection(candidate_npca, candidate_pca, projection_matrix, cos_window);

        // calculate the response of the classifier (the CSK detection step,
        // figure 5 of the accompanying post):
        //   response = IDFT( alphaf_num .* kf ./ alphaf_den )
        // The complex division is done as num * conj(den) / (den * conj(den))
        // (going by the helper names).
        cv::dft(dense_gauss_kernel(params.sigma, x, zp), kf, DFT_COMPLEX_OUTPUT);
        num1 = mul_complex_element_by_element(alphaf_num, kf);
        num2 = mul_complex_element_by_element_second_conjugate(num1, alphaf_den);
        denum1 = mul_complex_element_by_element_second_conjugate(alphaf_den, alphaf_den);
        cv::split(denum1, denum);
        cv::split(num2, num);
        cv::divide(num[0], denum[0], num[0]);
        cv::divide(num[1], denum[0], num[1]);
        cv::merge(num, tmp_r);

        // back to the spatial domain; scale by 1 / (cols * rows)
        cv::dft(tmp_r, response, DFT_INVERSE | DFT_REAL_OUTPUT);
        response = response / (response.cols * response.rows);

        // target location is at the maximum response
        double minVal;
        double maxVal;
        Point minLoc;
        Point maxLoc;
        cv::minMaxLoc(response, &minVal, &maxVal, &minLoc, &maxLoc);
        pos = pos - cv::Point(sz_with_padding.width / 2, sz_with_padding.height / 2)
            + cv::Point(maxLoc.x + 1, maxLoc.y + 1);

        // keep the position inside the image: [0, cols - 1] x [0, rows - 1]
        pos.x = std::max(0, std::min(pos.x, current_frame.cols - 1));
        pos.y = std::max(0, std::min(pos.y, current_frame.rows - 1));
    }
    else
    {
        im_patch = Mat::zeros(sz_with_padding.height, sz_with_padding.width, current_frame.type());
    }

    // extract the feature map of the local image patch to train the classifier
    // (w2c is the loaded Color Names mapping; xo_npca / xo_pca receive the
    // non-compressed and the to-be-compressed features)
    get_subwindow(current_frame, pos, sz_with_padding, params.non_compressed_features,
        params.compressed_features, w2c, xo_npca, xo_pca);

    if (frame_index == 1)
    {
        // initialize the appearance; deep copies, because cv::Mat assignment
        // only shares the buffer and the model must not change when
        // xo_npca / xo_pca are written again on the next frame
        z_npca = xo_npca.clone();
        z_pca = xo_pca.clone();

        // set number of compressed dimensions to maximum if too many
        params.num_compressed_dim = std::min(params.num_compressed_dim, xo_pca.cols);
    }
    else
    {
        // update the appearance (paper Eq. (5c))
        z_npca = (1 - params.learning_rate) * z_npca + params.learning_rate * xo_npca;
        z_pca = (1 - params.learning_rate) * z_pca + params.learning_rate * xo_pca;
    }

    // if dimensionality reduction is used: update the projection matrix
    if (use_dimensionality_reduction)
    {
        if (frame_index == 1)
        {
            // allocate the data matrix once
            data_matrix = Mat::zeros(z_pca.rows, z_pca.cols, CV_64FC1);
        }

        // compute the mean appearance (column-wise average)
        reduce(z_pca, data_mean, 0, CV_REDUCE_AVG);

        // subtract the mean from the appearance to get the data matrix;
        // rows are copied as raw doubles, so z_pca is expected to be CV_64FC1
        for (int i = 0; i < z_pca.rows; i++)
        {
            Mat centered_row = z_pca.row(i) - data_mean;
            memcpy(data_matrix.ptr<double>(i), centered_row.data, z_pca.cols * sizeof(double));
        }

        // calculate the covariance matrix (algorithm steps 1 and 2)
        cov_matrix = (1.0 / (sz_with_padding.width * sz_with_padding.height - 1))
            * (data_matrix.t() * data_matrix);

        // calculate the principal components (pca_basis) and corresponding
        // variances (pca_variances); vt is not used afterwards
        Mat vt;
        if (frame_index == 1)
        {
            // first frame: decompose the covariance itself (algorithm step 8 with p = 1)
            cv::SVD::compute(cov_matrix, pca_variances, pca_basis, vt);
        }
        else
        {
            // later frames: blend with the previous covariance first (algorithm steps 6 and 8)
            cv::SVD::compute((1 - params.compression_learning_rate) * old_cov_matrix + params.compression_learning_rate * cov_matrix,
                pca_variances, pca_basis, vt);
        }

        // calculate the projection matrix as the first principal
        // components and extract their corresponding variances
        // (algorithm steps 9 and 10)
        projection_matrix = pca_basis(cv::Rect(0, 0, params.num_compressed_dim, pca_basis.rows)).clone();
        Mat projection_variances = Mat::zeros(params.num_compressed_dim, params.num_compressed_dim, CV_64FC1);
        for (int i = 0; i < params.num_compressed_dim; i++)
        {
            // diagonal matrix of the leading variances
            ((double*)projection_variances.data)[i + i * params.num_compressed_dim] = ((double*)pca_variances.data)[i];
        }

        if (frame_index == 1)
        {
            // initialize the old covariance matrix using the computed
            // projection matrix and variances (algorithm step 12)
            old_cov_matrix = projection_matrix * projection_variances * projection_matrix.t();
        }
        else
        {
            // update the old covariance matrix using the computed
            // projection matrix and variances (algorithm step 14)
            old_cov_matrix =
                (1 - params.compression_learning_rate) * old_cov_matrix +
                params.compression_learning_rate * (projection_matrix * projection_variances * projection_matrix.t());
        }
    }

    // project the features of the new appearance example using the new
    // projection matrix (it may just have been updated)
    vector<Mat> x = feature_projection(xo_npca, xo_pca, projection_matrix, cos_window);

    // calculate the new classifier coefficients: Gaussian kernel of x
    // (called with an empty second feature set), then its DFT
    vector<Mat> dummy;
    Mat tmpKernel = dense_gauss_kernel(params.sigma, x, dummy);
    cv::dft(tmpKernel, kf, DFT_COMPLEX_OUTPUT);

    // numerator of paper Eq. (4): yf .* kf
    Mat new_alphaf_num = mul_complex_element_by_element(yf, kf);

    // denominator of paper Eq. (4): kf .* (kf + lambda), lambda added to the real part
    vector<Mat> kf_;
    cv::split(kf, kf_);
    kf_[0] += params.lambda;
    Mat tmp;
    cv::merge(kf_, tmp);
    Mat new_alphaf_den = mul_complex_element_by_element(kf, tmp);

    if (frame_index == 1)
    {
        // first frame, train with a single image (paper Eq. (5a)/(5b) with p = 1)
        alphaf_num = new_alphaf_num;
        alphaf_den = new_alphaf_den;
    }
    else
    {
        // subsequent frames, update the model (paper Eq. (5a)/(5b) with p > 1)
        alphaf_num = (1 - params.learning_rate) * alphaf_num + params.learning_rate * new_alphaf_num;
        alphaf_den = (1 - params.learning_rate) * alphaf_den + params.learning_rate * new_alphaf_den;
    }

    // save position (stored shifted by one pixel in x and y)
    positions.push_back(make_pair(pos - Point(1, 1), target_sz));

    // visualization
    if (params.visualization == 1)
    {
        cv::Rect rect(pos.x - 1 - target_sz.width / 2, pos.y - 1 - target_sz.height / 2, target_sz.width, target_sz.height);
        cv::rectangle(current_frame, rect, Scalar(0, 0, 255), 2);
        imshow("current_frame", current_frame);
        cv::waitKey(30);
    }

    frame_index++;
    return get_position();
}
未完待更新。