Adaptive Color Attributes for Real-Time Visual Tracking 论文笔记&代码解析

最新推荐文章于 2019-01-08 19:46:39 发布

sansanfree

最新推荐文章于 2019-01-08 19:46:39 发布

阅读量2.9k

点赞数 3

文章标签：目标跟踪 Color Name CSK模型相关滤波

本文链接：https://blog.csdn.net/sansanfree/article/details/79889238

版权

附上本篇论文作者主页：http://www.cvl.isy.liu.se/research/objrec/visualtracking/colvistrack/index.html （有论文原文和matlab代码）

我从github上找的c++版本代码：https://github.com/mostafaizz/ColorTracker

本论文由目标跟踪领域的知名学者 Martin Danelljan（MD）等人发表于 CVPR 2014。论文利用 Color Name 提取目标特征图，并在 CSK 跟踪模型的基础上进行目标跟踪；CSK 模型是大名鼎鼎的相关滤波跟踪器 KCF（2015年 TPAMI）的前作。本篇博客简要介绍其原理，重点解析其 C++ 实现代码。

CSK论文：http://fcv2011.ulsan.ac.kr/files/announcement/455/csk_tracker_eccv2012.pdf

1.背景介绍

1.1 CSK模型

CSK模型可参考这篇博客：https://blog.csdn.net/carrierlxksuper/article/details/50328437

KCF模型（后作）推荐阅读这篇博客：https://www.cnblogs.com/YiXiaoZhou/p/5925019.html ，讲得非常好。

1.2颜色跟踪模型

这里所说的 Color Name 来自此前发表的论文《Learning Color Names for Real-World Applications》（2009），它可以提取图像的11维颜色特征；再加上图像的灰度特征，一起作为CSK跟踪模型的特征图（feature map）x。传统CSK模型的特征图是一维的，这里做了改进，扩展为多通道特征；而后续的KCF模型则是提取图像的HOG作为特征图 x。

图1

图2

原理自行阅读论文，这些公式在之后的代码中是有所体现的。

1.3降维

作者为了减少特征维度，采用PCA降维方法，具体公式为（6）和（7），然后作者给出了算法流程图。

图3 algorithm流程图

图4 对x求高斯变换，再作相关操作

图5 CSK论文中的CSK跟踪模型

具体不解释了，自行理解，具体代码有所体现。

2.代码分析

我贴的为c++代码，下载请从开头给出的github网址下载即可。

2.1主函数

void test_ball1()
{
	ColorTrackerParameters params; //定义一个ColorTrackerParameters的类对象params,包括了很多对象成员变量；
	params.visualization = 1;   //就是设置显示实时跟踪结果图
	cv::Point pos = cv::Point(492, 417);//初始跟踪矩形目标框的左上角坐标（x，y）
	cv::Size target_sz = cv::Size(47, 46);//矩形目标框的大小位置
	params.init_pos.x = (int)(floor(pos.x) + floor(target_sz.width / 2));//矩形目标中心位置坐标x
	params.init_pos.y = (int)(floor(pos.y) + floor(target_sz.height / 2));//矩形目标中心位置坐标y
	params.wsize = cv::Size((int)floor(target_sz.width), (int)floor(target_sz.height));

	ColorTracker tracker(params);//初始化跟踪器参数，可自行打开阅读
	tracker.init_tracking();//初始化跟踪器
	for (int frame_index = 1; frame_index <= 105; frame_index++) //ball1总共有105帧
	{
		ostringstream ostr;
		ostr << "sequences/ball1/imgs/";
		ostr << setfill('0') << setw(8) << frame_index << ".jpg";
		cv::Mat current_frame = cv::imread(ostr.str()); //opencv函数读取图片,图片返回作为当前帧current_frame
		tracker.track_frame(current_frame);//跟踪当前帧
	}
	cv::waitKey(0);
}

// Program entry point: runs the ball1 tracking demo and exits with status 0.
int main(int argc, char** argv)
{
	// The command-line arguments are not used by this demo.
	(void)argc;
	(void)argv;

	// Other demos, left disabled as in the original:
	//   test_Scate();
	//   test_soccer();
	//   test_video();
	test_ball1();  // track the ball in "ball1" (a VOT2015 sequence, per the original note)
	return 0;
}

2.2 tracker.init_tracking()函数

void ColorTracker::init_tracking()
{
	// use_dimensionality_reduction
	use_dimensionality_reduction = (params.compressed_features.size() != 0);

	target_sz = cv::Size(params.wsize.width, params.wsize.height);
	pos = cv::Point(params.init_pos.x, params.init_pos.y);

	// window size, taking padding into account
	sz_with_padding = cv::Size((int)floor(target_sz.width * (1 + params.padding)), (int)floor(target_sz.height * (1 + params.padding)));
        //扩选上一帧目标区域的大小，这里params.padding为1，基本就是将目标区域扩大一倍。
	// desired output(gaussian shaped), bandwidth proportional to target size
	double output_sigma = sqrt(target_sz.width * target_sz.height) * params.output_sigma_factor;
	Mat y = Mat::zeros(sz_with_padding, CV_64FC1);
	double output_sigma_square = output_sigma * output_sigma;
	int ind = 0;
	double *data = ((double*)(y.data));
	for (int i = 1; i <= sz_with_padding.height; i++)
	{
		for (int j = 1; j <= sz_with_padding.width; j++)
		{
			int tmpCs = j - sz_with_padding.width / 2;
			int tmpRs = i - sz_with_padding.height / 2;
			data[ind++] = exp(-0.5 / output_sigma_square * (tmpRs * tmpRs + tmpCs * tmpCs));
		}
	}

	cv::dft(y, yf, DFT_COMPLEX_OUTPUT);
        //以上操作:初始帧的扩选区域为sz_with_padding ，以扩选区域中心位置做高斯衰减得到y，对y做DFT变换，得到yf.
	// store pre - computed cosine window
	Mat hann_1 = Mat::zeros(cv::Size(1, sz_with_padding.height), CV_64FC1);//hann窗
	ind = 0;
	data = ((double*)hann_1.data);
	for (int i = 0; i < sz_with_padding.height; i++)
	{
		data[ind++] = (0.5 * (1 - cos(2 * 3.14159265359 * i / (sz_with_padding.height - 1))));
	}
	Mat hann_2 = Mat::zeros(cv::Size(1, sz_with_padding.width), CV_64FC1);
	ind = 0;
	data = ((double*)hann_2.data);
	for (int i = 0; i < sz_with_padding.width; i++)
	{
		data[ind++] = (0.5 * (1 - cos(2 * 3.14159265359 * i / (sz_with_padding.width - 1))));
	}

	cos_window = hann_1 * hann_2.t(); //con_window是根据sz_with_padding制作的hann窗
	frame_index = 1;
}

2.3ColorTracker::track_frame()函数

/**
 * Processes one frame of the sequence.
 *
 * For every frame after the first, the target is first localised at the
 * maximum of the classifier response and `pos` is updated (clamped to the
 * image). Then, for every frame, the appearance model (z_npca / z_pca), the
 * PCA projection (when dimensionality reduction is enabled) and the classifier
 * coefficients (alphaf_num / alphaf_den) are initialised or updated.
 *
 * @param current_frame  Frame to process. When params.visualization == 1 the
 *                       target rectangle is drawn onto it and it is displayed.
 * @return The value of get_position() after the update.
 */
cv::Rect ColorTracker::track_frame(Mat &current_frame)
{
	if (frame_index > 1)  // every frame after the first: detect before updating
	{
		// compute the compressed learnt appearance (feature map zp of the model
		// learnt up to the previous frame)
		vector<Mat> zp = feature_projection(z_npca, z_pca, projection_matrix, cos_window);

		// extract the feature map of the local image patch
		// NOTE(review): these two locals hide the xo_npca / xo_pca used after this
		// branch, which are not declared in this function (presumably class
		// members) - confirm before merging them.
		Mat xo_npca, xo_pca;
		get_subwindow(current_frame, pos, sz_with_padding, params.non_compressed_features, params.compressed_features, w2c, xo_npca, xo_pca);

		// do the dimensionality reduction and windowing:
		// feature map x of the candidate region in the current frame
		vector<Mat> x = feature_projection(xo_npca, xo_pca, projection_matrix, cos_window);

		// calculate the response of the classifier
		// (the detection step of the CSK model, Fig. 5 of this post)
		cv::dft(dense_gauss_kernel(params.sigma, x, zp), kf, DFT_COMPLEX_OUTPUT);
		num1 = mul_complex_element_by_element(alphaf_num, kf);
		num2 = mul_complex_element_by_element_second_conjugate(num1, alphaf_den);
		denum1 = mul_complex_element_by_element_second_conjugate(alphaf_den, alphaf_den);
		cv::split(denum1, denum);
		cv::split(num2, num);
		// both planes of the numerator are divided by the first plane of the denominator
		cv::divide(num[0], denum[0], num[0]);
		cv::divide(num[1], denum[0], num[1]);
		cv::merge(num, tmp_r);
		// response map: inverse DFT, then normalised by the number of elements
		cv::dft(tmp_r, response, DFT_INVERSE | DFT_REAL_OUTPUT);
		response = response / (response.cols * response.rows);

		// target location is at the maximum response:
		// update the target position for the current frame
		double minVal;
		double maxVal;
		Point minLoc;
		Point maxLoc;
		cv::minMaxLoc(response, &minVal, &maxVal, &minLoc, &maxLoc);
		pos = pos - cv::Point((int)floor(sz_with_padding.width / 2), (int)floor(sz_with_padding.height / 2))
			+ cv::Point(maxLoc.x + 1, maxLoc.y + 1);

		// keep the position inside the image
		if (pos.x < 0)
		{
			pos.x = 0;
		}
		if (pos.y < 0)
		{
			pos.y = 0;
		}
		if (pos.x >= current_frame.cols)
		{
			pos.x = current_frame.cols - 1;
		}
		if (pos.y >= current_frame.rows)
		{
			// last valid row index is rows - 1 (same rule as the x clamp above)
			pos.y = current_frame.rows - 1;
		}
	}
	else
	{
		im_patch = Mat::zeros(sz_with_padding.height, sz_with_padding.width, current_frame.type());
	}

	// extract the feature map of the local image patch to train the classifier
	// (target feature map of the current frame). Per the original notes, w2c is
	// the loaded Color Name table and xo_npca / xo_pca receive the
	// non-compressed and to-be-compressed (PCA) features.
	get_subwindow(current_frame, pos, sz_with_padding, params.non_compressed_features,
		params.compressed_features, w2c, xo_npca, xo_pca);

	if (frame_index == 1)
	{
		// initialize the appearance
		z_npca = xo_npca;
		z_pca = xo_pca;

		// set number of compressed dimensions to maximum if too many
		params.num_compressed_dim = std::min(params.num_compressed_dim, xo_pca.cols);
	}
	else
	{
		// update the appearance by linear interpolation (paper Eq. (5c))
		z_npca = (1 - params.learning_rate) * z_npca + params.learning_rate * xo_npca;
		z_pca = (1 - params.learning_rate) * z_pca + params.learning_rate * xo_pca;
	}

	// if dimensionality reduction is used: update the projection matrix
	if (use_dimensionality_reduction)
	{
		if (frame_index == 1)
		{
			// allocate the data matrix once, with the same shape as z_pca
			data_matrix = Mat::zeros(z_pca.rows, z_pca.cols, CV_64FC1);
		}
		// compute the mean appearance (column-wise average over all rows)
		reduce(z_pca, data_mean, 0, CV_REDUCE_AVG);

		// subtract the mean from the appearance to get the data matrix
		// (copies z_pca.cols doubles per row, so z_pca is treated as CV_64F here)
		double*data = ((double*)data_matrix.data);
		for (int i = 0; i < z_pca.rows; i++)
		{
			memcpy(data + i * z_pca.cols, ((Mat)(z_pca.row(i) - data_mean)).data, z_pca.cols * sizeof(double));
		}

		// calculate the covariance matrix (algorithm steps 1 and 2)
		cov_matrix = (1.0 / (sz_with_padding.width * sz_with_padding.height - 1))
			* (data_matrix.t() * data_matrix);
		//cov_matrix.convertTo(cov_matrix, CV_32FC1);

		// calculate the principal components (pca_basis) and corresponding variances
		if (frame_index == 1)
		{
			// cov_matrix is R1 from algorithm step 3; the SVD performs step 8:
			// pca_variances is Sp, pca_basis is Ep and vt its transpose
			// (first frame, so p = 1).
			Mat vt;
			cv::SVD::compute(cov_matrix, pca_variances, pca_basis, vt);
		}
		else
		{
			// algorithm steps 6 and 8: blend the old and new covariance, then decompose
			Mat vt;
			cv::SVD::compute((1 - params.compression_learning_rate) * old_cov_matrix + params.compression_learning_rate * cov_matrix,
				pca_variances, pca_basis, vt);
		}

		// calculate the projection matrix as the first principal
		// components and extract their corresponding variances
		// (algorithm steps 9 and 10)
		projection_matrix = pca_basis(cv::Rect(0, 0, params.num_compressed_dim, pca_basis.rows)).clone();
		Mat projection_variances = Mat::zeros(params.num_compressed_dim, params.num_compressed_dim, CV_64FC1);
		for (int i = 0; i < params.num_compressed_dim; i++)
		{
			// place the i-th variance on the diagonal
			((double*)projection_variances.data)[i + i*params.num_compressed_dim] = ((double*)pca_variances.data)[i];
		}

		if (frame_index == 1)
		{
			// initialize the old covariance matrix using the computed
			// projection matrix and variances (algorithm step 12)
			old_cov_matrix = projection_matrix * projection_variances * projection_matrix.t();
		}
		else
		{
			// update the old covariance matrix using the computed
			// projection matrix and variances (algorithm step 14)
			old_cov_matrix =
				(1 - params.compression_learning_rate) * old_cov_matrix +
				params.compression_learning_rate * (projection_matrix * projection_variances * projection_matrix.t());
		}
	}


	// project the features of the new appearance example using the new
	// projection matrix (projection_matrix may have changed, so the feature
	// map x is recomputed)
	vector<Mat> x = feature_projection(xo_npca, xo_pca, projection_matrix, cos_window);

	// calculate the new classifier coefficients
	vector<Mat> dummy;
	// Gaussian kernel of the feature map x; an empty second feature vector is
	// passed (tmpKernel corresponds to Fig. 4 of this post)
	Mat tmpKernel = dense_gauss_kernel(params.sigma, x,dummy);
	// Fourier transform of the kernel gives kf
	cv::dft(tmpKernel, kf, DFT_COMPLEX_OUTPUT);
	// numerator of paper Eq. (4)
	Mat new_alphaf_num = mul_complex_element_by_element(yf, kf);
	vector<Mat> kf_;
	cv::split(kf, kf_);
	kf_[0] += params.lambda;  // add the regularisation term to the real part
	Mat tmp;
	cv::merge(kf_, tmp);
	// denominator of paper Eq. (4)
	Mat new_alphaf_den = mul_complex_element_by_element(kf, tmp);

	if (frame_index == 1)
	{
		// first frame_index, train with a single image (Eqs. (5a)/(5b) with p = 1)
		alphaf_num = new_alphaf_num;
		alphaf_den = new_alphaf_den;
	}
	else
	{
		// subsequent frame_indexs, update the model (Eqs. (5a)/(5b) with p > 1)
		alphaf_num = (1 - params.learning_rate) * alphaf_num + params.learning_rate * new_alphaf_num;
		alphaf_den = (1 - params.learning_rate) * alphaf_den + params.learning_rate * new_alphaf_den;
	}

	//save position
	positions.push_back(make_pair(pos - Point(1, 1), target_sz));

	//visualization
	if (params.visualization == 1)
	{
		// rectangle of the target size centred on (pos - 1), drawn in red (BGR 0,0,255)
		cv::Rect rect(pos.x - 1 - target_sz.width / 2, pos.y - 1 - target_sz.height / 2, target_sz.width, target_sz.height);
		cv::rectangle(current_frame, rect, Scalar(0, 0, 255), 2);
		imshow("current_frame", current_frame);
		cv::waitKey(30);
	}
	frame_index++;

	return get_position();
}

未完待更新。