TLD matlab版源码理解Part2（随笔）

CHEN7_98

已于 2023-12-07 19:36:38 修改

阅读量102

点赞数

分类专栏： TLD 文章标签： matlab 开发语言

于 2023-10-11 22:37:47 首次发布

本文链接：https://blog.csdn.net/BwatUluv/article/details/133780408

版权

TLD 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

fern.cpp

// Copyright 2011 Zdenek Kalal
//
// This file is part of TLD.
// 
// TLD is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// 
// TLD is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with TLD.  If not, see <http://www.gnu.org/licenses/>.

#include "stdio.h"
#include "math.h"
#include <vector>
#include <map>
#include <set>
#include "tld.h"
//#ifdef _CHAR16T
//#define CHAR16_T
//#endif
#include "mex.h" 
using namespace std;

static double thrN;
static int nBBOX; // 所有grid的数目
static int mBBOX; // =6，相当于每一个grid用6个值表示，参考tld.grid，前面4个表示坐标，第5个表示第几个尺度，第6个表示同一行还有多少个同尺度的grid
static int nTREES; // 10棵树
static int nFEAT; // 13对特征点
static int nSCALE; // 尺度的数量
static int iHEIGHT; // 图高
static int iWIDTH; // 图宽
static int *BBOX = NULL; // BBOX相当于一个7×N的矩阵（这么理解就好了！实际上是一维的），在tld.grid的基础上得到，前四个值是gird坐标，第五个是该grid的面积，第六个是指向该尺度下特征点矩阵（OFF）首地址的, 第七个是该尺度下每行的窗格数目
static int *OFF  = NULL; // OFF指向一个大小为260m（m个尺度×10棵树×13对特征）的一维数组 // 从这个一维数组能够得到每棵树中的每对特征点，在每个尺度下的网格中的位置偏移量
static double *IIMG = 0; // 存储图像的积分结果
static double *IIMG2 = 0; // 存储图像的平方积分结果
static vector<vector <double> > WEIGHT; //WEIGHT[i][idx]，表示第i棵树中，特征idx出现在正样本中的次数与出现总次数的比例。
static vector<vector <int> > nP; // nP[i][idx]，表示第i棵树局部特征为idx的正样本出现的次数
static vector<vector <int> > nN; // nN则是负样本出现的次数
static int BBOX_STEP = 7; // 用于搜索BBOX的步长，因为BBOX相当于一个7×N的矩阵！ 
static int nBIT = 1; // number of bits per feature

// 定义了一个宏（macro），用于将二维坐标 (row, col) 转换为一维索引值
#define sub2idx(row,col,height) ((int) (floor((row)+0.5) + floor((col)+0.5)*(height))) 


void update(double *x, int C, int N) { // 传入的x为某个样本在10棵树下的局部比特特征，每一个局部比特特征都是长为13的二值序列
	for (int i = 0; i < nTREES; i++) {

		int idx = (int) x[i]; // 得到样本在第i棵树下的局部比特特征

		// nP[i][idx]，表示第i棵树特征是idx的正样本出现的次数
		// nN则是负样本出现的次数
		(C==1) ? nP[i][idx] += N : nN[i][idx] += N;

		if (nP[i][idx]==0) { // 在这种情况下，模型将权重 WEIGHT[i][idx] 设置为0
			WEIGHT[i][idx] = 0;
		} else { // 否则更新权重
			WEIGHT[i][idx] = ((double) (nP[i][idx])) / (nP[i][idx] + nN[i][idx]);
		}
	}
}


double measure_forest(double *idx) { // 计算得到fern分类器对某样本的得分，这里其实是基于该样本在10棵树下的局部比特特征得到其分数的
	double votes = 0;
	for (int i = 0; i < nTREES; i++) { 
		// WEIGHT[i][idx]，表示第i棵树中，特征idx出现在正样本中的次数与出现总次数的比例。
		votes += WEIGHT[i][idx[i]];
	}
	return votes;
}


// 计算比特特征的函数
int measure_tree_offset(unsigned char *img, int idx_bbox, int idx_tree) { 

	int index = 0;
	int *bbox = BBOX + idx_bbox*BBOX_STEP; // 这里是指得到特定下标的网格
	// OFF指向一个大小为260m（m个尺度×10棵树×13对特征）的一维数组
	// 从这个一维数组能够得到每棵树中的每对特征点，在每个尺度下的网格中的位置偏移量
	int *off = OFF + bbox[5] + idx_tree*2*nFEAT; // bbox[5]代表第几个有效尺度， idx_tree*2*nFEAT相当于跳到第几课树，所以这里也很明确
	for (int i=0; i<nFEAT; i++) {
		index<<=1;  // 左移一位
		// off[0] 是一个整数，表示一个特定特征点的偏移量。在这里，它用于定位特征点相对于特定尺度grid的位移
		// 偏移量加上grid本身的坐标，就得到了该特征点在该尺度网格下的像素位置，这里是用一维索引表示的
		int fp0 = img[off[0]+bbox[0]]; // 获取图像中的两个像素值 fp0 和 fp1，这些像素值用于进行特征的比较。
		int fp1 = img[off[1]+bbox[0]];
		if (fp0>fp1) { index |= 1;} // 如果 fp0 大于 fp1，则将 index 的最低位设置为 1，否则最低位保持为 0。这一步是为了根据像素值的比较来确定特征的状态。
		off += 2; // 将 off 偏移量更新，以便获取下一个特征的像素值，然后重复上述过程，直到所有特征都处理完毕。
	}
	return index;	
}


double measure_bbox_offset(unsigned char *blur, int idx_bbox, double minVar, double *tPatt) {

	double conf = 0.0;
	double bboxvar = bbox_var_offset(IIMG,IIMG2,BBOX+idx_bbox*BBOX_STEP); // // 在tld.cpp中有该函数的实现，用于计算图像内的方差
	if (bboxvar < minVar) {	return conf; } // 未通过方差分类器，则直接返回

	for (int i = 0; i < nTREES; i++) { 
		int idx = measure_tree_offset(blur,idx_bbox,i); // 得到bit特征
		tPatt[i] = idx; 
		conf += WEIGHT[i][idx]; // 根据特征，得到第i颗树对于特征Idx为正样本的后验概率，加到conf中，conf直接和0.5×树的数量相比较
	}
	return conf;
}

// 10棵树，每棵树都有13对特征点
// 这个函数就是计算每棵树中的每对特征点，在每个尺度下的网格中的位置偏移量
// 最终的计算结果由OFF指针指向
int* create_offsets(double *scale0, double *x0) {
	// nSCALE为有效尺度的个数，nTREES为10，nFEAT为13
	 // 分配空间，对于m个尺度，10棵树，每棵树13对特征点，每一对特征点的二维坐标转化为一维索引（x1y1和x2y2转换为索引1+索引2），共分配260m大小的空间
	int *offsets = (int*) malloc(nSCALE*nTREES*nFEAT*2*sizeof(int));
	int *off = offsets;

	for (int k = 0; k < nSCALE; k++){
		double *scale = scale0+2*k; // scale0即为tld.scale，是一个2×m的矩阵，所以每次跳2个元素，相当于遍历每一列
		for (int i = 0; i < nTREES; i++) {
			for (int j = 0; j < nFEAT; j++) {
				double *x  = x0 +4*j + (4*nFEAT)*i; // 每个feature是一个4维列向量，(4*13)*i即下一棵树 
				// scale[1]宽，scale[0]高,x[0]x坐标，x[1]y坐标
				*off++ = sub2idx((scale[0]-1)*x[1],(scale[1]-1)*x[0],iHEIGHT); // 记录第一个点在该尺度BOX的具体位置,并转化为索引
				*off++ = sub2idx((scale[0]-1)*x[3],(scale[1]-1)*x[2],iHEIGHT); // 记录第二个点在该尺度BOX的具体位置,并转化为索引
			}
		}
	}
	return offsets;
}

int* create_offsets_bbox(double *bb0) { // 传入的bb0即为tld.grid，代表了所有的网格，是一个6×n的矩阵
		
	int *offsets = (int*) malloc(BBOX_STEP*nBBOX*sizeof(int)); // BBOX_STEP=7，也就是说这里相当于分配了一个7×n矩阵的内存，并将其分配给offsets指针，n代表所有尺度下的网格数目
	int *off = offsets; // 定义一个新指针，为了后续指针偏移而不更改offsets的值，学过c的都能理解吧

	for (int i = 0; i < nBBOX; i++) {
		double *bb = bb0+mBBOX*i; // mBBOX=6，相当于每次遍历grid中的一列
		// 将计算出的索引值存储在 offsets 数组中，并将 off 指针移动到下一个位置，以便存储下一个值
		// 减1是因为下标从0开始
		*off++ = sub2idx(bb[1]-1,bb[0]-1,iHEIGHT); // 左上角
		*off++ = sub2idx(bb[3]-1,bb[0]-1,iHEIGHT); // 左下角
		*off++ = sub2idx(bb[1]-1,bb[2]-1,iHEIGHT); // 右上角
		*off++ = sub2idx(bb[3]-1,bb[2]-1,iHEIGHT); // 右下角
		*off++ = (int) ((bb[2]-bb[0])*(bb[3]-bb[1])); // 网格的面积
		*off++ = (int) (bb[4]-1)*2*nFEAT*nTREES; // bb[4]代表第几个有效尺度，因此这里是该尺度下特征点的首地址
		*off++ = bb[5]; // 同tld.gird中的第六行，代表每一行有多少个网格
	}
	return offsets;
}


double randdouble() 
{ 
	return rand()/(double(RAND_MAX)+1); 
} 


void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
	if (nrhs == 0) {
		mexPrintf("CLEANUP: function(0);\n");
		mexPrintf("INIT: function(1, img, bb, features, scales)\n");
		mexPrintf("UPDATE: Conf = function(2,X,Y,Margin,Bootstrap,Idx)\n");
		mexPrintf("EVALUATE: Conf = function(3,X)\n");
		mexPrintf("DETECT: function(4,img,maxBBox,minVar,Conf,X)\n");
		mexPrintf("GET PATTERNS: patterns = fern(5,img,idx,minVar)\n");
		return;
	}

	switch ((int) *mxGetPr(prhs[0])) {

		// CLEANUP: function(0);
		// 这段代码是一个清理操作，它的作用是将算法的状态重置为初始状态
		// =============================================================================
	case 0:  {
		srand(0); // 重设随机数种子

		thrN = 0; nBBOX = 0; mBBOX = 0; nTREES = 0; nFEAT = 0; nSCALE = 0; iHEIGHT = 0; iWIDTH = 0;

		free(BBOX); BBOX = 0; //释放内存
		free(OFF); OFF = 0;
		free(IIMG); IIMG = 0;
		free(IIMG2); IIMG2 = 0;
		WEIGHT.clear();
		nP.clear();
		nN.clear();
		return;
			 }

			 // INIT: function(1, img, grid, features, scales)
			 //                0  1    2       3         4
			 // =============================================================================
	case 1:  {

		if (nrhs!=5) { mexPrintf("fern: wrong input.\n"); return; }
		if (BBOX!=0) { mexPrintf("fern: already initialized.\n"); return; }

		iHEIGHT    = mxGetM(prhs[1]); // 全局图像的高度，y轴
		iWIDTH     = mxGetN(prhs[1]); // 全局图像的宽度，x轴
		nTREES     = mxGetN(mxGetField(prhs[3],0,"x")); // 参考tldGenerateFeatures.m，最终生成的tld.features是一个52×10的矩阵，这里获取矩阵的列数，也就是10，代表有10棵树
		nFEAT      = mxGetM(mxGetField(prhs[3],0,"x")) / 4; // 获取矩阵的行数，也就是52，再÷4即为13，代表13对特征点
		thrN       = 0.5 * nTREES; // 分类阈值，参考TLD算法原理，13棵树的预测结果取平均，大于0.5即为正样本，这里直接将0.5×13，再将其和预测结果的总和进行比较，效果是一样的
		nSCALE     = mxGetN(prhs[4]); // 有效尺度的个数

		IIMG       = (double*) malloc(iHEIGHT*iWIDTH*sizeof(double)); // 动态分配内存，用于存储积分图像，大小和原始全局图像相同
		IIMG2      = (double*) malloc(iHEIGHT*iWIDTH*sizeof(double)); // 动态分配内存，用于存储平方积分图像

		// BBOX
		mBBOX      = mxGetM(prhs[2]); // 即为6，因为tld.grid是一个6×n的矩阵，不记得的回去复习
		nBBOX      = mxGetN(prhs[2]); // 总共的网格数目，包含所有尺度
		// 创建保存网格数据索引等数据  
		// 这里是关键之一，将输入的窗格的坐标转换成内存地址，因为建立窗格时使用的是平面坐标，经过简单的换算就可以变成内存地址
		// BBOX是7*N的矩阵，前四个是左上，右下角的x,y坐标，第五个是该窗格的面积，第六个是指向该尺度下特征点矩阵首地址的, 第七个是该尺度下每行的窗格数目。这个是静态的，以后还会用到这个结构体
		BBOX	   = create_offsets_bbox(mxGetPr(prhs[2])); 
		double *x  = mxGetPr(mxGetField(prhs[3],0,"x")); // 指向tld.features.x，特征点矩阵，就是那个52*10的
		double *s  = mxGetPr(prhs[4]); // 指向tld.scales，代表有效尺度下gridbox的高和宽，为2Xm矩阵（m表示有效的尺度的个数）
		// 将特征点转换成每个尺度下的内存地址（一维索引）
		// 这里也很关键，因为feature就一个，而每个尺度下宽和高是不一样的，所有按照各自的比例将特征点矩阵换算成各自的地址，OFF的大小为m×13×10×2=260m，其中，2代表2个点
		OFF		   = create_offsets(s,x); 

		// 10×2^13的容器
		for (int i = 0; i<nTREES; i++) {
			WEIGHT.push_back(vector<double>(pow(2.0,nBIT*nFEAT), 0)); 
			nP.push_back(vector<int>(pow(2.0,nBIT*nFEAT), 0));
			nN.push_back(vector<int>(pow(2.0,nBIT*nFEAT), 0));
		}
		// 全部置为0
		for (int i = 0; i<nTREES; i++) {
			for (int j = 0; j < WEIGHT[i].size(); j++) {
				WEIGHT[i].at(j) = 0;
				nP[i].at(j) = 0;
				nN[i].at(j) = 0;
			}
		}

		return;
			 }

	// UPDATE
	// =============================================================================
	case 2: {

		if (nrhs!=5 && nrhs!=6) { mexPrintf("Conf = function(2,X,Y,Margin,Bootstrap,Idx)\n"); return; }
		//                                                   0 1 2 3      4         5
		// X为正负样本的局部二值特征， Y为标签
		double *X     = mxGetPr(prhs[1]); 
		int numX      = mxGetN(prhs[1]);
		double *Y     = mxGetPr(prhs[2]);
		double thrP   = *mxGetPr(prhs[3]) * nTREES; // 分类阈值，为tld.fern×10 = 5
		int bootstrap = (int) *mxGetPr(prhs[4]);


		int step = numX / 10; // step表示将所有样本划分为10个集合

		if (nrhs == 5) {
			for (int j = 0; j < bootstrap; j++) { // bootstrap控制需要重复处理整个样本集多少次
				for (int i = 0; i < step; i++) { // i代表循环到了某个子集内的第几个样本
					for (int k = 0; k < 10; k++) { // k代表循环到了第几个子集
					
						int I = k*step + i; // 这样就能得到当前处理的样本的索引，可以看到I会遍历整个样本集
						double *x = X+nTREES*I;  // 得到当前处理的样本在10棵树中的局部二值特征，传参至measure_forest中计算预测出的得分
						// 标签Y是绝对可信的
						// 因此只有在fern分类错误的时候，才更新，也就是更新WEIGHT、nP、nN
						if (Y[I] == 1) { // 如果标签为正但没有通过fern分类器，则更新fern分类器
							if (measure_forest(x) <= thrP)
								update(x,1,1);
						} else { // 如果标签为负但通过了fern分类器，也需要更新
							if (measure_forest(x) >= thrN)
								update(x,0,1);
						}
					}
				}
			}
		}
		if (nrhs == 6) {
			double *idx   = mxGetPr(prhs[5]);
			int nIdx      = mxGetN(prhs[5])*mxGetM(prhs[5]);


			for (int j = 0; j < bootstrap; j++) {
				for (int i = 0; i < nIdx; i++) {
					int I = idx[i]-1;
					double *x = X+nTREES*I;
					if (Y[I] == 1) {
						if (measure_forest(x) <= thrP)
							update(x,1,1);
					} else {
						if (measure_forest(x) >= thrN)
							update(x,0,1);
					}
				}
			}
		}



		if (nlhs==1) {
			plhs[0] = mxCreateDoubleMatrix(1, numX, mxREAL); 
			double *resp0 = mxGetPr(plhs[0]);

			for (int i = 0; i < numX; i++) {
				*resp0++ = measure_forest(X+nTREES*i);
			}
		}

		return;
			}

	// EVALUATE PATTERNS
	// =============================================================================
	case 3: {

		if (nrhs!=2) { mexPrintf("Conf = function(3,X)\n"); return; }
		//                                        0 1  
		// 初始化阶段生成的负样本局部特征矩阵的50% 
		double *X     = mxGetPr(prhs[1]); 
		int numX      = mxGetN(prhs[1]);

		plhs[0] = mxCreateDoubleMatrix(1, numX, mxREAL); // 指向输出结果，为一个1×numX的数组
		double *resp0 = mxGetPr(plhs[0]); 

		for (int i = 0; i < numX; i++) {
			*resp0++ = measure_forest(X+nTREES*i); // 计算得到每一个负样本的fern分类结果——置信度值，然后返回
		}

		return;
			}

			// DETECT: TOTAL RECALL
			// =============================================================================
	case 4: {

		if (nrhs != 6) { 
			mexPrintf("function(4,img,maxBBox,minVar,conf,patt)\n"); 
			//                  0 1   2       3      4    5   
			// maxBBox：是否调用检测器，为0则关闭检测器，为1则调用检测器
			// minVar：目标灰度方差的50%，即方差分类器的阈值
			// conf和patt：tld.tmp.conf，是网格属于正样本的比例，越高越可能是正样本，tld.tmp.patt，是每个网格的patten值，即局部二值特征
			return; 
		}

		// Pointer to preallocated output matrixes
		// 将fern分类器输出的置信度值存储在conf中，并将对应的局部特征存储在patt中
		double *conf = mxGetPr(prhs[4]); if ( mxGetN(prhs[4]) != nBBOX) { mexPrintf("Wrong input.\n"); return; }
		double *patt = mxGetPr(prhs[5]); if ( mxGetN(prhs[5]) != nBBOX) { mexPrintf("Wrong input.\n"); return; }
		for (int i = 0; i < nBBOX; i++) { conf[i] = -1; } // 遍历所有的grid

		// Setup sampling of the BBox
		double probability = *mxGetPr(prhs[2]);

		double nTest  = nBBOX * probability; if (nTest <= 0) return; // 如果maxBBox小于等于0，则代表关闭检测器，直接返回
		if (nTest > nBBOX) nTest = nBBOX;
		double pStep  = (double) nBBOX / nTest;  // 检测的步长，一般为1
		double pState = randdouble() * pStep;  // 代表当前检测到了第几个grid

		// Input images
		unsigned char *input = (unsigned char*) mxGetPr(mxGetField(prhs[1],0,"input"));
		unsigned char *blur  = (unsigned char*) mxGetPr(mxGetField(prhs[1],0,"blur"));

		// Integral images
		iimg(input,IIMG,iHEIGHT,iWIDTH); // 计算图像的积分
		iimg2(input,IIMG2,iHEIGHT,iWIDTH); // 计算图像的平方积分

		// log: 0 - not visited, 1 - visited
		int *log = (int*) calloc(nBBOX,sizeof(int)); // 用于检查是否遍历了所有网格

		// variance
		double minVar = *mxGetPr(prhs[3]);

		// totalrecall
		//double totalrecall = *mxGetPr(prhs[6]);
		int I = 0;
		int K = 2;

		while (1) 
		{
			// Get index of bbox
			I = (int) floor(pState);
			pState += pStep;
			if (pState >= nBBOX) { break; } 

			// measure bbox
			log[I] = 1; // 标记检测过了
			double *tPatt = patt + nTREES*I; // 有10棵树，每棵树都生成一个局部特征，而patt是存储最终的局部特征，因此，tPatt每次移动的步长也为10
			conf[I] = measure_bbox_offset(blur,I,minVar,tPatt); // 得到置信度值

			// total recall
			/*
			if (totalrecall == 1 && conf[i] > thrN) 
			{
				int I;
				int W = *(BBOX + i*BBOX_STEP + 6);

				for (int gH = -K; gH <= K; gH++ ) 
				{
					for (int gW = -K; gW <= K; gW++)
					{
						I = i+gW+(gH)*W;
						if (I >= 0 && I < nBBOX && log[I]==0)
						{
							log[I] = 1;
							tPatt = patt + nTREES*I;
							conf[I] = measure_bbox_offset(blur,I,minVar,tPatt);
						}
					}
				}
			}
			*/
		}

		free(log);
		return;
			}

			// GET PATTERNS
	case 5: {

		if (nrhs !=4) { mexPrintf("patterns = fern(5,img,idx,var)\n"); return; }
		//                                         0 1   2   3   

		// image
		unsigned char *input = (unsigned char*) mxGetPr(mxGetField(prhs[1],0,"input"));
		unsigned char *blur  = (unsigned char*) mxGetPr(mxGetField(prhs[1],0,"blur"));

		//if (mxGetM(prhs[1])!=iHEIGHT) { mexPrintf("fern: wrong input image.\n"); return; }

		// bbox indexes
		double *idx = mxGetPr(prhs[2]); // 距离目标BB最近的10/100个网格的编号
		int numIdx = mxGetM(prhs[2]) * mxGetN(prhs[2]); // 编号数量

		// minimal variance
		double minVar = *mxGetPr(prhs[3]); 
		if (minVar > 0) { // 如果提供了初始BB的方差值，则计算积分图像
			iimg(input,IIMG,iHEIGHT,iWIDTH);
			iimg2(input,IIMG2,iHEIGHT,iWIDTH);
		}

		// output patterns
		// 创建一个 `nTREES` 行 `numIdx` 列的双精度浮点数矩阵，并将它分配给 MEX 函数的第一个输出参数 `plhs[0]`。这个矩阵将用于存储特征模式的值，以便后续的计算和处理。
		plhs[0] = mxCreateDoubleMatrix(nTREES,numIdx,mxREAL);
		double *patt = mxGetPr(plhs[0]);

		plhs[1] = mxCreateDoubleMatrix(1,numIdx,mxREAL); // 一维数组，用于存储每个目标的状态，状态为 1 表示有效，0 表示无效。
		double *status = mxGetPr(plhs[1]);


		for (int j = 0; j < numIdx; j++) {

			if (minVar > 0) { // 如果 minVar 大于 0 并且目标的方差小于 minVar，则跳过当前目标，继续下一个目标。
				double bboxvar = bbox_var_offset(IIMG,IIMG2,BBOX+j*BBOX_STEP); // 在tld.cpp中有该函数的实现，用于计算图像内的方差
				if (bboxvar < minVar) {	continue; }
			}
			status[j] = 1; // 否则，将目标的状态设置为 1，表示目标有效
			// 遍历计算得到目标特征模式
			double *tPatt = patt + j*nTREES; // 有10棵决策树，所以一次增加10次
			for (int i = 0; i < nTREES; i++) {
				tPatt[i] = (double) measure_tree_offset(blur, idx[j]-1, i); // 计算该目标的特征模式，对于每棵决策树，计算其特征模式值，将这些值存储在 patt 数组中。
			}
		}
		return;
			}
	}

}

lk.cpp

// Copyright 2011 Zdenek Kalal
//
// This file is part of TLD.
// 
// TLD is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// 
// TLD is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with TLD.  If not, see <http://www.gnu.org/licenses/>.

#include "cv.h"
#include "highgui.h"
#include "math.h"
#include <limits>
//#ifdef _CHAR16T
//#define CHAR16_T
//#endif
#include "mex.h" 


const int MAX_COUNT = 500;
const int MAX_IMG   = 2;
int win_size = 4;
CvPoint2D32f* points[3] = {0,0,0};
static IplImage **IMG = 0;
static IplImage **PYR = 0;

void loadImageFromMatlab(const mxArray *mxImage, IplImage *image) {

	unsigned char *values =  (unsigned char *) mxGetPr(mxImage);
	int widthStep = image->widthStep;
	int N = mxGetN(mxImage); // width
	int M = mxGetM(mxImage); // height

	if (N == 0 || M == 0) {
		printf("Input image error\n");
		return;
	}

	for(int i=0;i<N;i++)
		for(int j=0;j<M;j++) 
			image->imageData[j*widthStep+i] = values[j+i*M];
}

void euclideanDistance (CvPoint2D32f *point1, CvPoint2D32f *point2, float *match, int nPts) {

	for (int i = 0; i < nPts; i++) {

		match[i] = sqrt((point1[i].x - point2[i].x)*(point1[i].x - point2[i].x) + 
		(point1[i].y - point2[i].y)*(point1[i].y - point2[i].y) );

	}
}

void normCrossCorrelation(IplImage *imgI, IplImage *imgJ, CvPoint2D32f *points0, CvPoint2D32f *points1, int nPts, char *status, float *match,int winsize, int method) {


	IplImage *rec0 = cvCreateImage( cvSize(winsize, winsize), 8, 1 );
	IplImage *rec1 = cvCreateImage( cvSize(winsize, winsize), 8, 1 );
	IplImage *res  = cvCreateImage( cvSize( 1, 1 ), IPL_DEPTH_32F, 1 );

	for (int i = 0; i < nPts; i++) {
		if (status[i] == 1) {
			cvGetRectSubPix( imgI, rec0, points0[i] );
			cvGetRectSubPix( imgJ, rec1, points1[i] );
			cvMatchTemplate( rec0,rec1, res, method );
			match[i] = ((float *)(res->imageData))[0]; 

		} else {
			match[i] = 0.0;
		}
	}
	cvReleaseImage( &rec0 );
	cvReleaseImage( &rec1 );
	cvReleaseImage( &res );

}


void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{

	double nan = std::numeric_limits<double>::quiet_NaN();
	double inf = std::numeric_limits<double>::infinity();

	if (nrhs == 0) {
		mexPrintf("Lucas-Kanade\n");
		return;
	}


	switch ((int) *mxGetPr(prhs[0])) {

		// initialize or clean up
		case 0: {
			
			// 如果 IMG 和 PYR 都已经被分配内存且不为空，表示图像数据已经存在
			// 则进入循环，释放图像数据的内存，并将相应的指针设置为0，以释放资源。
			if (IMG!=0 && PYR!=0) {

				for (int i = 0; i < MAX_IMG; i++) {
					cvReleaseImage(&(IMG[i])); IMG[i] = 0;
					cvReleaseImage(&(PYR[i])); PYR[i] = 0;
				}
				free(IMG); IMG = 0;
				free(PYR); PYR = 0;
				//mexPrintf("LK: deallocated\n");
			}

			// 分配了新的内存来初始化 IMG 和 PYR
			// 这里使用了 calloc 函数，为 2 个 IplImage* 指针分配内存，即分配了一个可以存储当前及下一帧图像指针的数组。
			// 在执行完这行语句之后，会创造出一个指针数组，这个数组由IMG指向，并且数组中的每个指针元素都为0
			IMG = (IplImage**) calloc(MAX_IMG,sizeof(IplImage*));
			PYR = (IplImage**) calloc(MAX_IMG,sizeof(IplImage*));
			//mexPrintf("LK: initialized\n");

			return;
				}

		// tracking
		case 2: {

			// 检查变量 IMG 是否为零（空指针），如果是零，表示图像数据尚未初始化，需要进行初始化。IMG 是一个指向图像数据的指针数组。
			// 同时检查输入参数的数量
			if (IMG == 0 || (nrhs != 5 && nrhs != 6)) { 
				mexPrintf("lk(2,imgI,imgJ,ptsI,ptsJ,Level)\n");
				//            0 1    2    3    4   
				return;
			}
			int Level; // 整数变量 Level，用于存储图像金字塔的级别。
			if (nrhs == 6) {
				Level = (int) *mxGetPr(prhs[5]);
			} else {
				Level = 5;
			}


			int I       = 0;
			int J       = 1;
			int Winsize = 10;

			// Images
			if (IMG[I] != 0) {
				loadImageFromMatlab(prhs[1],IMG[I]);
			} else {
				CvSize imageSize = cvSize(mxGetN(prhs[1]),mxGetM(prhs[1]));
				IMG[I] = cvCreateImage( imageSize, 8, 1 ); // 8表示数据是8位的，也就0~255,1表示是单通道，也就是灰度图像
				PYR[I] = cvCreateImage( imageSize, 8, 1 );
				loadImageFromMatlab(prhs[1],IMG[I]);
			}

			if (IMG[J] != 0) {
				loadImageFromMatlab(prhs[2],IMG[J]);
			} else {
				CvSize imageSize = cvSize(mxGetN(prhs[2]),mxGetM(prhs[2]));
				IMG[J] = cvCreateImage( imageSize, 8, 1 );
				PYR[J] = cvCreateImage( imageSize, 8, 1 );
				loadImageFromMatlab(prhs[2],IMG[J]);
			}

			// Points
			double *ptsI = mxGetPr(prhs[3]); int nPts = mxGetN(prhs[3]); // nPts为跟踪点的数量，一般是10×10=100
			double *ptsJ = mxGetPr(prhs[4]); 

			if (nPts != mxGetN(prhs[4])) {
				mexPrintf("Inconsistent input!\n");
				return;
			}

			// 使用 cvAlloc 函数分配内存，创建了三个大小为 nPts（特征点数量）的点数组 points[0]、points[1] 和 points[2]
			points[0] = (CvPoint2D32f*)cvAlloc(nPts*sizeof(CvPoint2D32f)); // template
			points[1] = (CvPoint2D32f*)cvAlloc(nPts*sizeof(CvPoint2D32f)); // target
			points[2] = (CvPoint2D32f*)cvAlloc(nPts*sizeof(CvPoint2D32f)); // forward-backward

			for (int i = 0; i < nPts; i++) {
				// 输入的prhs[3]和prhs[4]是2×100（10×10个点）的矩阵，每一列代表一个点的x和y坐标
				// 然而ptsI和ptsJ是一维数组，matlab的矩阵遍历是从上往下从左到右的
				// 所以这里就是为什么x坐标为i乘2，y坐标为i乘2加1
				points[0][i].x = ptsI[2*i]; points[0][i].y = ptsI[2*i+1]; 
				points[1][i].x = ptsJ[2*i]; points[1][i].y = ptsJ[2*i+1];
				points[2][i].x = ptsI[2*i]; points[2][i].y = ptsI[2*i+1];
			}

			// 分配了四个浮点数组 ncc、ssd、fb 和字符数组 status 用于存储不同计算的结果和特征点的状态，大小为1×100
			float *ncc    = (float*) cvAlloc(nPts*sizeof(float));
			float *ssd    = (float*) cvAlloc(nPts*sizeof(float));
			float *fb     = (float*) cvAlloc(nPts*sizeof(float));
			char  *status = (char*)  cvAlloc(nPts);

			// 参照 https://www.cnblogs.com/xingma0910/archive/2012/07/26/2610281.html
			// 也可以参照 https://blog.csdn.net/qq_28584889/article/details/103261377
			// 执行正向跟踪（预测）与反向跟踪（计算FB error）
			// 应用光流法，从第I帧跟踪到第I+1帧，结果存储在points[1]中
			cvCalcOpticalFlowPyrLK( IMG[I], IMG[J], PYR[I], PYR[J], points[0], points[1], nPts, cvSize(win_size,win_size), Level, status, 0, cvTermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS,20,0.03), CV_LKFLOW_INITIAL_GUESSES);
			// 反向跟踪，从第I+1帧跟踪到第I帧，结果存储在points[2]中
			cvCalcOpticalFlowPyrLK( IMG[J], IMG[I], PYR[J], PYR[I], points[1], points[2], nPts, cvSize(win_size,win_size), Level, status, 0, cvTermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS,20,0.03), CV_LKFLOW_INITIAL_GUESSES | CV_LKFLOW_PYR_A_READY | CV_LKFLOW_PYR_B_READY );
			
			normCrossCorrelation(IMG[I],IMG[J],points[0],points[1],nPts, status, ncc, Winsize,CV_TM_CCOEFF_NORMED);
			//normCrossCorrelation(IMG[I],IMG[J],points[0],points[1],nPts, status, ssd, Winsize,CV_TM_SQDIFF);
			euclideanDistance( points[0],points[2],fb,nPts);
			

			// Output
			// 定义了每个特征点的输出信息的维度。有4个属性，分别是 x 坐标、y 坐标、FB error、和归一化互相关匹配分数（ncc）
			int M = 4; 
			plhs[0] = mxCreateDoubleMatrix(M, nPts, mxREAL);
			double *output = mxGetPr(plhs[0]);
			// 存储预测的横纵坐标，以及FB error和ncc系数
			for (int i = 0; i < nPts; i++) {
				if (status[i] == 1) {
					output[M*i]   = (double) points[1][i].x;
					output[M*i+1] = (double) points[1][i].y;
					output[M*i+2] = (double) fb[i];
					output[M*i+3] = (double) ncc[i];
					//output[M*i+4] = (double) ssd[i];
				} else {
					output[M*i]   = nan;
					output[M*i+1] = nan;
					output[M*i+2] = nan;
					output[M*i+3] = nan;
					//output[M*i+4] = nan;
				}
			}

			return;
				}

	}

}