TLD源码理解

最新推荐文章于 2017-03-19 17:51:24 发布

围观岳老师

最新推荐文章于 2017-03-19 17:51:24 发布

阅读量3.4k

点赞数 1

分类专栏：移动检测跟踪 TLD 文章标签： TLD tld TLD源码

移动检测跟踪同时被 2 个专栏收录

20 篇文章 4 订阅

订阅专栏

TLD

5 篇文章 0 订阅

订阅专栏

转自：http://blog.csdn.net/zouxy09/article/details/7893032

run_tld.cpp

#include <opencv2/opencv.hpp>
#include <tld_utils.h>
#include <iostream>
#include <sstream>  //c++中的sstream类，提供了程序和string对象之间的I/O，可以通过ostringstream
					//和istringstream两个类来声明对象，分别对应输出流和输入流
#include <TLD.h>
#include <stdio.h>
using namespace cv;
using namespace std;
//Global variables
Rect box;
bool drawing_box = false;
bool gotBB = false;
bool tl = true;
bool rep = false;
bool fromfile=false;
string video;

// Reads the initial bounding box from a text file into the global `box`.
//
// The first line of the file is expected to hold four comma-separated
// integers "x1,y1,x2,y2" (top-left and bottom-right corner), for example
// datasets/06_car/init.txt contains:
//   142,125,232,164
//
// file: path of the bounding-box file.
//
// If the path is null or the file cannot be opened, a warning is written to
// stderr and `box` is set to an empty rectangle (the same value the parsing
// below would produce from an empty line). Fields that are missing or not
// numeric parse as 0 (atoi semantics).
void readBB(char* file){
  if (file == NULL){
      cerr << "readBB: no bounding box file given" << endl;
      box = Rect(0,0,0,0);
      return;
  }
  ifstream bb_file (file);
  if (!bb_file){
      cerr << "readBB: cannot open bounding box file: " << file << endl;
      box = Rect(0,0,0,0);
      return;
  }

  // Only the first line is used.
  string line;
  getline(bb_file, line);

  // Split the line on ',' into the four corner coordinates.
  istringstream linestream(line);
  string x1,y1,x2,y2;
  getline (linestream,x1, ',');
  getline (linestream,y1, ',');
  getline (linestream,x2, ',');
  getline (linestream,y2, ',');

  // The file stores two corners; Rect wants origin plus width/height.
  int x = atoi(x1.c_str());
  int y = atoi(y1.c_str());
  int w = atoi(x2.c_str())-x;
  int h = atoi(y2.c_str())-y;
  box = Rect(x,y,w,h);
}

// Mouse callback used to draw the initial bounding box.
//
// Left button down starts a new rectangle at the cursor, mouse movement
// while the button is held stretches it, and left button up finishes it:
// the rectangle is normalized to non-negative width/height and `gotBB`
// is set. All state lives in the globals `box`, `drawing_box` and `gotBB`;
// `flags` and `param` are not used.
void mouseHandler(int event, int x, int y, int flags, void *param){
  if (event == CV_EVENT_MOUSEMOVE){
    // Stretch the rectangle only while a drag is in progress.
    if (drawing_box){
        box.width = x-box.x;
        box.height = y-box.y;
    }
    return;
  }

  if (event == CV_EVENT_LBUTTONDOWN){
    drawing_box = true;
    box = Rect( x, y, 0, 0 );
    return;
  }

  if (event != CV_EVENT_LBUTTONUP)
    return;

  drawing_box = false;
  // A drag towards the left or upwards leaves negative extents; flip them
  // so that (x, y) is the top-left corner again.
  if( box.width < 0 ){
      box.x += box.width;
      box.width = -box.width;
  }
  if( box.height < 0 ){
      box.y += box.height;
      box.height = -box.height;
  }
  gotBB = true;   // a bounding box is now available
}

// Prints the command-line usage to stdout.
//
// argv: the program's argument vector; argv[0] is shown as the program name.
//
// The option list mirrors what read_options() actually parses; in particular
// the flag that disables tracking/learning is "-no_tl" (there is no "-tl").
void print_help(char** argv){
  printf("use:\n     %s -p /path/parameters.yml\n",argv[0]);
  printf("-s    source video\n-b        bounding box file\n-no_tl  train only on the first frame (no tracking, no learning)\n-r     repeat\n");
}

// Parses the command-line arguments.
//
//   -b <file>   read the initial bounding box from <file> (sets gotBB)
//   -s <file>   read frames from the video <file> instead of the camera
//               (opens `capture`, sets `video` and `fromfile`)
//   -p <file>   open the parameters file (e.g. parameters.yml) into `fs`
//   -no_tl      train only on the first frame (no tracking, no learning)
//   -r          repeat the video: first pass learns, second pass detects
//
// An option that needs a value but is the last argument prints the usage
// text instead of reading past the end of argv.
void read_options(int argc, char** argv, VideoCapture& capture, FileStorage &fs){
  for (int i=0;i<argc;i++){
      // True when argv[i+1] exists and can be used as this option's value.
      const bool has_value = (i+1 < argc);

      if (strcmp(argv[i],"-b")==0){
          if (has_value){
              readBB(argv[i+1]);
              gotBB = true;
          }
          else
            print_help(argv);
      }
      if (strcmp(argv[i],"-s")==0){
          if (has_value){
              video = string(argv[i+1]);
              capture.open(video);
              fromfile = true;
          }
          else
            print_help(argv);
      }
      // The parameters file is read through cv::FileStorage, e.g.
      // FileStorage fs("parameters.yml", FileStorage::READ);
      if (strcmp(argv[i],"-p")==0){
          if (has_value){
              fs.open(argv[i+1], FileStorage::READ);
          }
          else
            print_help(argv);
      }
      if (strcmp(argv[i],"-no_tl")==0){  //To train only in the first frame (no tracking, no learning)
          tl = false;
      }
      if (strcmp(argv[i],"-r")==0){  //Repeat the video, first time learns, second time detects
          rep = true;
      }
  }
}

/*
运行程序时：
%To run from camera
./run_tld -p ../parameters.yml
%To run from file
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg
%To init bounding box from file
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt
%To train only in the first frame (no tracking, no learning)
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt -no_tl 
%To test the final detector (Repeat the video, first time learns, second time detects)
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt -r
*/
//感觉就是对起始帧进行初始化工作，然后逐帧读入图片序列，进行算法处理。
int main(int argc, char * argv[]){
  VideoCapture capture;
  capture.open(0);
  
  //OpenCV的C++接口中，用于保存图像的imwrite只能保存整数数据，且需作为图像格式。当需要保存浮
  //点数据或XML/YML文件时，OpenCV的C语言接口提供了cvSave函数，但这一函数在C++接口中已经被删除。
  //取而代之的是FileStorage类。
  FileStorage fs;
  //Read options
  read_options(argc, argv, capture, fs);  //分析命令行参数
  //Init camera
  if (!capture.isOpened())
  {
	cout << "capture device failed to open!" << endl;
    return 1;
  }
  //Register mouse callback to draw the bounding box
  cvNamedWindow("TLD",CV_WINDOW_AUTOSIZE);
  cvSetMouseCallback( "TLD", mouseHandler, NULL );  //用鼠标选中初始目标的bounding box
  //TLD framework
  TLD tld;
  //Read parameters file
  tld.read(fs.getFirstTopLevelNode());
  Mat frame;
  Mat last_gray;
  Mat first;
  if (fromfile){  //如果指定为从文件读取
      capture >> frame;   //读当前帧
      cvtColor(frame, last_gray, CV_RGB2GRAY);  //转换为灰度图像
      frame.copyTo(first);  //拷贝作为第一帧
  }else{   //如果为读取摄像头，则设置获取的图像大小为320x240 
      capture.set(CV_CAP_PROP_FRAME_WIDTH,340);  //340？？
      capture.set(CV_CAP_PROP_FRAME_HEIGHT,240);
  }

  ///Initialization
GETBOUNDINGBOX:   //标号：获取bounding box
  while(!gotBB)
  {
    if (!fromfile){
      capture >> frame;
    }
    else
      first.copyTo(frame);
    cvtColor(frame, last_gray, CV_RGB2GRAY);
    drawBox(frame,box);  //把bounding box 画出来
    imshow("TLD", frame);
    if (cvWaitKey(33) == 'q')
	    return 0;
  }
  //由于图像片（min_win 为15x15像素）是在bounding box中采样得到的，所以box必须比min_win要大
  if (min(box.width, box.height)<(int)fs.getFirstTopLevelNode()["min_win"]){
      cout << "Bounding box too small, try again." << endl;
      gotBB = false;
      goto GETBOUNDINGBOX;
  }
  //Remove callback
  cvSetMouseCallback( "TLD", NULL, NULL );  //如果已经获得第一帧用户框定的box了，就取消鼠标响应
  printf("Initial Bounding Box = x:%d y:%d h:%d w:%d\n",box.x,box.y,box.width,box.height);
  //Output file
  FILE  *bb_file = fopen("bounding_boxes.txt","w");
  
  //TLD initialization
  tld.init(last_gray, box, bb_file);

  ///Run-time
  Mat current_gray;
  BoundingBox pbox;
  vector<Point2f> pts1;
  vector<Point2f> pts2;
  bool status=true;  //记录跟踪成功与否的状态 lastbox been found
  int frames = 1;  //记录已过去帧数
  int detections = 1;  //记录成功检测到的目标box数目
  
REPEAT:
  while(capture.read(frame)){
    //get frame
    cvtColor(frame, current_gray, CV_RGB2GRAY);
    //Process Frame
    tld.processFrame(last_gray, current_gray, pts1, pts2, pbox, status, tl, bb_file);
    //Draw Points
    if (status){  //如果跟踪成功
      drawPoints(frame,pts1);
      drawPoints(frame,pts2,Scalar(0,255,0));  //当前的特征点用蓝色点表示
      drawBox(frame,pbox);
      detections++;
    }
    //Display
    imshow("TLD", frame);
    //swap points and images
    swap(last_gray, current_gray);  //STL函数swap()用来交换两对象的值。其泛型化版本定义于<algorithm>;
    pts1.clear();
    pts2.clear();
    frames++;
    printf("Detection rate: %d/%d\n", detections, frames);
    if (cvWaitKey(33) == 'q')
      break;
  }
  if (rep){
    rep = false;
    tl = false;
    fclose(bb_file);
    bb_file = fopen("final_detector.txt","w");
    //capture.set(CV_CAP_PROP_POS_AVI_RATIO,0);
    capture.release();
    capture.open(video);
    goto REPEAT;
  }
  fclose(bb_file);
  return 0;
}

tld_utils.cpp

#include <tld_utils.h>
using namespace cv;
using namespace std;

/*vector是C++标准模板库STL中的部分内容，它是一个多功能的，能够操作多种数据结构和算法的
模板类和函数库。vector之所以被认为是一个容器，是因为它能够像容器一样存放各种类型的对象，
简单地说，vector是一个能够存放任意类型的动态数组，能够增加和压缩数据。
为了可以使用vector，必须在你的头文件中包含下面的代码：
#include <vector>
vector属于std命名域的，因此需要通过命名限定，如下完成你的代码：
using std::vector;
*/

// Draws the rectangle `box` onto `image`.
//
// color: line colour.
// thick: line thickness; createMask() passes CV_FILLED to fill the rectangle.
void drawBox(Mat& image, CvRect box, Scalar color, int thick){
  const CvPoint top_left = cvPoint(box.x, box.y);
  const CvPoint bottom_right = cvPoint(box.x+box.width, box.y+box.height);
  rectangle(image, top_left, bottom_right, color, thick);
}

// Draws every point of `points` onto `image` as a circle of radius 2 and
// line thickness 1 in the given colour.
//
// The sub-pixel coordinates are handed to circle() as they are; the
// previously computed (and never used) rounded centre has been removed.
void drawPoints(Mat& image, vector<Point2f> points,Scalar color){
  for( vector<Point2f>::const_iterator it = points.begin(), last = points.end(); it != last; ++it )
      {
      circle(image,*it,2,color,1);
      }
}

// Builds an 8-bit single-channel mask with the same size as `image`:
// 255 inside `box` (drawn filled), 0 everywhere else.
Mat createMask(const Mat& image, CvRect box){
  Mat mask(image.rows, image.cols, CV_8U, Scalar::all(0));
  const Scalar inside = Scalar::all(255);
  drawBox(mask, box, inside, CV_FILLED);
  return mask;
}

// Returns the median of `v` (the upper median for an even number of
// elements, i.e. the element at index size/2 of the sorted sequence).
//
// nth_element() only partially orders the data: afterwards the element at
// position `mid` is the one a full sort would put there, which is all a
// median needs. The vector is taken by value, so the caller's data is not
// reordered.
//
// Returns 0 for an empty vector (previously this indexed out of bounds).
float median(std::vector<float> v)
{
    if (v.empty())
        return 0.0f;
    const std::size_t mid = v.size() / 2;
    std::nth_element(v.begin(), v.begin()+mid, v.end());
    return v[mid];
}

// Returns the integers of the half-open range [begin, end) in random order.
//
// The values are first written in ascending order and then permuted with
// random_shuffle() from <algorithm>.
//
// begin, end: bounds of the range; an empty or reversed range
//             (end <= begin) yields an empty vector.
//
// The result is indexed from 0: element k is stored at position k - begin
// before shuffling. (The previous code wrote to position k itself, which
// ran past the end of the vector whenever begin was not 0.)
//
// NOTE(review): std::random_shuffle is deprecated since C++14 and removed
// in C++17; switch to std::shuffle once the project builds as C++11 or later.
std::vector<int> index_shuffle(int begin,int end){
  if (end <= begin)
    return std::vector<int>();
  std::vector<int> indexes(end-begin);
  for (int i=begin;i<end;i++){
    indexes[i-begin]=i;
  }
  std::random_shuffle(indexes.begin(),indexes.end());
  return indexes;
}

LKTracker.h

#include<tld_utils.h>
#include <opencv2/opencv.hpp>

// Point tracker built on the pyramidal Lucas-Kanade optical flow
// (cv::calcOpticalFlowPyrLK). Points are tracked forward and then backward;
// the forward-backward error and a normalized cross-correlation score are
// used to discard unreliable points. Several members are simply the
// arguments handed to calcOpticalFlowPyrLK().
class LKTracker{
private:
  std::vector<cv::Point2f> pointsFB;  // points obtained by tracking the forward result back into the first image
  cv::Size window_size;  // search window size passed to calcOpticalFlowPyrLK
  int level;            // maximum pyramid level passed to calcOpticalFlowPyrLK
  std::vector<uchar> status;   // status output of the forward pass; trackf2f/filterPts treat non-zero as "tracked"
  std::vector<uchar> FB_status;   // status output of the backward pass
  std::vector<float> similarity;  // per-point NCC score between the patches around a point in both images (0 for untracked points)
  std::vector<float> FB_error;   // forward-backward error: Euclidean distance between the original point
                                 // and its backward-tracked position
  float simmed;  // median of `similarity`, computed in filterPts()
  float fbmed;   // median of `FB_error` of the points that survived the similarity filter
  // Termination criteria of the iterative flow search, built in the
  // constructor from a type, a maximum iteration count and an epsilon.
  cv::TermCriteria term_criteria;
  float lambda;   // passed to calcOpticalFlowPyrLK right after the criteria
                  // NOTE(review): presumably OpenCV 2.x's derivLambda - confirm against the OpenCV version in use
  // Fills `similarity`: for every successfully tracked point a 10x10 patch
  // around it is extracted from both images and compared with matchTemplate
  // (CV_TM_CCOEFF_NORMED).
  void normCrossCorrelation(const cv::Mat& img1, const cv::Mat& img2, std::vector<cv::Point2f>& points1, std::vector<cv::Point2f>& points2);
  // Compacts points1/points2 in place to the points whose similarity is above
  // the median and whose FB error is not above the median; returns false when
  // no point is left.
  bool filterPts(std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2);
public:
  LKTracker();
  // Tracks points1 from img1 to img2 (frame to frame), writes the results to
  // points2 and filters both vectors; returns the result of filterPts().
  bool trackf2f(const cv::Mat& img1, const cv::Mat& img2,
                std::vector<cv::Point2f> &points1, std::vector<cv::Point2f> &points2);
  // Median forward-backward error of the last filterPts() run.
  float getFB(){return fbmed;}
};

LKTracker.cpp

#include <LKTracker.h>
using namespace cv;

// Median-flow tracker based on pyramidal Lucas-Kanade optical flow with
// tracking-failure detection.
//
// The constructor sets the fixed parameters later handed to
// calcOpticalFlowPyrLK() and zeroes the median statistics so that getFB()
// returns a defined value before the first call to trackf2f().
LKTracker::LKTracker(){
  // TermCriteria takes a type, a maximum number of iterations and an
  // epsilon: stop after 20 iterations or when the change drops below 0.03.
  term_criteria = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 20, 0.03);
  window_size = Size(4,4);
  level = 5;
  lambda = 0.5;
  simmed = 0.0f;
  fbmed = 0.0f;
}


// Tracks `points1` from `img1` to `img2` with forward-backward validation.
//
// 1. Forward pass:  img1 -> img2, points1 -> points2.
// 2. Backward pass: img2 -> img1, points2 -> pointsFB.
// 3. The forward-backward error of a point is the Euclidean distance
//    between its original position (points1) and where the backward pass
//    put it (pointsFB): a point A tracked to B and back to C is reliable
//    when A and C are close.
// 4. The NCC similarity of every point is computed and both vectors are
//    filtered by filterPts().
//
// points1: in: points to track; out: the surviving points.
// points2: out: the tracked positions of the surviving points.
// Returns true when at least one point survives the filtering; false
// otherwise, including when `points1` is empty (points2 is then cleared and
// no optical flow is computed).
bool LKTracker::trackf2f(const Mat& img1, const Mat& img2, vector<Point2f> &points1, vector<cv::Point2f> &points2){
  //TODO!:implement c function cvCalcOpticalFlowPyrLK() or Faster tracking function
  // Nothing to track: bail out before the medians are taken over empty vectors.
  if (points1.empty()){
      points2.clear();
      return false;
  }
  //Forward-Backward tracking
  //forward trajectory
  calcOpticalFlowPyrLK( img1,img2, points1, points2, status, similarity, window_size, level, term_criteria, lambda, 0);
  //backward trajectory
  calcOpticalFlowPyrLK( img2,img1, points2, pointsFB, FB_status,FB_error, window_size, level, term_criteria, lambda, 0);

  //Compute the real FB-error (overwrites the error reported by the backward pass)
  for( size_t i= 0; i<points1.size(); ++i ){
        FB_error[i] = norm(pointsFB[i]-points1[i]);   // L2 norm of the displacement
  }
  //Filter out points with FB_error[i] > median(FB_error) && points with sim_error[i] <= median(sim_error)
  normCrossCorrelation(img1, img2, points1, points2);
  return filterPts(points1, points2);
}

//利用NCC把跟踪预测的结果周围取10*10的小图片与原始位置周围10*10的小图片（使用函数getRectSubPix得到）进
//行模板匹配（调用matchTemplate）
void LKTracker::normCrossCorrelation(const Mat& img1,const Mat& img2, vector<Point2f>& points1, vector<Point2f>& points2) {
        Mat rec0(10,10,CV_8U);
        Mat rec1(10,10,CV_8U);
        Mat res(1,1,CV_32F);

        for (int i = 0; i < points1.size(); i++) {
                if (status[i] == 1) {  //为1表示该特征点跟踪成功
						//从前一帧和当前帧图像中（以每个特征点为中心？）提取10x10象素矩形，使用亚象素精度
                        getRectSubPix( img1, Size(10,10), points1[i],rec0 );   
                        getRectSubPix( img2, Size(10,10), points2[i],rec1);
						//匹配前一帧和当前帧中提取的10x10象素矩形，得到匹配后的映射图像
						//CV_TM_CCOEFF_NORMED 归一化相关系数匹配法
						//参数分别为：欲搜索的图像。搜索模板。比较结果的映射图像。指定匹配方法
                        matchTemplate( rec0,rec1, res, CV_TM_CCOEFF_NORMED); 
                        similarity[i] = ((float *)(res.data))[0];  //得到各个特征点的相似度大小

                } else {
                        similarity[i] = 0.0;
                }
        }
        rec0.release();
        rec1.release();
        res.release();
}

// Keeps only the reliable point pairs, compacting points1/points2 in place.
//
// Pass 1: among the successfully tracked points keep those whose NCC
//         similarity is strictly above the median similarity.
// Pass 2: among the survivors keep those whose forward-backward error is
//         not above the median FB error of the survivors.
//
// `simmed` and `fbmed` receive the two medians. Returns true when at least
// one pair is left. When pass 1 leaves nothing, false is returned and the
// vectors are not resized.
bool LKTracker::filterPts(vector<Point2f>& points1,vector<Point2f>& points2){
  //Get Error Medians
  simmed = median(similarity);
  size_t i, k;
  for( i=k = 0; i<points2.size(); ++i ){
        if( !status[i])
          continue;
        if(similarity[i]> simmed){
          points1[k] = points1[i];
          points2[k] = points2[i];
          FB_error[k] = FB_error[i];   // keep the FB error aligned with the compacted points
          k++;
        }
    }
  if (k==0)
    return false;
  points1.resize(k);
  points2.resize(k);
  FB_error.resize(k);

  fbmed = median(FB_error);
  // Every point left here already had a non-zero status, and `status` was
  // not compacted with the points, so it must not be consulted again: index
  // i now refers to a different point than status[i].
  for( i=k = 0; i<points2.size(); ++i ){
      if(FB_error[i] <= fbmed){
        points1[k] = points1[i];
        points2[k] = points2[i];
        k++;
      }
  }
  points1.resize(k);
  points2.resize(k);
  return k>0;
}




/*
 * old OpenCV style
void LKTracker::init(Mat img0, vector<Point2f> &points){
  //Preallocate
  //pyr1 = cvCreateImage(Size(img1.width+8,img1.height/3),IPL_DEPTH_32F,1);
  //pyr2 = cvCreateImage(Size(img1.width+8,img1.height/3),IPL_DEPTH_32F,1);
  //const int NUM_PTS = points.size();
  //status = new char[NUM_PTS];
  //track_error = new float[NUM_PTS];
  //FB_error = new float[NUM_PTS];
}


void LKTracker::trackf2f(..){
  cvCalcOpticalFlowPyrLK( &img1, &img2, pyr1, pyr1, points1, points2, points1.size(), window_size, level, status, track_error, term_criteria, CV_LKFLOW_INITIAL_GUESSES);
  cvCalcOpticalFlowPyrLK( &img2, &img1, pyr2, pyr1, points2, pointsFB, points2.size(),window_size, level, 0, 0, term_criteria, CV_LKFLOW_INITIAL_GUESSES | CV_LKFLOW_PYR_A_READY | CV_LKFLOW_PYR_B_READY );
}
*/

TLD.h

#include <opencv2/opencv.hpp>
#include <tld_utils.h>
#include <LKTracker.h>
#include <FerNNClassifier.h>
#include <fstream>


//Bounding Boxes
struct BoundingBox : public cv::Rect {
  BoundingBox(){}
  BoundingBox(cv::Rect r): cv::Rect(r){}   //继承的话需要初始化基类
public:
  float overlap;        //Overlap with current Bounding Box
  int sidx;             //scale index
};

// Detection structure: per-detection data kept by the detector.
// NOTE(review): the field meanings below are inferred from names and from
// TLD::init(), which only reserves space for `bb` - confirm against
// TLD::detect().
struct DetStruct {
    std::vector<int> bb;                      // presumably indexes into the scanning grid
    std::vector<std::vector<int> > patt;      // presumably the fern feature codes of each detection
    std::vector<float> conf1;                 // presumably a first confidence measure per detection
    std::vector<float> conf2;                 // presumably a second confidence measure per detection
    std::vector<std::vector<int> > isin;      // meaning not visible here - TODO confirm
    std::vector<cv::Mat> patch;               // presumably the image patch of each detection
  };
  
// Temporal structure: scratch buffers with one entry per scanning-grid box.
// TLD::init() sizes both vectors to grid.size(); every `patt` entry starts
// as ten zeros.
struct TempStruct {
    std::vector<std::vector<int> > patt;   // presumably the fern feature codes per grid box - TODO confirm
    std::vector<float> conf;               // presumably a confidence value per grid box - TODO confirm
  };

// Orders grid indexes by descending overlap: cmp(a, b) is true when box a
// overlaps more than box b. The comparator keeps its own copy of the grid.
struct OComparator{
  OComparator(const std::vector<BoundingBox>& boxes):grid(boxes){}
  std::vector<BoundingBox> grid;
  bool operator()(int idx1,int idx2){
    const float first_overlap = grid[idx1].overlap;
    const float second_overlap = grid[idx2].overlap;
    return second_overlap < first_overlap;
  }
};

// Orders indexes by descending confidence: cmp(a, b) is true when entry a
// has a higher confidence than entry b. The comparator keeps its own copy
// of the confidence values.
struct CComparator{
  CComparator(const std::vector<float>& confidences):conf(confidences){}
  std::vector<float> conf;
  bool operator()(int idx1,int idx2){
    const float first_conf = conf[idx1];
    const float second_conf = conf[idx2];
    return second_conf < first_conf;
  }
};


// Tracking-Learning-Detection: combines a median-flow tracker (LKTracker),
// a fern / nearest-neighbour classifier (FerNNClassifier) used as detector,
// and the learning step that updates the classifier.
class TLD{
private:
  cv::PatchGenerator generator;  // applies affine warps to image regions
  FerNNClassifier classifier;
  LKTracker tracker;

  // The parameters below are loaded from the parameters file by read().
  ///Parameters
  int bbox_step;   // set to 7 in init()
  int min_win;
  int patch_size;

  //initial parameters for positive examples
  // Positive examples of the first frame are generated from the initial
  // bounding box (read from file or drawn by the user): the
  // num_closest_init closest grid boxes are used for each of the
  // num_warps_init warps.
  int num_closest_init;  // number of closest grid boxes
  int num_warps_init;    // number of warps
  int noise_init;
  float angle_init;
  float shift_init;
  float scale_init;

  // Same set of parameters, used when positive examples are regenerated
  // from a tracked bounding box.
  //update parameters for positive examples
  int num_closest_update;
  int num_warps_update;
  int noise_update;
  float angle_update;
  float shift_update;
  float scale_update;

  //parameters for negative examples
  float bad_overlap;
  float bad_patches;   // read() stores an integer count here ("num_patches")

  ///Variables
//Integral Images, used by getVar() to compute the variance of a box
  cv::Mat iisum;
  cv::Mat iisqsum;
  float var;

//Training data
  // Each sample is a pair: first = fern feature vector, second = class label.
  std::vector<std::pair<std::vector<int>,int> > pX; //positive ferns <features,labels=1>
  std::vector<std::pair<std::vector<int>,int> > nX; // negative ferns <features,labels=0>
  cv::Mat pEx;  //positive NN example
  std::vector<cv::Mat> nEx; //negative NN examples

//Test data
  std::vector<std::pair<std::vector<int>,int> > nXT; //negative data to Test
  std::vector<cv::Mat> nExT; //negative NN examples to Test

//Last frame data
  BoundingBox lastbox;
  bool lastvalid;
  float lastconf;

//Current frame data
  //Tracker data
  bool tracked;
  BoundingBox tbb;
  bool tvalid;
  float tconf;

  //Detector data
  TempStruct tmp;
  DetStruct dt;
  std::vector<BoundingBox> dbb;
  std::vector<bool> dvalid;   // presumably the validity of each detection - TODO confirm
  std::vector<float> dconf;   // presumably the confidence of each detection - TODO confirm
  bool detected;


  //Bounding Boxes
  std::vector<BoundingBox> grid;
  std::vector<cv::Size> scales;
  std::vector<int> good_boxes; //indexes of bboxes with overlap > 0.6
  std::vector<int> bad_boxes; //indexes of bboxes with overlap < 0.2
  BoundingBox bbhull; // hull of good_boxes
  BoundingBox best_box; // maximum overlapping bbox

public:
  //Constructors
  TLD();
  TLD(const cv::FileNode& file);
  void read(const cv::FileNode& file);

  //Methods
  void init(const cv::Mat& frame1,const cv::Rect &box, FILE* bb_file);
  void generatePositiveData(const cv::Mat& frame, int num_warps);
  void generateNegativeData(const cv::Mat& frame);
  void processFrame(const cv::Mat& img1,const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2,
      BoundingBox& bbnext,bool& lastboxfound, bool tl,FILE* bb_file);
  void track(const cv::Mat& img1, const cv::Mat& img2,std::vector<cv::Point2f>& points1,std::vector<cv::Point2f>& points2);
  void detect(const cv::Mat& frame);
  void clusterConf(const std::vector<BoundingBox>& dbb,const std::vector<float>& dconf,std::vector<BoundingBox>& cbb,std::vector<float>& cconf);
  void evaluate();
  void learn(const cv::Mat& img);

  //Tools
  void buildGrid(const cv::Mat& img, const cv::Rect& box);
  float bbOverlap(const BoundingBox& box1,const BoundingBox& box2);
  void getOverlappingBoxes(const cv::Rect& box1,int num_closest);
  void getBBHull();
  void getPattern(const cv::Mat& img, cv::Mat& pattern,cv::Scalar& mean,cv::Scalar& stdev);
  void bbPoints(std::vector<cv::Point2f>& points, const BoundingBox& bb);
  void bbPredict(const std::vector<cv::Point2f>& points1,const std::vector<cv::Point2f>& points2,
      const BoundingBox& bb1,BoundingBox& bb2);
  double getVar(const BoundingBox& box,const cv::Mat& sum,const cv::Mat& sqsum);
  bool bbComp(const BoundingBox& bb1,const BoundingBox& bb2);
  int clusterBB(const std::vector<BoundingBox>& dbb,std::vector<int>& indexes);
};

TLD.cpp

/*
 * TLD.cpp
 *
 *  Created on: Jun 9, 2011
 *      Author: alantrrs
 */

#include <TLD.h>
#include <stdio.h>
using namespace cv;
using namespace std;


// Default constructor: does nothing; the parameters must be loaded with
// read() before init() is called.
TLD::TLD()
{
}
// Constructs the object and immediately loads the parameters from `file`
// through read().
TLD::TLD(const FileNode& file){
  read(file);
}

// Loads the TLD parameters from a FileStorage node and forwards the same
// node to the classifier so that it can read its own settings.
//
// file: node containing the keys accessed below (min_win, patch_size,
//       *_init, *_update, overlap, num_patches).
void TLD::read(const FileNode& file){
  ///Bounding Box Parameters
  min_win            = static_cast<int>(file["min_win"]);

  ///Generator Parameters
  //initial parameters for positive examples
  patch_size         = static_cast<int>(file["patch_size"]);
  num_closest_init   = static_cast<int>(file["num_closest_init"]);
  num_warps_init     = static_cast<int>(file["num_warps_init"]);
  noise_init         = static_cast<int>(file["noise_init"]);
  angle_init         = static_cast<float>(file["angle_init"]);
  shift_init         = static_cast<float>(file["shift_init"]);
  scale_init         = static_cast<float>(file["scale_init"]);

  //update parameters for positive examples
  num_closest_update = static_cast<int>(file["num_closest_update"]);
  num_warps_update   = static_cast<int>(file["num_warps_update"]);
  noise_update       = static_cast<int>(file["noise_update"]);
  angle_update       = static_cast<float>(file["angle_update"]);
  shift_update       = static_cast<float>(file["shift_update"]);
  scale_update       = static_cast<float>(file["scale_update"]);

  //parameters for negative examples
  bad_overlap        = static_cast<float>(file["overlap"]);
  // Read as an integer count, although the member itself is a float.
  bad_patches        = static_cast<int>(file["num_patches"]);

  classifier.read(file);
}

//此函数完成准备工作
void TLD::init(const Mat& frame1, const Rect& box, FILE* bb_file){
  //bb_file = fopen("bounding_boxes.txt","w");
  //Get Bounding Boxes
  //此函数根据传入的box（目标边界框）在传入的图像frame1中构建全部的扫描窗口，并计算重叠度
    buildGrid(frame1, box);
    printf("Created %d bounding boxes\n",(int)grid.size());  //vector的成员size()用于获取向量元素的个数
	
  ///Preparation
  //allocation
  //积分图像，用以计算2bitBP特征（类似于haar特征的计算）
  //Mat的创建，方式有两种：1.调用create（行，列，类型）2.Mat（行，列，类型（值））。
  iisum.create(frame1.rows+1, frame1.cols+1, CV_32F);
  iisqsum.create(frame1.rows+1, frame1.cols+1, CV_64F);
  
  //Detector data中定义：std::vector<float> dconf;  检测确信度？？
  //vector 的reserve增加了vector的capacity，但是它的size没有改变！而resize改变了vector
  //的capacity同时也增加了它的size！reserve是容器预留空间，但在空间内不真正创建元素对象，
  //所以在没有添加新的对象之前，不能引用容器内的元素。
  //不管是调用resize还是reserve，二者对容器原有的元素都没有影响。
  //myVec.reserve( 100 );     // 新元素还没有构造, 此时不能用[]访问元素
  //myVec.resize( 100 );      // 用元素的默认构造函数构造了100个新的元素，可以直接操作新元素
  dconf.reserve(100);
  dbb.reserve(100);
  bbox_step =7;
  
  //以下在Detector data中定义的容器都给其分配grid.size()大小（这个是一幅图像中全部的扫描窗口个数）的容量
  //Detector data中定义TempStruct tmp;  
  //tmp.conf.reserve(grid.size());
  tmp.conf = vector<float>(grid.size());
  tmp.patt = vector<vector<int> >(grid.size(), vector<int>(10,0));
  //tmp.patt.reserve(grid.size());
  dt.bb.reserve(grid.size());
  good_boxes.reserve(grid.size());
  bad_boxes.reserve(grid.size());
  
  //TLD中定义：cv::Mat pEx;  //positive NN example 大小为15*15图像片
  pEx.create(patch_size, patch_size, CV_64F);
  
  //Init Generator
  //TLD中定义：cv::PatchGenerator generator;  //PatchGenerator类用来对图像区域进行仿射变换
  /*
  cv::PatchGenerator::PatchGenerator (    
      double     _backgroundMin,
      double     _backgroundMax,
      double     _noiseRange,
      bool     _randomBlur = true,
      double     _lambdaMin = 0.6,
      double     _lambdaMax = 1.5,
      double     _thetaMin = -CV_PI,
      double     _thetaMax = CV_PI,
      double     _phiMin = -CV_PI,
      double     _phiMax = CV_PI 
   ) 
   一般的用法是先初始化一个PatchGenerator的实例，然后RNG一个随机因子，再调用（）运算符产生一个变换后的正样本。
  */
  generator = PatchGenerator (0,0,noise_init,true,1-scale_init,1+scale_init,-angle_init*CV_PI/180,
								angle_init*CV_PI/180,-angle_init*CV_PI/180,angle_init*CV_PI/180);
  
  //此函数根据传入的box（目标边界框），在整帧图像中的全部窗口中寻找与该box距离最小（即最相似，
  //重叠度最大）的num_closest_init个窗口，然后把这些窗口 归入good_boxes容器
  //同时，把重叠度小于0.2的，归入 bad_boxes 容器
  //首先根据overlap的比例信息选出重复区域比例大于60%并且前num_closet_init= 10个的最接近box的RectBox，
  //相当于对RectBox进行筛选。并通过BBhull函数得到这些RectBox的最大边界。
  getOverlappingBoxes(box, num_closest_init);
  printf("Found %d good boxes, %d bad boxes\n",(int)good_boxes.size(),(int)bad_boxes.size());
  printf("Best Box: %d %d %d %d\n",best_box.x, best_box.y, best_box.width, best_box.height);
  printf("Bounding box hull: %d %d %d %d\n", bbhull.x, bbhull.y, bbhull.width, bbhull.height);
  
  //Correct Bounding Box
  lastbox=best_box;
  lastconf=1;
  lastvalid=true;
  //Print
  fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
  
  //Prepare Classifier 准备分类器
  //scales容器里是所有扫描窗口的尺度，由buildGrid()函数初始化
  classifier.prepare(scales);
  
  ///Generate Data
  // Generate positive data
  generatePositiveData(frame1, num_warps_init);
  
  // Set variance threshold
  Scalar stdev, mean;
  //统计best_box的均值和标准差
  例如需要提取图像A的某个ROI（感兴趣区域，由矩形框）的话，用Mat类的B=img(ROI)即可提取
  //frame1(best_box)就表示在frame1中提取best_box区域（目标区域）的图像片
  meanStdDev(frame1(best_box), mean, stdev);
  
  //利用积分图像去计算每个待检测窗口的方差
  //cvIntegral( const CvArr* image, CvArr* sum, CvArr* sqsum=NULL, CvArr* tilted_sum=NULL );
  //计算积分图像，输入图像，sum积分图像, W+1×H+1，sqsum对象素值平方的积分图像，tilted_sum旋转45度的积分图像
  //利用积分图像，可以计算在某象素的上－右方的或者旋转的矩形区域中进行求和、求均值以及标准方差的计算，
  //并且保证运算的复杂度为O(1)。  
  integral(frame1, iisum, iisqsum);
  //级联分类器模块一：方差检测模块，利用积分图计算每个待检测窗口的方差，方差大于var阈值（目标patch方差的50%）的，
  //则认为其含有前景目标方差；var 为标准差的平方
  var = pow(stdev.val[0],2) * 0.5; //getVar(best_box,iisum,iisqsum);
  cout << "variance: " << var << endl;
  
  //check variance
  //getVar函数通过积分图像计算输入的best_box的方差
  double vr =  getVar(best_box, iisum, iisqsum)*0.5;
  cout << "check variance: " << vr << endl;
  
  // Generate negative data
  generateNegativeData(frame1);
  
  //Split Negative Ferns into Training and Testing sets (they are already shuffled)
  //将负样本放进 训练和测试集
  int half = (int)nX.size()*0.5f;
  //vector::assign函数将区间[start, end)中的值赋值给当前的vector.
  //将一半的负样本集 作为 测试集
  nXT.assign(nX.begin()+half, nX.end());  //nXT; //negative data to Test
  //然后将剩下的一半作为训练集
  nX.resize(half);
  
  ///Split Negative NN Examples into Training and Testing sets
  half = (int)nEx.size()*0.5f;
  nExT.assign(nEx.begin()+half,nEx.end());
  nEx.resize(half);
  
  //Merge Negative Data with Positive Data and shuffle it
  //将负样本和正样本合并，然后打乱
  vector<pair<vector<int>,int> > ferns_data(nX.size()+pX.size());
  vector<int> idx = index_shuffle(0, ferns_data.size());
  int a=0;
  for (int i=0;i<pX.size();i++){
      ferns_data[idx[a]] = pX[i];
      a++;
  }
  for (int i=0;i<nX.size();i++){
      ferns_data[idx[a]] = nX[i];
      a++;
  }
  
  //Data already have been shuffled, just putting it in the same vector
  vector<cv::Mat> nn_data(nEx.size()+1);
  nn_data[0] = pEx;
  for (int i=0;i<nEx.size();i++){
      nn_data[i+1]= nEx[i];
  }
  
  ///Training  
  //训练 集合分类器（森林） 和 最近邻分类器 
  classifier.trainF(ferns_data, 2); //bootstrap = 2
  classifier.trainNN(nn_data);
  
  ///Threshold Evaluation on testing sets
  //用样本在上面得到的 集合分类器（森林） 和 最近邻分类器 中分类，评价得到最好的阈值
  classifier.evaluateTh(nXT, nExT);
}

/* Generate Positive data
 * Inputs:
 * - good_boxes (bbP)
 * - best_box (bbP0)
 * - frame (im0)
 * Outputs:
 * - Positive fern features (pX)
 * - Positive NN examples (pEx)
 */
void TLD::generatePositiveData(const Mat& frame, int num_warps){
	/*
	CvScalar定义可存放1—4个数值的数值，常用来存储像素，其结构体如下：
	typedef struct CvScalar
	{
		double val[4];
	}CvScalar;
	如果使用的图像是1通道的，则s.val[0]中存储数据
	如果使用的图像是3通道的，则s.val[0]，s.val[1]，s.val[2]中存储数据
	*/
  Scalar mean;   //均值
  Scalar stdev;   //标准差
  
  //此函数将frame图像best_box区域的图像片归一化为均值为0的15*15大小的patch，存在pEx正样本中
  getPattern(frame(best_box), pEx, mean, stdev);
  
  //Get Fern features on warped patches
  Mat img;
  Mat warped;
  //void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sigmaX, double sigmaY=0, 
  //									int borderType=BORDER_DEFAULT ) ;
  //功能：对输入的图像src进行高斯滤波后用dst输出。
  //src和dst当然分别是输入图像和输出图像。Ksize为高斯滤波器模板大小，sigmaX和sigmaY分别为高斯滤
  //波在横向和竖向的滤波系数。borderType为边缘扩展点插值类型。
  //用9*9高斯核模糊输入帧，存入img  去噪？？
  GaussianBlur(frame, img, Size(9,9), 1.5);
  
  //在img图像中截取bbhull信息（bbhull是包含了位置和大小的矩形框）的图像赋给warped
  //例如需要提取图像A的某个ROI（感兴趣区域，由矩形框）的话，用Mat类的B=img(ROI)即可提取
  warped = img(bbhull);
  RNG& rng = theRNG();  //生成一个随机数
  Point2f pt(bbhull.x + (bbhull.width-1)*0.5f, bbhull.y+(bbhull.height-1)*0.5f);  //取矩形框中心的坐标  int i(2)
  
  //nstructs树木（由一个特征组构建，每组特征代表图像块的不同视图表示）的个数
  //fern[nstructs] nstructs棵树的森林的数组？？
  vector<int> fern(classifier.getNumStructs());
  pX.clear();
  Mat patch;

  //pX为处理后的RectBox最大边界处理后的像素信息，pEx最近邻的RectBox的Pattern，bbP0为最近邻的RectBox。
  if (pX.capacity() < num_warps * good_boxes.size())
    pX.reserve(num_warps * good_boxes.size());  //pX正样本个数为 仿射变换个数 * good_box的个数，故需分配至少这么大的空间
  int idx;
  for (int i=0; i< num_warps; i++){
     if (i>0)
	 //PatchGenerator类用来对图像区域进行仿射变换，先RNG一个随机因子，再调用（）运算符产生一个变换后的正样本。
       generator(frame, pt, warped, bbhull.size(), rng);
       for (int b=0; b < good_boxes.size(); b++){
         idx = good_boxes[b];  //good_boxes容器保存的是 grid 的索引
		 patch = img(grid[idx]);  //把img的 grid[idx] 区域（也就是bounding box重叠度高的）这一块图像片提取出来
		 //getFeatures函数得到输入的patch的用于树的节点，也就是特征组的特征fern（13位的二进制代码）
         classifier.getFeatures(patch, grid[idx].sidx, fern);  //grid[idx].sidx 对应的尺度索引
         pX.push_back(make_pair(fern, 1));   //positive ferns <features, labels=1>  正样本
     }
  }
  printf("Positive examples generated: ferns:%d NN:1\n",(int)pX.size());
}

//先对最接近box的RectBox区域得到其patch ,然后将像素信息转换为Pattern，
//具体的说就是归一化RectBox对应的patch的size（放缩至patch_size = 15*15），将2维的矩阵变成一维的向量信息，
//然后将向量信息均值设为0，调整为zero mean and unit variance（ZMUV）
//Output: resized Zero-Mean patch
void TLD::getPattern(const Mat& img, Mat& pattern, Scalar& mean, Scalar& stdev){
  //将img放缩至patch_size = 15*15，存到pattern中
  resize(img, pattern, Size(patch_size, patch_size));
  
  //计算pattern这个矩阵的均值和标准差
  //Computes a mean value and a standard deviation of matrix elements.
  meanStdDev(pattern, mean, stdev);
  pattern.convertTo(pattern, CV_32F);
  
  //opencv中Mat的运算符有重载， Mat可以 + Mat; + Scalar; + int / float / double 都可以
  //将矩阵所有元素减去其均值，也就是把patch的均值设为零
  pattern = pattern - mean.val[0];
}

/* Generate Negative data
 * Inputs:
 * - Image
 * - bad_boxes (Boxes far from the bounding box)
 * - variance (pEx variance)
 * Outputs
 * - Negative fern features (nX): one entry for every bad box whose variance
 *   is at least var*0.5
 * - Negative NN examples (nEx): patterns of the first bad_patches shuffled
 *   bad boxes, or of all bad boxes when fewer than bad_patches exist
 */
void TLD::generateNegativeData(const Mat& frame){
  // Shuffle so that the boxes picked below form a random selection.
  random_shuffle(bad_boxes.begin(), bad_boxes.end());//Random shuffle bad_boxes indexes
  int idx;
  //Get Fern Features of the boxes with big variance (calculated using integral images)
  int a=0;
  //int num = std::min((int)bad_boxes.size(),(int)bad_patches*100); //limits the size of bad_boxes to try
  printf("negative data generation started.\n");
  vector<int> fern(classifier.getNumStructs());
  nX.reserve(bad_boxes.size());
  Mat patch;
  for (size_t j=0;j<bad_boxes.size();j++){
      idx = bad_boxes[j];
      // Skip boxes with too little variance.
      if (getVar(grid[idx],iisum,iisqsum)<var*0.5f)
        continue;
      patch =  frame(grid[idx]);
      classifier.getFeatures(patch, grid[idx].sidx, fern);
      nX.push_back(make_pair(fern, 0)); // label 0 = negative
      a++;
  }
  printf("Negative examples generated: ferns: %d ", a);

  //random_shuffle(bad_boxes.begin(),bad_boxes.begin()+bad_patches);//Randomly selects 'bad_patches' and get the patterns for NN;
  // Never take more NN examples than there are bad boxes; indexing
  // bad_boxes beyond its size would read out of bounds.
  const int requested = (int)bad_patches;
  const int available = (int)bad_boxes.size();
  const int nn_count = requested < available ? requested : available;

  // Mean and standard deviation are not needed for negative examples.
  Scalar dum1, dum2;
  nEx=vector<Mat>(nn_count > 0 ? nn_count : 0);
  for (int i=0;i<nn_count;i++){
      idx=bad_boxes[i];
      patch = frame(grid[idx]);
      getPattern(patch,nEx[i],dum1,dum2);
  }
  printf("NN: %d\n",(int)nEx.size());
}

//该函数通过积分图像计算输入的box的方差
double TLD::getVar(const BoundingBox& box, const Mat& sum, const Mat& sqsum){
  double brs = sum.at<int>(box.y+box.height, box.x+box.width);
  double bls = sum.at<int>(box.y+box.height, box.x);
  double trs = sum.at<int>(box.y,box.x + box.width);
  double tls = sum.at<int>(box.y,box.x);
  double brsq = sqsum.at<double>(box.y+box.height,box.x+box.width);
  double blsq = sqsum.at<double>(box.y+box.height,box.x);
  double trsq = sqsum.at<double>(box.y,box.x+box.width);
  double tlsq = sqsum.at<double>(box.y,box.x);
  
  double mean = (brs+tls-trs-bls)/((double)box.area());
  double sqmean = (brsq+tlsq-trsq-blsq)/((double)box.area());
  //方差=E(X^2)-(EX)^2   EX表示均值
  return sqmean-mean*mean;
}

// Processes one frame: runs the tracker (img1 -> img2), runs the detector on
// img2, fuses both results into `bbnext`, logs the result to `bb_file` and,
// if the result is considered valid, runs the learning step.
//
// img1, img2    previous and current frame
// points1/2     tracked points, filled by track()
// bbnext        output: the bounding box chosen for the current frame
// lastboxfound  in: whether the previous frame had a box; out: whether this
//               one has (only ever changed inside the "not tracking" branch)
// tl            enables both tracking and learning
// bb_file       receives one line per frame: "x,y,x2,y2,conf" or a NaN line
void TLD::processFrame(const cv::Mat& img1,const cv::Mat& img2,vector<Point2f>& points1,vector<Point2f>& points2,BoundingBox& bbnext, bool& lastboxfound, bool tl, FILE* bb_file){
  vector<BoundingBox> cbb;
  vector<float> cconf;
  int confident_detections=0;
  int didx; //detection index
  
  ///Track
  if(lastboxfound && tl){   //tl: train and learn
	  // track() sets tracked, tbb, tconf and tvalid
      track(img1, img2, points1, points2);
  }
  else{
      tracked = false;
  }
  
  ///Detect
  detect(img2);
  
  ///Integration
  // Only one target is followed, so the single tracker result and the
  // (possibly many) detector results are merged into one output box.
  if (tracked){
      bbnext=tbb;
      lastconf=tconf;   // confidence of the tracked box as computed by track()
      lastvalid=tvalid;  // validity flag of the tracked box as computed by track()
      printf("Tracked\n");
      if(detected){                                               //   if Detected
		  // Group the detections into clusters; cbb/cconf receive one averaged
		  // box and one averaged confidence per cluster.
          clusterConf(dbb, dconf, cbb, cconf);                       //   cluster detections
          printf("Found %d clusters\n",(int)cbb.size());
          for (int i=0;i<cbb.size();i++){
			  // Count clusters that overlap the tracker box by less than 0.5 and
			  // are more confident than the tracker.
              if (bbOverlap(tbb, cbb[i])<0.5 && cconf[i]>tconf){  //  Get index of a clusters that is far from tracker and are more confident than the tracker
                  confident_detections++;
                  didx=i; //detection index
              }
          }
		  // Exactly one such cluster: the detector overrides the tracker. The
		  // result is marked invalid, so no learning happens on this frame.
          if (confident_detections==1){                                //if there is ONE such a cluster, re-initialize the tracker
              printf("Found a better match..reinitializing tracking\n");
              bbnext=cbb[didx];
              lastconf=cconf[didx];
              lastvalid=false;
          }
          else {
              printf("%d confident cluster was found\n", confident_detections);
              int cx=0,cy=0,cw=0,ch=0;
              int close_detections=0;
              for (int i=0;i<dbb.size();i++){
				  // Accumulate position and size of the raw detections that overlap
				  // the tracker box by more than 0.7.
                  if(bbOverlap(tbb,dbb[i])>0.7){                     // Get mean of close detections
                      cx += dbb[i].x;
                      cy +=dbb[i].y;
                      cw += dbb[i].width;
                      ch += dbb[i].height;
                      close_detections++;   // number of close detections
                      printf("weighted detection: %d %d %d %d\n",dbb[i].x,dbb[i].y,dbb[i].width,dbb[i].height);
                  }
              }
              if (close_detections>0){
				  // Weighted mean of the tracker box and the close detections; the
				  // tracker box counts ten times as much as a single detection.
                  bbnext.x = cvRound((float)(10*tbb.x+cx)/(float)(10+close_detections));   // weighted average trackers trajectory with the close detections
                  bbnext.y = cvRound((float)(10*tbb.y+cy)/(float)(10+close_detections));
                  bbnext.width = cvRound((float)(10*tbb.width+cw)/(float)(10+close_detections));
                  bbnext.height =  cvRound((float)(10*tbb.height+ch)/(float)(10+close_detections));
                  printf("Tracker bb: %d %d %d %d\n",tbb.x,tbb.y,tbb.width,tbb.height);
                  printf("Average bb: %d %d %d %d\n",bbnext.x,bbnext.y,bbnext.width,bbnext.height);
                  printf("Weighting %d close detection(s) with tracker..\n",close_detections);
              }
              else{
                printf("%d close detections were found\n",close_detections);

              }
          }
      }
  }
  else{                                       //   If NOT tracking
      printf("Not tracking..\n");
      lastboxfound = false;
      lastvalid = false;
	  // Without a tracker result the detector may re-initialise the target,
	  // but only when its detections collapse into exactly one cluster; with
	  // several clusters the choice would be ambiguous and nothing is reported.
      if(detected){                           //  and detector is defined
          clusterConf(dbb,dconf,cbb,cconf);   //  cluster detections
          printf("Found %d clusters\n",(int)cbb.size());
          if (cconf.size()==1){
              bbnext=cbb[0];
              lastconf=cconf[0];
              printf("Confident detection..reinitializing tracker\n");
              lastboxfound = true;
          }
      }
  }
  lastbox=bbnext;
  // Log top-left and bottom-right corner plus confidence, or NaNs if no box.
  if (lastboxfound)
    fprintf(bb_file,"%d,%d,%d,%d,%f\n",lastbox.x,lastbox.y,lastbox.br().x,lastbox.br().y,lastconf);
  else
    fprintf(bb_file,"NaN,NaN,NaN,NaN,NaN\n");
	
  ///learn
  if (lastvalid && tl)
    learn(img2);
}

/*Inputs:
* -current frame(img2), last frame(img1), last Bbox(bbox_f[0]).
*Outputs:
*- Confidence(tconf), Predicted bounding box(tbb), Validity(tvalid), points2 (for display purposes only)
*/
void TLD::track(const Mat& img1, const Mat& img2, vector<Point2f>& points1, vector<Point2f>& points2){
  
  //Generate points
  //网格均匀撒点（均匀采样），在lastbox中共产生最多10*10=100个特征点，存于points1
  bbPoints(points1, lastbox);
  if (points1.size()<1){
      printf("BB= %d %d %d %d, Points not generated\n",lastbox.x,lastbox.y,lastbox.width,lastbox.height);
      tvalid=false;
      tracked=false;
      return;
  }
  vector<Point2f> points = points1;
  
  //Frame-to-frame tracking with forward-backward error cheking
  //trackf2f函数完成：跟踪、计算FB error和匹配相似度sim，然后筛选出 FB_error[i] <= median(FB_error) 和 
  //sim_error[i] > median(sim_error) 的特征点（跟踪结果不好的特征点），剩下的是不到50%的特征点
  tracked = tracker.trackf2f(img1, img2, points, points2);
  if (tracked){
      //Bounding box prediction
	  //利用剩下的这不到一半的跟踪点输入来预测bounding box在当前帧的位置和大小 tbb
      bbPredict(points, points2, lastbox, tbb);
	  //跟踪失败检测：如果FB error的中值大于10个像素（经验值），或者预测到的当前box的位置移出图像，则
	  //认为跟踪错误，此时不返回bounding box；Rect::br()返回的是右下角的坐标
	  //getFB()返回的是FB error的中值
      if (tracker.getFB()>10 || tbb.x>img2.cols ||  tbb.y>img2.rows || tbb.br().x < 1 || tbb.br().y <1){
          tvalid =false; //too unstable prediction or bounding box out of image
          tracked = false;
          printf("Too unstable predictions FB error=%f\n", tracker.getFB());
          return;
      }
	  
      //Estimate Confidence and Validity
	  //评估跟踪确信度和有效性
      Mat pattern;
      Scalar mean, stdev;
      BoundingBox bb;
      bb.x = max(tbb.x,0);
      bb.y = max(tbb.y,0);
      bb.width = min(min(img2.cols-tbb.x,tbb.width), min(tbb.width, tbb.br().x));
      bb.height = min(min(img2.rows-tbb.y,tbb.height),min(tbb.height,tbb.br().y));
	  //归一化img2(bb)对应的patch的size（放缩至patch_size = 15*15），存入pattern
      getPattern(img2(bb),pattern,mean,stdev);
      vector<int> isin;
      float dummy;
	  //计算图像片pattern到在线模型M的保守相似度
      classifier.NNConf(pattern,isin,dummy,tconf); //Conservative Similarity
      tvalid = lastvalid;
	  //保守相似度大于阈值，则评估跟踪有效
      if (tconf>classifier.thr_nn_valid){
          tvalid =true;
      }
  }
  else
    printf("No points tracked\n");

}

// Appends a regular grid of sample points covering `bb` to `points`, using
// at most max_pts (10) points per axis, i.e. at most 10*10 = 100 points.
// `points` is not cleared here.
void TLD::bbPoints(vector<cv::Point2f>& points, const BoundingBox& bb){
  const int max_pts=10;
  const int margin_h=0; // horizontal margin left unsampled on each side
  const int margin_v=0; // vertical margin left unsampled on each side
  // The division must be done in floating point: with integer division the
  // ceil() had no effect, the step became 0 for boxes smaller than max_pts
  // pixels (the loops below then never terminate) and larger boxes got more
  // than max_pts points per axis.
  int stepx = (int)ceil((bb.width-2*margin_h)/(float)max_pts);
  int stepy = (int)ceil((bb.height-2*margin_v)/(float)max_pts);
  // Guard against a non-positive stride for degenerate boxes.
  if (stepx<1) stepx=1;
  if (stepy<1) stepy=1;
  for (int y=bb.y+margin_v; y<bb.y+bb.height-margin_v; y+=stepy){
      for (int x=bb.x+margin_h;x<bb.x+bb.width-margin_h;x+=stepx){
          points.push_back(Point2f(x,y));
      }
  }
}

//利用剩下的这不到一半的跟踪点输入来预测bounding box在当前帧的位置和大小
void TLD::bbPredict(const vector<cv::Point2f>& points1,const vector<cv::Point2f>& points2,
                    const BoundingBox& bb1,BoundingBox& bb2)    {
  int npoints = (int)points1.size();
  vector<float> xoff(npoints);  //位移
  vector<float> yoff(npoints);
  printf("tracked points : %d\n", npoints);
  for (int i=0;i<npoints;i++){   //计算每个特征点在两帧之间的位移
      xoff[i]=points2[i].x - points1[i].x;
      yoff[i]=points2[i].y - points1[i].y;
  }
  float dx = median(xoff);   //计算位移的中值
  float dy = median(yoff);
  float s;
  //计算bounding box尺度scale的变化：通过计算 当前特征点相互间的距离 与 先前（上一帧）特征点相互间的距离 的
  //比值，以比值的中值作为尺度的变化因子
  if (npoints>1){
      vector<float> d;
      d.reserve(npoints*(npoints-1)/2);  //等差数列求和：1+2+...+(npoints-1)
      for (int i=0;i<npoints;i++){
          for (int j=i+1;j<npoints;j++){
		  //计算 当前特征点相互间的距离 与 先前（上一帧）特征点相互间的距离 的比值（位移用绝对值）
              d.push_back(norm(points2[i]-points2[j])/norm(points1[i]-points1[j]));
          }
      }
      s = median(d);
  }
  else {
      s = 1.0;
  }

  float s1 = 0.5*(s-1)*bb1.width;
  float s2 = 0.5*(s-1)*bb1.height;
  printf("s= %f s1= %f s2= %f \n", s, s1, s2);
  
  //得到当前bounding box的位置与大小信息
  //当前box的x坐标 = 前一帧box的x坐标 + 全部特征点位移的中值（可理解为box移动近似的位移） - 当前box宽的一半
  bb2.x = round( bb1.x + dx - s1);
  bb2.y = round( bb1.y + dy -s2);
  bb2.width = round(bb1.width*s);
  bb2.height = round(bb1.height*s);
  printf("predicted bb: %d %d %d %d\n",bb2.x,bb2.y,bb2.br().x,bb2.br().y);
}

// Runs the three-stage cascade detector over every window in `grid`:
//   1. variance filter (integral images),
//   2. ensemble (fern) classifier on a blurred copy of the frame,
//   3. nearest-neighbour classifier on the best fern candidates.
// Results: dbb/dconf (accepted boxes and their conservative similarity),
// dt (per-candidate data), tmp.conf/tmp.patt (per-window fern votes and
// codes) and the `detected` flag.
void TLD::detect(const cv::Mat& frame){
  //cleaning
  dbb.clear();
  dconf.clear();
  dt.bb.clear();
  double t = (double)getTickCount();   // start time, for the timing printout
  Mat img(frame.rows, frame.cols, CV_8U);
  integral(frame,iisum,iisqsum);          // integral images for getVar()
  GaussianBlur(frame,img,Size(9,9),1.5);  // blurred copy used for fern features
  int numtrees = classifier.getNumStructs();
  float fern_th = classifier.getFernTh();
  // One code per tree. Sized from the classifier rather than hard-coded to
  // 10, because getFeatures() writes one entry per tree and would overflow a
  // fixed-size vector if more trees were configured.
  vector <int> ferns(numtrees);
  float conf;
  int a=0;
  Mat patch;
  // Stage 1: keep only windows whose variance is at least `var`.
  for (int i=0; i<grid.size(); i++){  //FIXME: BottleNeck
      if (getVar(grid[i],iisum,iisqsum) >= var){
          a++;
          // Stage 2: fern codes and summed posterior votes of the window.
          patch = img(grid[i]);
          classifier.getFeatures(patch,grid[i].sidx,ferns);
          conf = classifier.measure_forest(ferns);
          tmp.conf[i]=conf;
          tmp.patt[i]=ferns;
          // measure_forest() returns a sum over trees, so the per-tree
          // threshold is scaled by the number of trees.
          if (conf > numtrees*fern_th){
              dt.bb.push_back(i);
          }
      }
      else
        tmp.conf[i]=0.0;
  }
  int detections = dt.bb.size();
  printf("%d Bounding boxes passed the variance filter\n",a);
  printf("%d Initial detection from Fern Classifier\n", detections);

  // Keep at most 100 candidates, selected with CComparator(tmp.conf).
  if (detections>100){
      nth_element(dt.bb.begin(), dt.bb.begin()+100, dt.bb.end(), CComparator(tmp.conf));
      dt.bb.resize(100);
      detections=100;
  }
//  for (int i=0;i<detections;i++){
//        drawBox(img,grid[dt.bb[i]]);
//    }
//  imshow("detections",img);
  if (detections==0){
        detected=false;
        return;
      }
  printf("Fern detector made %d detections ",detections);

  // Elapsed ticks converted to milliseconds.
  t=(double)getTickCount()-t;
  printf("in %gms\n", t*1000/getTickFrequency());

  //  Initialize detection structure
  dt.patt = vector<vector<int> >(detections,vector<int>(numtrees,0));  //  Corresponding codes of the Ensemble Classifier
  dt.conf1 = vector<float>(detections);                                //  Relative Similarity (for final nearest neighbour classifier)
  dt.conf2 =vector<float>(detections);                                 //  Conservative Similarity (for integration with tracker)
  dt.isin = vector<vector<int> >(detections,vector<int>(3,-1));        //  Detected (isin=1) or rejected (isin=0) by nearest neighbour classifier
  dt.patch = vector<Mat>(detections,Mat(patch_size,patch_size,CV_32F));//  Corresponding patches
  int idx;
  Scalar mean, stdev;
  float nn_th = classifier.getNNTh();
  // Stage 3: nearest-neighbour classifier on the unblurred frame.
  for (int i=0;i<detections;i++){                                         //  for every remaining detection
      idx=dt.bb[i];                                                       //  Get the detected bounding box index
      patch = frame(grid[idx]);
      getPattern(patch,dt.patch[i],mean,stdev);                //  Get pattern within bounding box
      classifier.NNConf(dt.patch[i],dt.isin[i],dt.conf1[i],dt.conf2[i]);  //  Evaluate nearest neighbour classifier
      dt.patt[i]=tmp.patt[idx];
      //printf("Testing feature %d, conf:%f isin:(%d|%d|%d)\n",i,dt.conf1[i],dt.isin[i][0],dt.isin[i][1],dt.isin[i][2]);
      // Accept the window when its relative similarity exceeds the threshold.
      if (dt.conf1[i]>nn_th){                                               //  idx = dt.conf1 > tld.model.thr_nn; % get all indexes that made it through the nearest neighbour
          dbb.push_back(grid[idx]);                                         //  BB    = dt.bb(:,idx); % bounding boxes
          dconf.push_back(dt.conf2[i]);                                     //  Conf  = dt.conf2(:,idx); % conservative confidences
      }
  }
  // Report how many windows passed all three stages.
  if (dbb.size()>0){
      printf("Found %d NN matches\n",(int)dbb.size());
      detected=true;
  }
  else{
      printf("No NN matches found.\n");
      detected=false;
  }
}

// Intentionally empty. Per the original author's note, evaluation is done by
// the Python script ../datasets/evaluate_vis.py instead (see the README).
void TLD::evaluate(){
}

void TLD::learn(const Mat& img){
  printf("[Learning] ");
  
  ///Check consistency
  //检测一致性
  BoundingBox bb;
  bb.x = max(lastbox.x,0);
  bb.y = max(lastbox.y,0);
  bb.width = min(min(img.cols-lastbox.x,lastbox.width),min(lastbox.width,lastbox.br().x));
  bb.height = min(min(img.rows-lastbox.y,lastbox.height),min(lastbox.height,lastbox.br().y));
  Scalar mean, stdev;
  Mat pattern;
  //归一化img(bb)对应的patch的size（放缩至patch_size = 15*15），存入pattern
  getPattern(img(bb), pattern, mean, stdev);
  vector<int> isin;
  float dummy, conf;
  //计算输入图像片（跟踪器的目标box）与在线模型之间的相关相似度conf
  classifier.NNConf(pattern,isin,conf,dummy);
  if (conf<0.5) {   //如果相似度太小了，就不训练
      printf("Fast change..not training\n");
      lastvalid =false;
      return;
  }
  if (pow(stdev.val[0], 2)< var){  //如果方差太小了，也不训练
      printf("Low variance..not training\n");
      lastvalid=false;
      return;
  }
  if(isin[2]==1){   //如果被被识别为负样本，也不训练
      printf("Patch in negative data..not traing");
      lastvalid=false;
      return;
  }
  
  /// Data generation  样本产生
  for (int i=0;i<grid.size();i++){   //计算所有的扫描窗口与目标box的重叠度
      grid[i].overlap = bbOverlap(lastbox, grid[i]);
  }
  //集合分类器
  vector<pair<vector<int>,int> > fern_examples;
  good_boxes.clear();  
  bad_boxes.clear();
  //此函数根据传入的lastbox，在整帧图像中的全部窗口中寻找与该lastbox距离最小（即最相似，
  //重叠度最大）的num_closest_update个窗口，然后把这些窗口 归入good_boxes容器（只是把网格数组的索引存入）
  //同时，把重叠度小于0.2的，归入 bad_boxes 容器
  getOverlappingBoxes(lastbox, num_closest_update);
  if (good_boxes.size()>0)
    generatePositiveData(img, num_warps_update);  //用仿射模型产生正样本（类似于第一帧的方法，但只产生10*10=100个）
  else{
    lastvalid = false;
    printf("No good boxes..Not training");
    return;
  }
  fern_examples.reserve(pX.size() + bad_boxes.size());
  fern_examples.assign(pX.begin(), pX.end());
  int idx;
  for (int i=0;i<bad_boxes.size();i++){
      idx=bad_boxes[i];
      if (tmp.conf[idx]>=1){   //加入负样本，相似度大于1？？相似度不是出于0和1之间吗？
          fern_examples.push_back(make_pair(tmp.patt[idx],0));
      }
  }
  //最近邻分类器
  vector<Mat> nn_examples;
  nn_examples.reserve(dt.bb.size()+1);
  nn_examples.push_back(pEx);
  for (int i=0;i<dt.bb.size();i++){
      idx = dt.bb[i];
      if (bbOverlap(lastbox,grid[idx]) < bad_overlap)
        nn_examples.push_back(dt.patch[i]);
  }
  
  /// Classifiers update  分类器训练
  classifier.trainF(fern_examples,2);
  classifier.trainNN(nn_examples);
  classifier.show(); //把正样本库（在线模型）包含的所有正样本显示在窗口上
}

// Builds the detector's scanning-window grid. For every scale of the initial
// box `box` that fits into `img`, windows are laid out with a stride of 10%
// of the window's shorter side. Each window is stored in `grid` with its
// overlap with `box` and its scale index; the window size of each used scale
// is appended to `scales`.
void TLD::buildGrid(const cv::Mat& img, const cv::Rect& box){
  const float SHIFT = 0.1;  // stride as a fraction of the shorter window side
  // Geometric series with factor 1.2 centred on 1 (21 scales).
  const float SCALES[] = {0.16151,0.19381,0.23257,0.27908,0.33490,0.40188,0.48225,
                          0.57870,0.69444,0.83333,1,1.20000,1.44000,1.72800,
                          2.07360,2.48832,2.98598,3.58318,4.29982,5.15978,6.19174};
  const int num_scales = (int)(sizeof(SCALES)/sizeof(SCALES[0]));
  int width, height, min_bb_side;
  //Rect bbox;
  BoundingBox bbox;
  Size scale;
  int sc=0;  // index of the current scale among the scales actually used

  for (int s=0; s < num_scales; s++){
    width = round(box.width*SCALES[s]);
    height = round(box.height*SCALES[s]);
    min_bb_side = min(height,width);
    // Skip scales that are smaller than the minimum window or larger than
    // the image.
    if (min_bb_side < min_win || width > img.cols || height > img.rows)
      continue;
    scale.width = width;
    scale.height = height;
    scales.push_back(scale);
    // Stride is constant per scale. It is clamped to at least one pixel so
    // that a very small min_win cannot yield a zero stride and an endless
    // loop.
    const int step = max(1, (int)round(SHIFT*min_bb_side));
    for (int y=1; y<img.rows-height; y+=step){
      for (int x=1; x<img.cols-width; x+=step){
        bbox.x = x;
        bbox.y = y;
        bbox.width = width;
        bbox.height = height;
        bbox.overlap = bbOverlap(bbox, BoundingBox(box));
        bbox.sidx = sc;
        grid.push_back(bbox);
      }
    }
    sc++;
  }
}

// Overlap of two boxes, defined as intersection area divided by union area.
// Returns 0 when the boxes are separated along either axis.
float TLD::bbOverlap(const BoundingBox& box1, const BoundingBox& box2){
  const int right1  = box1.x + box1.width;
  const int bottom1 = box1.y + box1.height;
  const int right2  = box2.x + box2.width;
  const int bottom2 = box2.y + box2.height;

  // Quick rejection: one box lies entirely beyond the other.
  const bool apart = box1.x > right2 || box1.y > bottom2 ||
                     right1 < box2.x || bottom1 < box2.y;
  if (apart)
    return 0.0;

  const float overlap_w = min(right1, right2) - max(box1.x, box2.x);
  const float overlap_h = min(bottom1, bottom2) - max(box1.y, box2.y);
  const float shared = overlap_w * overlap_h;

  const float first_area  = box1.width * box1.height;
  const float second_area = box2.width * box2.height;
  return shared / (first_area + second_area - shared);
}

// Sorts the grid windows into good_boxes / bad_boxes (as indexes into grid)
// using their precomputed `overlap` value:
//   - overlap > 0.6           -> good_boxes
//   - overlap < bad_overlap   -> bad_boxes
// The window with the largest overlap is stored in best_box. good_boxes is
// then cut down to at most num_closest entries and its hull is recomputed.
// `box1` is not read here; the overlaps must already refer to it.
void TLD::getOverlappingBoxes(const cv::Rect& box1,int num_closest){
  float top_overlap = 0;
  for (int k=0;k<grid.size();++k){
      const BoundingBox& win = grid[k];
      if (win.overlap > top_overlap) {
          top_overlap = win.overlap;
          best_box = win;
      }
      if (win.overlap > 0.6)
          good_boxes.push_back(k);
      else if (win.overlap < bad_overlap)
          bad_boxes.push_back(k);
  }
  //Get the best num_closest (10) boxes and puts them in good_boxes
  // nth_element partitions by OComparator(grid) so the first num_closest
  // entries are the preferred ones; the rest is dropped.
  if (good_boxes.size()>num_closest){
    std::nth_element(good_boxes.begin(), good_boxes.begin() + num_closest, good_boxes.end(), OComparator(grid));
    good_boxes.resize(num_closest);
  }
  getBBHull();
}

//此函数获取good_boxes 的 Hull壳，也就是窗口（图像）的边框 bounding box
void TLD::getBBHull(){
  int x1=INT_MAX, x2=0;  //INT_MAX 最大的整形数
  int y1=INT_MAX, y2=0;
  int idx;
  for (int i=0;i<good_boxes.size();i++){
      idx= good_boxes[i];
      x1=min(grid[idx].x,x1);   //防止出现负数？？
      y1=min(grid[idx].y,y1);
      x2=max(grid[idx].x + grid[idx].width,x2);
      y2=max(grid[idx].y + grid[idx].height,y2);
  }
  bbhull.x = x1;
  bbhull.y = y1;
  bbhull.width = x2-x1;
  bbhull.height = y2 -y1;
}

//如果两个box的重叠度小于0.5，返回false，否则返回true
bool bbcomp(const BoundingBox& b1,const BoundingBox& b2){
  TLD t;
    if (t.bbOverlap(b1,b2)<0.5)
      return false;
    else
      return true;
}

int TLD::clusterBB(const vector<BoundingBox>& dbb,vector<int>& indexes){
  //FIXME: Conditional jump or move depends on uninitialised value(s)
  const int c = dbb.size();
  //1. Build proximity matrix
  Mat D(c,c,CV_32F);
  float d;
  for (int i=0;i<c;i++){
      for (int j=i+1;j<c;j++){
        d = 1-bbOverlap(dbb[i],dbb[j]);
        D.at<float>(i,j) = d;
        D.at<float>(j,i) = d;
      }
  }
  //2. Initialize disjoint clustering
 float L[c-1]; //Level
 int nodes[c-1][2];
 int belongs[c];
 int m=c;
 for (int i=0;i<c;i++){
    belongs[i]=i;
 }
 for (int it=0;it<c-1;it++){
 //3. Find nearest neighbor
     float min_d = 1;
     int node_a, node_b;
     for (int i=0;i<D.rows;i++){
         for (int j=i+1;j<D.cols;j++){
             if (D.at<float>(i,j)<min_d && belongs[i]!=belongs[j]){
                 min_d = D.at<float>(i,j);
                 node_a = i;
                 node_b = j;
             }
         }
     }
     if (min_d>0.5){
         int max_idx =0;
         bool visited;
         for (int j=0;j<c;j++){
             visited = false;
             for(int i=0;i<2*c-1;i++){
                 if (belongs[j]==i){
                     indexes[j]=max_idx;
                     visited = true;
                 }
             }
             if (visited)
               max_idx++;
         }
         return max_idx;
     }

 //4. Merge clusters and assign level
     L[m]=min_d;
     nodes[it][0] = belongs[node_a];
     nodes[it][1] = belongs[node_b];
     for (int k=0;k<c;k++){
         if (belongs[k]==belongs[node_a] || belongs[k]==belongs[node_b])
           belongs[k]=m;
     }
     m++;
 }
 return 1;

}

//对检测器检测到的目标bounding box进行聚类
//聚类（Cluster）分析是由若干模式（Pattern）组成的，通常，模式是一个度量（Measurement）的向量，或者是多维空间中的
//一个点。聚类分析以相似性为基础，在一个聚类中的模式之间比不在同一聚类中的模式之间具有更多的相似性。
void TLD::clusterConf(const vector<BoundingBox>& dbb,const vector<float>& dconf,vector<BoundingBox>& cbb,vector<float>& cconf){
  int numbb =dbb.size();
  vector<int> T;
  float space_thr = 0.5;
  int c=1;    //记录 聚类的类个数
  switch (numbb){  //检测到的含有目标的bounding box个数
  case 1:
    cbb=vector<BoundingBox>(1,dbb[0]);  //如果只检测到一个，那么这个就是检测器检测到的目标
    cconf=vector<float>(1,dconf[0]);
    return;
    break;
  case 2:
    T =vector<int>(2,0);
	//此函数计算两个bounding box 的重叠度
    if (1 - bbOverlap(dbb[0],dbb[1]) > space_thr){  //如果只检测到两个box，但他们的重叠度小于0.5
      T[1]=1;
      c=2;  //重叠度小于0.5的box，属于不同的类
    }
    break;
  default:  //检测到的box数目大于2个，则筛选出重叠度大于0.5的
    T = vector<int>(numbb, 0);
	//stable_partition()重新排列元素，使得满足指定条件的元素排在不满足条件的元素前面。它维持着两组元素的顺序关系。
	//STL partition就是把一个区间中的元素按照某个条件分成两类。返回第二类子集的起点
	//bbcomp()函数判断两个box的重叠度小于0.5，返回false，否则返回true （分界点是重叠度：0.5）
	//partition() 将dbb划分为两个子集，将满足两个box的重叠度小于0.5的元素移动到序列的前面，为一个子集，重叠度大于0.5的，
	//放在序列后面，为第二个子集，但两个子集的大小不知道，返回第二类子集的起点
    c = partition(dbb, T, (*bbcomp));   //重叠度小于0.5的box，属于不同的类，所以c是不同的类别个数
    //c = clusterBB(dbb,T);
    break;
  }
  
  cconf=vector<float>(c); 
  cbb=vector<BoundingBox>(c);
  printf("Cluster indexes: ");
  BoundingBox bx;
  for (int i=0;i<c;i++){   //类别个数
      float cnf=0;
      int N=0,mx=0,my=0,mw=0,mh=0;
      for (int j=0;j<T.size();j++){  //检测到的bounding box个数
          if (T[j]==i){   //将聚类为同一个类别的box的坐标和大小进行累加
              printf("%d ",i);
              cnf=cnf+dconf[j];
              mx=mx+dbb[j].x;
              my=my+dbb[j].y;
              mw=mw+dbb[j].width;
              mh=mh+dbb[j].height;
              N++;
          }
      }
      if (N>0){   //然后求该类的box的坐标和大小的平均值，将平均值作为该类的box的代表
          cconf[i]=cnf/N;
          bx.x=cvRound(mx/N);
          bx.y=cvRound(my/N);
          bx.width=cvRound(mw/N);
          bx.height=cvRound(mh/N);
          cbb[i]=bx;  //返回的是聚类，每一个类都有一个代表的bounding box
      }
  }
  printf("\n");
}

FerNNClassifier.h

/*
 * FerNNClassifier.h
 *
 *  Created on: Jun 14, 2011
 *      Author: alantrrs
 */

#include <opencv2/opencv.hpp>
#include <stdio.h>
// Combined classifier: an ensemble of ferns (pixel-comparison features with
// per-code posteriors) plus a nearest-neighbour classifier over stored
// positive/negative example patches.
class FerNNClassifier{
private:
  // Set by read() from the parameter file node.
  float thr_fern;   // per-tree fern threshold
  int structSize;   // number of features (pixel comparisons) per tree
  int nstructs;     // number of trees
  float valid;
  float ncc_thesame;
  float thr_nn;     // nearest-neighbour threshold
  int acum;         // counts trainNN() calls; reset by prepare()
public:
  //Parameters
  float thr_nn_valid;

  void read(const cv::FileNode& file);
  void prepare(const std::vector<cv::Size>& scales);
  void getFeatures(const cv::Mat& image,const int& scale_idx,std::vector<int>& fern);
  void update(const std::vector<int>& fern, int C, int N);
  float measure_forest(std::vector<int> fern);
  void trainF(const std::vector<std::pair<std::vector<int>,int> >& ferns,int resample);
  void trainNN(const std::vector<cv::Mat>& nn_examples);
  void NNConf(const cv::Mat& example,std::vector<int>& isin,float& rsconf,float& csconf);
  void evaluateTh(const std::vector<std::pair<std::vector<int>,int> >& nXT,const std::vector<cv::Mat>& nExT);
  void show();
  //Ferns Members
  int getNumStructs() const {return nstructs;}
  float getFernTh() const {return thr_fern;}
  float getNNTh() const {return thr_nn;}

  // One binary feature: compares the pixels at (x1,y1) and (x2,y2) of a
  // patch. Coordinates are stored as int; the earlier 8-bit storage wrapped
  // around for scanning windows wider or taller than 255 pixels.
  struct Feature
      {
          int x1, y1, x2, y2;
          Feature() : x1(0), y1(0), x2(0), y2(0) {}
          Feature(int _x1, int _y1, int _x2, int _y2)
          : x1(_x1), y1(_y1), x2(_x2), y2(_y2)
          {}
          // Mat::at takes (row, column), hence (y, x). The patch is read as
          // 8-bit single-channel data.
          bool operator ()(const cv::Mat& patch) const
          {
              return patch.at<uchar>(y1,x1) > patch.at<uchar>(y2, x2);
          }
      };
  std::vector<std::vector<Feature> > features; //Ferns features (one std::vector for each scale)
  std::vector< std::vector<int> > nCounter; //negative counter
  std::vector< std::vector<int> > pCounter; //positive counter
  std::vector< std::vector<float> > posteriors; //Ferns posteriors
  float thrN; //Negative threshold
  float thrP;  //Positive thershold

  //NN Members
  std::vector<cv::Mat> pEx; //NN positive examples
  std::vector<cv::Mat> nEx; //NN negative examples
};

FerNNClassifier.cpp

/*
 * FerNNClassifier.cpp
 *
 *  Created on: Jun 14, 2011
 *      Author: alantrrs
 */

#include <FerNNClassifier.h>

using namespace cv;
using namespace std;

void FerNNClassifier::read(const FileNode& file){
  ///Classifier Parameters
  // Fern ensemble layout: number of trees and features per tree.
  nstructs   = static_cast<int>(file["num_trees"]);
  structSize = static_cast<int>(file["num_features"]);

  // Thresholds and similarity settings.
  thr_fern     = static_cast<float>(file["thr_fern"]);
  thr_nn       = static_cast<float>(file["thr_nn"]);
  thr_nn_valid = static_cast<float>(file["thr_nn_valid"]);
  ncc_thesame  = static_cast<float>(file["ncc_thesame"]);
  valid        = static_cast<float>(file["valid"]);
}

// Initialises the fern ensemble for the given scanning-window sizes:
// draws the random pixel-comparison features, sets the negative threshold
// and resets the posterior / counter tables. Calling it again starts from a
// clean state (the tables are re-created, not appended to).
void FerNNClassifier::prepare(const vector<Size>& scales){
  acum = 0;
  //Initialize test locations for features
  const int totalFeatures = nstructs * structSize;
  // features[s][i]: i-th feature, expressed in pixels of scale s.
  features = vector<vector<Feature> >(scales.size(), vector<Feature> (totalFeatures));

  RNG& rng = theRNG();  // OpenCV's default random number generator

  float x1f,x2f,y1f,y2f;
  int x1, x2, y1, y2;
  // Each feature is drawn once in relative coordinates and then scaled to
  // every window size, so the same comparison is used at all scales.
  for (int i=0;i<totalFeatures;i++){
      x1f = (float)rng;
      y1f = (float)rng;
      x2f = (float)rng;
      y2f = (float)rng;
      for (size_t s=0; s<scales.size(); s++){
          x1 = x1f * scales[s].width;
          y1 = y1f * scales[s].height;
          x2 = x2f * scales[s].width;
          y2 = y2f * scales[s].height;
          features[s][i] = Feature(x1, y1, x2, y2);
      }
  }
  //Thresholds
  thrN = 0.5 * nstructs;

  //Initialize Posteriors
  // Every tree produces a structSize-bit code, i.e. 2^structSize possible
  // codes; each code has a positive counter, a negative counter and the
  // posterior #p/(#p+#n) maintained by update(). All start at zero.
  const size_t numCodes = (size_t)1 << structSize;
  posteriors.assign(nstructs, vector<float>(numCodes, 0.0f));
  pCounter.assign(nstructs, vector<int>(numCodes, 0));
  nCounter.assign(nstructs, vector<int>(numCodes, 0));
}

// Computes the fern code of `image` for every tree.
//
// image      patch to describe (the features read it as 8-bit pixels)
// scale_idx  index of the patch's scale in `features`
// fern       output; must hold at least nstructs entries. fern[t] receives
//            the structSize-bit code of tree t, one bit per pixel comparison
void FerNNClassifier::getFeatures(const cv::Mat& image, const int& scale_idx, vector<int>& fern){
  const vector<Feature>& scale_features = features[scale_idx];
  for (int t=0; t<nstructs; t++){
      int leaf=0;
      // Tree t owns the features [t*structSize, (t+1)*structSize). The
      // offset must use structSize: with t*nstructs the trees shared
      // features, and the index could run past the end when there are more
      // trees than features per tree.
      const int first = t*structSize;
      for (int f=0; f<structSize; f++){
          // Feature::operator() yields the result of one pixel comparison,
          // which is appended as the next bit of the code.
          leaf = (leaf << 1) + scale_features[first+f](image);
      }
      fern[t] = leaf;
  }
}

// Returns the sum over all trees of the posterior stored for that tree's
// code in `fern` (fern[i] is the code of tree i).
float FerNNClassifier::measure_forest(vector<int> fern) {
  float total = 0;
  int tree = 0;
  while (tree < nstructs) {
      const int code = fern[tree];
      total += posteriors[tree][code];
      ++tree;
  }
  return total;
}

// Adds N observations of the codes in `fern` to the positive (C == 1) or
// negative (any other C) counters and refreshes the affected posteriors,
// posterior = #p / (#p + #n), or 0 while no positive has been seen.
void FerNNClassifier::update(const vector<int>& fern, int C, int N) {
  for (int tree = 0; tree < nstructs; ++tree) {
      const int code = fern[tree];
      if (C==1)
        pCounter[tree][code] += N;
      else
        nCounter[tree][code] += N;

      const int positives = pCounter[tree][code];
      if (positives!=0)
        posteriors[tree][code] = ((float)(positives))/(positives + nCounter[tree][code]);
      else
        posteriors[tree][code] = 0;
  }
}

//训练集合分类器（n个基本分类器集合）
void FerNNClassifier::trainF(const vector<std::pair<vector<int>,int> >& ferns,int resample){
  // Conf = function(2,X,Y,Margin,Bootstrap,Idx)
  //                 0 1 2 3      4         5
  //  double *X     = mxGetPr(prhs[1]); -> ferns[i].first
  //  int numX      = mxGetN(prhs[1]);  -> ferns.size()
  //  double *Y     = mxGetPr(prhs[2]); ->ferns[i].second
  //  double thrP   = *mxGetPr(prhs[3]) * nTREES; ->threshold*nstructs
  //  int bootstrap = (int) *mxGetPr(prhs[4]); ->resample
  
  //thr_fern: 0.6 thrP定义为Positive thershold
  thrP = thr_fern * nstructs;                                    // int step = numX / 10;
  //for (int j = 0; j < resample; j++) {                      // for (int j = 0; j < bootstrap; j++) {
      for (int i = 0; i < ferns.size(); i++){               //   for (int i = 0; i < step; i++) {
                                                            //     for (int k = 0; k < 10; k++) {
                                                            //       int I = k*step + i;//box index
                                                            //       double *x = X+nTREES*I; //tree index
          if(ferns[i].second==1){    //为1表示正样本        //       if (Y[I] == 1) {
		      //measure_forest函数返回所有树的所有特征值对应的后验概率累加值
			  //该累加值如果小于正样本阈值，也就是是输入的是正样本，却被分类成负样本了
			  //出现分类错误，所以就把该样本添加到正样本库，同时更新后验概率
              if(measure_forest(ferns[i].first) <= thrP)      //         if (measure_forest(x) <= thrP)
			  更新正样本数，同时更新后验概率
                update(ferns[i].first, 1, 1);                 //             update(x,1,1);
          }else{                                            //        }else{
              if (measure_forest(ferns[i].first) >= thrN)   //         if (measure_forest(x) >= thrN)
                update(ferns[i].first, 0, 1);                 //             update(x,0,1);
          }
      }
  //}
}

// Train the nearest-neighbour classifier.
//
// Parameters:
//   nn_examples - patches to learn from. Element 0 is labelled positive and
//                 every other element negative (the original notes state that
//                 callers pass a single positive example in slot 0).
//
// A patch is added to the model only when the current model misclassifies it.
// An empty input adds nothing; the training counter is still incremented and
// the summary line is still printed.
void FerNNClassifier::trainNN(const vector<cv::Mat>& nn_examples){
  float conf, dummy;
  vector<int> y(nn_examples.size(),0);   // labels, all negative by default
  if (!y.empty())                        // guard: y[0] on an empty vector is out of bounds
    y[0]=1;
  vector<int> isin;
  for (size_t i=0; i<nn_examples.size(); i++){
      // Relative similarity of the patch to the current online model.
      NNConf(nn_examples[i], isin, conf, dummy);
      // Positive patch whose similarity does not exceed thr_nn (0.65 per the
      // original notes): the model fails to recognise it, so learn it.
      if (y[i]==1 && conf <= thr_nn){
          if (isin[1]<0){
              // NNConf reported no nearest positive: start the positive set
              // from this single example.
              pEx = vector<Mat>(1,nn_examples[i]);
              continue;
          }
          // The MATLAB reference inserts right after the nearest positive
          // (index isin[1]); this port appends at the end instead.
          pEx.push_back(nn_examples[i]);
      }
      // Negative patch that the model rates above 0.5: learn it as negative.
      if(y[i]==0 && conf>0.5)
        nEx.push_back(nn_examples[i]);

  }
  acum++;
  printf("%d. Trained NN examples: %d positive %d negative\n",acum,(int)pEx.size(),(int)nEx.size());
}

  /* Compare a patch against the online nearest-neighbour model.
   * Inputs:
   *  - example: NN patch
   * Outputs:
   *  - rsconf: relative similarity
   *  - csconf: conservative similarity (positive side restricted to the
   *            first ceil(pEx.size()*valid) positive examples)
   *  - isin:   three ints, each -1 unless set below:
   *            [0] = 1 if some positive example exceeds ncc_thesame,
   *            [1] = index of the best-matching positive example,
   *            [2] = 1 if some negative example exceeds ncc_thesame.
   * With no positive examples both similarities are 0; with positives but no
   * negatives both are 1. In either case isin stays {-1,-1,-1}.
   */
void FerNNClassifier::NNConf(const Mat& example, vector<int>& isin,float& rsconf,float& csconf){
  isin=vector<int>(3,-1);
  if (pEx.empty()){ // no positive examples in the model: everything is negative
      rsconf = 0;
      csconf=0;
      return;
  }
  if (nEx.empty()){ // no negative examples in the model: everything is positive
      rsconf = 1;
      csconf=1;
      return;
  }
  Mat ncc(1,1,CV_32F);
  // csmaxP and maxPidx are read unconditionally after the loop but are only
  // assigned when a score beats maxP, so they need defined starting values.
  // maxPidx starts at 0 (a valid index, pEx is non-empty here) rather than -1,
  // because trainNN interprets isin[1] < 0 as "positive model is empty".
  float nccP, csmaxP=0, maxP=0;
  bool anyP=false;
  int maxPidx=0, validatedPart = ceil(pEx.size()*valid);
  float nccN, maxN=0;
  bool anyN=false;
  // Nearest positive neighbour: match the patch against every positive example
  // and keep the highest similarity and where it occurred.
  for (int i=0;i<(int)pEx.size();i++){
      matchTemplate(pEx[i], example, ncc, CV_TM_CCORR_NORMED);      // NCC to positive example
      nccP=(((float*)ncc.data)[0]+1)*0.5;  // map a score in [-1,1] onto [0,1]
      if (nccP>ncc_thesame)  // ncc_thesame is 0.95 per the original notes
        anyP=true;
      if(nccP > maxP){
          maxP=nccP;
          maxPidx = i;
          if(i<validatedPart)   // best match so far lies in the validated part
            csmaxP=maxP;
      }
  }
  // Nearest negative neighbour.
  for (int i=0;i<(int)nEx.size();i++){
      matchTemplate(nEx[i],example,ncc,CV_TM_CCORR_NORMED);     // NCC to negative example
      nccN=(((float*)ncc.data)[0]+1)*0.5;
      if (nccN>ncc_thesame)
        anyN=true;
      if(nccN > maxN)
        maxN=nccN;
  }
  // Set isin.
  // If the query patch is highly correlated with any positive patch in the
  // model, it is considered to be one of them.
  if (anyP) isin[0]=1;
  isin[1]=maxPidx;      // index of the maximally correlated positive patch
  // Likewise for the negative patches.
  if (anyN) isin[2]=1;

  // Relative similarity: dN / (dN + dP), with d = 1 - nearest-neighbour similarity.
  // NOTE(review): dN + dP is 0 when the patch matches both sets perfectly
  // (maxN == maxP == 1); the quotient is then not a usable number.
  float dN=1-maxN;
  float dP=1-maxP;
  rsconf = (float)dN/(dN+dP);

  // Conservative similarity: same ratio, positive distance taken from the
  // validated part only.
  dP = 1 - csmaxP;
  csconf =(float)dN / (dN + dP);
}

void FerNNClassifier::evaluateTh(const vector<pair<vector<int>,int> >& nXT, const vector<cv::Mat>& nExT){
  float fconf;
  for (int i=0;i<nXT.size();i++){
  //所有基本分类器的后验概率的平均值如果大于thr_fern，则认为含有前景目标
  //measure_forest返回的是所有后验概率的累加和，nstructs 为树的个数，也就是基本分类器的数目 ？？
    fconf = (float) measure_forest(nXT[i].first)/nstructs;
    if (fconf>thr_fern)  //thr_fern: 0.6 thrP定义为Positive thershold
      thr_fern = fconf;  //取这个平均值作为 该集合分类器的 新的阈值，这就是训练？？
  }
  
  vector <int> isin;
  float conf, dummy;
  for (int i=0; i<nExT.size(); i++){
      NNConf(nExT[i], isin, conf, dummy);
      if (conf > thr_nn)
        thr_nn = conf; //取这个最大相关相似度作为 该最近邻分类器的 新的阈值，这就是训练？？
  }
  
  if (thr_nn > thr_nn_valid)  //thr_nn_valid: 0.7
    thr_nn_valid = thr_nn;
}

// Display every positive example of the online model, stacked vertically,
// in a window named "Examples". Does nothing when the model holds no
// positive examples.
void FerNNClassifier::show(){
  if (pEx.empty())   // pEx[0] is used for sizing below; nothing to draw anyway
    return;
  // Canvas tall enough for all patches; sized from the first patch.
  Mat examples((int)pEx.size()*pEx[0].rows, pEx[0].cols, CV_8U);
  double minval;
  Mat ex(pEx[0].rows, pEx[0].cols, pEx[0].type());
  for (int i=0;i<(int)pEx.size();i++){
    // Shift the patch so that its smallest value becomes 0.
    minMaxLoc(pEx[i], &minval);
    pEx[i].copyTo(ex);
    ex = ex - minval;
    // rowRange creates a header over rows [i*rows, (i+1)*rows) of the canvas,
    // so converting into tmp writes the patch into its slot.
    Mat tmp = examples.rowRange(Range(i*pEx[i].rows, (i+1)*pEx[i].rows));
    ex.convertTo(tmp, CV_8U);
  }
  imshow("Examples", examples);
}