MeanShift Algorithm
The idea is simple: climb the gradient of the probability density to find a local mode. For details, see the references:
[1] Fukunaga, Hostetler, "The Estimation of the Gradient of a Density Function, with Applications in Pattern Recognition" (1975)
[2] Cheng, "Mean shift, mode seeking, and clustering" (1995)
[3] Comaniciu, Meer, "Mean Shift: A Robust Approach Toward Feature Space Analysis" (2002)
[4] Comaniciu, Ramesh, Meer, "Real-time tracking of non-rigid objects using mean shift" (2000)
[5] Collins, "Mean-shift Blob Tracking through Scale Space" (2003)
[6] Comaniciu, "An Algorithm for Data-Driven Bandwidth Selection" (2003)
OpenCV's Meanshift implementation seems to be reduced to plain center-of-mass tracking, with no kernel function and no Bhattacharyya coefficient... no wonder its tracking performance is so poor.
The computation goes as follows:
1. Zeroth-order moment over the region:
for (int i = 0; i < height; i++)
    for (int j = 0; j < width; j++)
        M00 += I(i, j);
2. First-order moments over the region:
for (int i = 0; i < height; i++)
    for (int j = 0; j < width; j++)
    {
        M10 += i * I(i, j);
        M01 += j * I(i, j);
    }
3. The mass center is then:
Xc = M10 / M00;  Yc = M01 / M00;
The cvMeanShift algorithm itself breaks down into four steps:
1. Choose the size and initial position of the window.
2. Compute the mass center of the current window.
3. Move the window's center to the mass center.
4. Repeat 2 and 3 until the window center "converges", i.e. until the distance the window moves per iteration drops below a threshold, or the iteration count reaches a preset limit.
int cvMeanShift(IplImage* imgprob, CvRect windowIn,
                CvTermCriteria criteria, CvConnectedComp* out);
- Function description, required arguments:
- 1. IplImage* imgprob: the 2D probability distribution image (input);
- 2. CvRect windowIn: the initial window (input);
- 3. CvTermCriteria criteria: the criteria for stopping the iteration (input);
- 4. CvConnectedComp* out: the search result (output).
- (Note: a CvTermCriteria is built from three arguments: the type, the maximum number of iterations, and the threshold. For example: criteria = cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10, 0.1).)
- Parameters:
-   imgProb  - 2D object probability distribution
-   windowIn - CvRect with the initial size of the search window
-   criteria - criteria for stopping the iteration
-   comp     - returns the location, size and area of the converged search window
- Returns:
-   the number of iterations performed
int cvMeanShift( const void* imgProb, CvRect windowIn,
                 CvTermCriteria criteria, CvConnectedComp* comp )
{
    CvMoments moments;
    int i = 0, eps;
    CvMat stub, *mat = (CvMat*)imgProb;
    CvMat cur_win;
    CvRect cur_rect = windowIn;

    CV_FUNCNAME( "cvMeanShift" );

    if( comp )
        comp->rect = windowIn;
    moments.m00 = moments.m10 = moments.m01 = 0;

    __BEGIN__;

    CV_CALL( mat = cvGetMat( mat, &stub ));
    if( windowIn.height <= 0 || windowIn.width <= 0 )
        CV_ERROR( CV_StsBadArg, "Input window has non-positive sizes" );
    if( windowIn.x < 0 || windowIn.x + windowIn.width > mat->cols ||
        windowIn.y < 0 || windowIn.y + windowIn.height > mat->rows )
        CV_ERROR( CV_StsBadArg, "Initial window is not inside the image ROI" );

    CV_CALL( criteria = cvCheckTermCriteria( criteria, 1., 100 ));
    eps = cvRound( criteria.epsilon * criteria.epsilon );

    for( i = 0; i < criteria.max_iter; i++ )
    {
        int dx, dy, nx, ny;
        double inv_m00;

        CV_CALL( cvGetSubRect( mat, &cur_win, cur_rect ));
        CV_CALL( cvMoments( &cur_win, &moments ));

        /* empty window: no mass to shift towards */
        if( fabs(moments.m00) < DBL_EPSILON )
            break;

        /* shift from the window center to the mass center */
        inv_m00 = moments.inv_sqrt_m00*moments.inv_sqrt_m00;
        dx = cvRound( moments.m10 * inv_m00 - windowIn.width*0.5 );
        dy = cvRound( moments.m01 * inv_m00 - windowIn.height*0.5 );

        /* clamp the new window position to the image */
        nx = cur_rect.x + dx;
        ny = cur_rect.y + dy;
        if( nx < 0 )
            nx = 0;
        else if( nx + cur_rect.width > mat->cols )
            nx = mat->cols - cur_rect.width;
        if( ny < 0 )
            ny = 0;
        else if( ny + cur_rect.height > mat->rows )
            ny = mat->rows - cur_rect.height;

        dx = nx - cur_rect.x;
        dy = ny - cur_rect.y;
        cur_rect.x = nx;
        cur_rect.y = ny;

        /* stop when the window barely moved */
        if( dx*dx + dy*dy < eps )
            break;
    }

    __END__;

    if( comp )
    {
        comp->rect = cur_rect;
        comp->area = (float)moments.m00;
    }

    return i;
}
Camshift Algorithm
It is an improvement on MeanShift: the continuously adaptive MeanShift, in full "Continuously Adaptive Mean-SHIFT". The basic idea is to run MeanShift on every frame of the video, taking the previous frame's result (the center and size of the search window) as the initial search window for the MeanShift run on the next frame, and so on.
Camshift is derived from Meanshift. Meanshift operates on a single image, but analyzing one image in isolation is meaningless for tracking; Camshift applies the MeanShift procedure to an image sequence:
(1) First, select a region in the image sequence.
(2) Compute the 2D color probability distribution of that region.
(3) Use the MeanShift algorithm to converge on the region to be tracked.
(4) Re-center on the converged region and mark it.
(5) Repeat (3) and (4) for every frame.
The key to Camshift is that when the target's size changes, the algorithm adaptively resizes the target region and keeps tracking. Nothing more to say; here is the source. Part of it measures execution time, and you can remove that if you don't need it.
For the details, see this reference:
Bradski, "Computer Vision Face Tracking For Use in a Perceptual User Interface". Intel Technology Journal, Q2, 1998.
- Function description:
- Parameters:
-   imgProb  - 2D object probability distribution
-   windowIn - CvRect with the initial size of the search window
-   criteria - criteria for stopping the search
-   _comp    - if not NULL, returns the location, size and area of the converged window
-   box      - if not NULL, returns the center, equivalent length and width, and orientation of the distribution
- Returns:
-   the number of iterations cvMeanShift took to converge
int cvCamShift( const void* imgProb, CvRect windowIn,
                CvTermCriteria criteria,
                CvConnectedComp* _comp,
                CvBox2D* box )
{
    /* timing code added by the author; remove if not needed */
    QueryPerformanceFrequency(&freq);
    QueryPerformanceCounter(&start1);

    const int TOLERANCE = 10;
    CvMoments moments;
    double m00 = 0, m10, m01, mu20, mu11, mu02, inv_m00;
    double a, b, c, xc, yc;
    double rotate_a, rotate_c;
    double theta = 0, square;
    double cs, sn;
    double length = 0, width = 0;
    int itersUsed = 0;
    CvConnectedComp comp;
    CvMat cur_win, stub, *mat = (CvMat*)imgProb;

    CV_FUNCNAME( "cvCamShift" );

    comp.rect = windowIn;

    __BEGIN__;

    CV_CALL( mat = cvGetMat( mat, &stub ));

    /* run plain mean shift first */
    CV_CALL( itersUsed = cvMeanShift( mat, windowIn, criteria, &comp ));
    windowIn = comp.rect;

    /* grow the converged window by TOLERANCE on each side, clipped to the image */
    windowIn.x -= TOLERANCE;
    if( windowIn.x < 0 )
        windowIn.x = 0;
    windowIn.y -= TOLERANCE;
    if( windowIn.y < 0 )
        windowIn.y = 0;
    windowIn.width += 2 * TOLERANCE;
    if( windowIn.x + windowIn.width > mat->width )
        windowIn.width = mat->width - windowIn.x;
    windowIn.height += 2 * TOLERANCE;
    if( windowIn.y + windowIn.height > mat->height )
        windowIn.height = mat->height - windowIn.y;

    CV_CALL( cvGetSubRect( mat, &cur_win, windowIn ));

    /* moments up to second order */
    CV_CALL( cvMoments( &cur_win, &moments ));

    m00 = moments.m00;
    m10 = moments.m10;
    m01 = moments.m01;
    mu11 = moments.mu11;
    mu20 = moments.mu20;
    mu02 = moments.mu02;

    if( fabs(m00) < DBL_EPSILON )
        EXIT;

    inv_m00 = 1. / m00;
    xc = cvRound( m10 * inv_m00 + windowIn.x );
    yc = cvRound( m01 * inv_m00 + windowIn.y );
    a = mu20 * inv_m00;
    b = mu11 * inv_m00;
    c = mu02 * inv_m00;

    /* orientation of the distribution from the central moments */
    square = sqrt( 4 * b * b + (a - c) * (a - c) );
    theta = atan2( 2 * b, a - c + square );
    cs = cos( theta );
    sn = sin( theta );

    /* equivalent length and width along and across the major axis */
    rotate_a = cs * cs * mu20 + 2 * cs * sn * mu11 + sn * sn * mu02;
    rotate_c = sn * sn * mu20 - 2 * cs * sn * mu11 + cs * cs * mu02;
    length = sqrt( rotate_a * inv_m00 ) * 4;
    width = sqrt( rotate_c * inv_m00 ) * 4;

    /* if length < width, swap them and rotate by 90 degrees */
    if( length < width )
    {
        double t;
        CV_SWAP( length, width, t );
        CV_SWAP( cs, sn, t );
        theta = CV_PI*0.5 - theta;
    }

    if( _comp || box )
    {
        int t0, t1;
        int _xc = cvRound( xc );
        int _yc = cvRound( yc );

        t0 = cvRound( fabs( length * cs ));
        t1 = cvRound( fabs( width * sn ));
        t0 = MAX( t0, t1 ) + 2;
        comp.rect.width = MIN( t0, (mat->width - _xc) * 2 );

        t0 = cvRound( fabs( length * sn ));
        t1 = cvRound( fabs( width * cs ));
        t0 = MAX( t0, t1 ) + 2;
        comp.rect.height = MIN( t0, (mat->height - _yc) * 2 );

        comp.rect.x = MAX( 0, _xc - comp.rect.width / 2 );
        comp.rect.y = MAX( 0, _yc - comp.rect.height / 2 );
        comp.rect.width = MIN( mat->width - comp.rect.x, comp.rect.width );
        comp.rect.height = MIN( mat->height - comp.rect.y, comp.rect.height );
        comp.area = (float) m00;
    }

    __END__;

    if( _comp )
        *_comp = comp;
    if( box )
    {
        box->size.height = (float)length;
        box->size.width = (float)width;
        box->angle = (float)(theta*180./CV_PI);
        box->center = cvPoint2D32f( comp.rect.x + comp.rect.width*0.5f,
                                    comp.rect.y + comp.rect.height*0.5f );
    }

    /* timing code added by the author; remove if not needed */
    QueryPerformanceCounter(&end1);
    time_origin << (double)(end1.QuadPart - start1.QuadPart) / (double)freq.QuadPart << endl;

    return itersUsed;
}
The CAMSHIFT sample that ships with OpenCV computes the histogram of the target's hue (H) channel in HSV space, back-projects that histogram to obtain the probability distribution of target pixels, and then calls the library's CAMSHIFT routine to track the target, automatically adjusting the center and size of the target window.
The algorithm tracks solid-colored objects against a black-and-white background very well, but if the background color is close to the target's, or there are objects near the target with a similar hue, CAMSHIFT will absorb them into the window, inflating the tracking window, sometimes to the entire frame.
Yesterday I finished chapter 10 of Learning OpenCV. One of the exercises asks you to turn the camshift demo into a meanshift one and compare the results. I made the change myself, tracking the object with a meanshift rectangle; since meanshift never changes the size of the kernel window, the rectangle of course stays fixed.
Comparing it with camshift: both back-project the hue histogram, so in practice the results are much the same. My experiments suggest that meanshift works better for small, distant targets, whose size barely changes and whose window is less easily disturbed; for close targets, whose apparent size changes with distance to the camera, camshift adapts automatically.
Learning OpenCV also mentions combining the two methods for more robust tracking; personally I don't think they differ fundamentally, so there is not much to combine.
Below is the modified code. The selected target is drawn with a blue rectangle, the tracked target with a red one.
//---------------------------------------------------------------------------
#include <vcl.h>
//-------open cv macro begin-------------
#ifdef _CH_
#pragma package <opencv>
#endif

/* leftover macro from another Learning OpenCV example; unused here */
#define phi2xy(mat) \
    cvPoint( cvRound(img->width/2 + img->width/3*cos(mat->data.fl[0])), \
             cvRound(img->height/2 - img->width/3*sin(mat->data.fl[0])) )

#include <stdio.h>
#include <iostream.h>
#include <fstream.h>
#include "cv.h"
#include "highgui.h"
//-------open cv macro end-------------
#pragma hdrstop

#include "Unit1.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;

IplImage *image = 0, *hsv = 0, *hue = 0, *mask = 0, *backproject = 0, *histimg = 0;
CvHistogram *hist = 0;

int backproject_mode = 0;
int select_object = 0;
int track_object = 0;
int show_hist = 1;
CvPoint origin;
CvRect selection;
CvRect track_window;
CvBox2D track_box;
CvConnectedComp track_comp;

int hdims = 256;
float hranges_arr[] = {0,180};
float* hranges = hranges_arr;
int vmin = 10, vmax = 256, smin = 30;

//---------------------------------------------------------------------------
/* helper for C++Builder: same behavior as the library's cvGetSize */
CvSize cvGetSize( IplImage *img )
{
    CvSize aa;
    aa.width = img->width;
    aa.height = img->height;
    return aa;
}
//---------------------------------------------------------------------------
__fastcall TForm1::TForm1(TComponent* Owner)
    : TForm(Owner)
{
}
//---------------------------------------------------------------------------
void on_mouse( int event, int x, int y, int flags, void* param )
{
    if( !image )
        return;

    if( image->origin )
        y = image->height - y;

    if( select_object )
    {
        selection.x = MIN(x,origin.x);
        selection.y = MIN(y,origin.y);
        selection.width = selection.x + CV_IABS(x - origin.x);
        selection.height = selection.y + CV_IABS(y - origin.y);

        selection.x = MAX( selection.x, 0 );
        selection.y = MAX( selection.y, 0 );
        selection.width = MIN( selection.width, image->width );
        selection.height = MIN( selection.height, image->height );
        selection.width -= selection.x;
        selection.height -= selection.y;
    }

    switch( event )
    {
    case CV_EVENT_LBUTTONDOWN:
        origin = cvPoint(x,y);
        selection = cvRect(x,y,0,0);
        select_object = 1;
        break;
    case CV_EVENT_LBUTTONUP:
        select_object = 0;
        if( selection.width > 0 && selection.height > 0 )
            track_object = -1;
        break;
    }
}

CvScalar hsv2rgb( float hue )
{
    int rgb[3], p, sector;
    static const int sector_data[][3]=
        {{0,2,1}, {1,2,0}, {1,0,2}, {2,0,1}, {2,1,0}, {0,1,2}};
    hue *= 0.033333333333333333333333333333333f;
    sector = cvFloor(hue);
    p = cvRound(255*(hue - sector));
    p ^= sector & 1 ? 255 : 0;

    rgb[sector_data[sector][0]] = 255;
    rgb[sector_data[sector][1]] = 0;
    rgb[sector_data[sector][2]] = p;

    return cvScalar(rgb[2], rgb[1], rgb[0],0);
}
//---------------------------------------------------------------------------
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    CvCapture* capture = 0;
    // capture = cvCaptureFromCAM( 0 );
    capture = cvCaptureFromAVI( "video.avi" );

    ShowMessage( "Hot keys: \n"
        "\tESC - quit the program\n"
        "\tc - stop the tracking\n"
        "\tb - switch to/from backprojection view\n"
        "\th - show/hide object histogram\n"
        "To initialize tracking, select the object with mouse\n" );

    cvNamedWindow( "Histogram", 1 );
    cvNamedWindow( "CamShiftDemo", 1 );
    cvSetMouseCallback( "CamShiftDemo", on_mouse, 0 );
    cvCreateTrackbar( "Vmin", "CamShiftDemo", &vmin, 256, 0 );
    cvCreateTrackbar( "Vmax", "CamShiftDemo", &vmax, 256, 0 );
    cvCreateTrackbar( "Smin", "CamShiftDemo", &smin, 256, 0 );

    for(;;)
    {
        IplImage* frame = 0;
        int i, bin_w, c;

        frame = cvQueryFrame( capture );
        if( !frame )
            break;

        if( !image )
        {
            /* allocate all the buffers */
            image = cvCreateImage( cvGetSize(frame), 8, 3 );
            image->origin = frame->origin;
            hsv = cvCreateImage( cvGetSize(frame), 8, 3 );
            hue = cvCreateImage( cvGetSize(frame), 8, 1 );
            mask = cvCreateImage( cvGetSize(frame), 8, 1 );
            backproject = cvCreateImage( cvGetSize(frame), 8, 1 );
            hist = cvCreateHist( 1, &hdims, CV_HIST_ARRAY, &hranges, 1 );
            histimg = cvCreateImage( cvSize(320,200), 8, 3 );
            cvZero( histimg );
        }

        cvCopy( frame, image, 0 );
        cvCvtColor( image, hsv, CV_BGR2HSV );

        if( track_object )
        {
            int _vmin = vmin, _vmax = vmax;

            cvInRangeS( hsv, cvScalar(0,smin,MIN(_vmin,_vmax),0),
                        cvScalar(180,256,MAX(_vmin,_vmax),0), mask );
            cvSplit( hsv, hue, 0, 0, 0 );

            if( track_object < 0 )
            {
                /* build the hue histogram from the selected region */
                float max_val = 0.f;
                cvSetImageROI( hue, selection );
                cvSetImageROI( mask, selection );
                cvCalcHist( &hue, hist, 0, mask );
                cvGetMinMaxHistValue( hist, 0, &max_val, 0, 0 );
                cvConvertScale( hist->bins, hist->bins, max_val ? 255. / max_val : 0., 0 );
                cvResetImageROI( hue );
                cvResetImageROI( mask );
                track_window = selection;
                track_object = 1;

                /* draw the histogram */
                cvZero( histimg );
                bin_w = histimg->width / hdims;
                for( i = 0; i < hdims; i++ )
                {
                    int val = cvRound( cvGetReal1D(hist->bins,i)*histimg->height/255 );
                    CvScalar color = hsv2rgb(i*180.f/hdims);
                    cvRectangle( histimg, cvPoint(i*bin_w,histimg->height),
                                 cvPoint((i+1)*bin_w,histimg->height - val),
                                 color, -1, 8, 0 );
                }
            }

            cvCalcBackProject( &hue, backproject, hist );
            cvAnd( backproject, mask, backproject, 0 );
            //cvCamShift( backproject, track_window,
            //            cvTermCriteria( CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1 ),
            //            &track_comp, &track_box );
            cvMeanShift( backproject, track_window,
                         cvTermCriteria( CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 10, 1 ),
                         &track_comp );
            track_window = track_comp.rect;

            if( backproject_mode )
                cvCvtColor( backproject, image, CV_GRAY2BGR );
            if( image->origin )
                track_box.angle = -track_box.angle;
            //cvEllipseBox( image, track_box, CV_RGB(255,0,0), 3, CV_AA, 0 );
            cvRectangle( image, cvPoint(track_comp.rect.x, track_comp.rect.y),
                         cvPoint(track_comp.rect.x + track_comp.rect.width,
                                 track_comp.rect.y + track_comp.rect.height),
                         CV_RGB(255,0,0), 1, CV_AA, 0 );
        }

        if( select_object && selection.width > 0 && selection.height > 0 )
        {
            cvRectangle( image, cvPoint(selection.x, selection.y),
                         cvPoint(selection.x + selection.width,
                                 selection.y + selection.height),
                         CV_RGB(0,0,255), 1, CV_AA, 0 );
        }

        cvShowImage( "CamShiftDemo", image );
        cvShowImage( "Histogram", histimg );

        c = cvWaitKey(100);
        if( (char) c == 27 )
            break;
        switch( (char) c )
        {
        case 'b':
            backproject_mode ^= 1;
            break;
        case 'c':
            track_object = 0;
            cvZero( histimg );
            break;
        case 'h':
            show_hist ^= 1;
            if( !show_hist )
                cvDestroyWindow( "Histogram" );
            else
                cvNamedWindow( "Histogram", 1 );
            break;
        default:
            ;
        }
    }

    cvReleaseCapture( &capture );
    cvDestroyWindow("CamShiftDemo");
}
//---------------------------------------------------------------------------
Original post: http://blog.csdn.net/henhen2002/article/details/4322113