SURF (Speeded Up Robust Features)是一种具有鲁棒性的局部特征检测算法,它首先由Herbert Bay等人于2006年提出,并在2008年进行了完善。其实该算法是Herbert Bay在博士期间的研究内容,并作为博士毕业论文的一部分发表。
SURF算法的部分灵感来自于SIFT算法,但正如它的名字一样,该算法除了具有重复性高的检测器和可区分性好的描述符特点外,还具有很强的鲁棒性以及更高的运算速度,如Bay所述,SURF至少比SIFT快3倍以上,综合性能要优于SIFT算法。与SIFT算法一样,SURF算法也在美国申请了专利。
与我的上一篇SIFT文章一样,该文分为原理解释、源码分析和应用实例。但该文的内容也较多,对公式和图表的粘贴复制太麻烦,所以我也把这篇文章上传到了百度文库和csdn,这样更方便大家阅读,网址为:
http://download.csdn.net/detail/zhaocj/8347849
http://wenku.baidu.com/view/2f1e4d8ef705cc1754270945.html
在这里我仅把源码分析部分复制到这里,建议大家还是把原理和源码结合着看,这样更容易理解,所以最好还是看详细的全文。
实现SURF算法的文件为sources/modules/nonfree/src/surf.cpp
SURF类的构造函数为:
SURF::SURF()
SURF::SURF(doublehessianThreshold, int nOctaves=4, int nOctaveLayers=2, bool extended=true,
bool upright=false )
参数的含义为:
hessianThreshold为Hessian矩阵行列式响应值的阈值
nOctaves为图像堆的组数
nOctaveLayers为图像堆中每组中的中间层数,该值加2等于每组图像中所包含的层数
extended表示是128维描述符,还是64维描述符,为true时,表示128维描述符
upright表示是否采用U-SURF算法,为true时,采用U-SURF算法
SURF类的重载运算符( )为:
- void SURF::operator()(InputArray _img, InputArray _mask,
- CV_OUT vector<KeyPoint>& keypoints,
- OutputArray _descriptors,
- bool useProvidedKeypoints) const
- //_img表示输入8位灰度图像
- //_mask为掩码矩阵
- //keypoints为特征点矢量数组
- //_descriptors为特征点描述符
- //useProvidedKeypoints表示是否运行特征点的检测,该值为true表示不进行特征点的检测,而只是利用输入的特征点进行特征点描述符的运算。
- {
- //定义、初始化各种矩阵,sum为输入图像的积分图像矩阵,mask1和msum为掩码所需矩阵
- Mat img = _img.getMat(), mask = _mask.getMat(), mask1, sum, msum;
- //变量doDescriptors表示是否进行特征点描述符的运算
- bool doDescriptors = _descriptors.needed();
- //确保输入图像的正确
- CV_Assert(!img.empty() && img.depth() == CV_8U);
- if( img.channels() > 1 )
- cvtColor(img, img, COLOR_BGR2GRAY);
- //确保各类输入参数的正确
- CV_Assert(mask.empty() || (mask.type() == CV_8U && mask.size() == img.size()));
- CV_Assert(hessianThreshold >= 0);
- CV_Assert(nOctaves > 0);
- CV_Assert(nOctaveLayers > 0);
- //计算输入图像的积分图像
- integral(img, sum, CV_32S);
- // Compute keypoints only if we are not asked for evaluating the descriptors are some given locations:
- //是否进行特征点检测运算,useProvidedKeypoints=false表示进行该运算
- if( !useProvidedKeypoints )
- {
- if( !mask.empty() ) //掩码
- {
- cv::min(mask, 1, mask1);
- integral(mask1, msum, CV_32S); //msum为掩码图像的积分图像
- }
- //进行Fast-Hessian特征点检测,fastHessianDetector函数在后面给出详细解释
- fastHessianDetector( sum, msum, keypoints, nOctaves, nOctaveLayers, (float)hessianThreshold );
- }
- //N为所检测到的特征点数量
- int i, j, N = (int)keypoints.size();
- if( N > 0 ) //如果检测到了特征点
- {
- Mat descriptors; //描述符变量矩阵
- bool _1d = false;
- //由extended变量得到描述符是128维的还是64维的
- int dcols = extended ? 128 : 64;
- //一个描述符的存储空间大小
- size_t dsize = dcols*sizeof(float);
- if( doDescriptors ) //需要得到描述符
- {
- //定义所有描述符的排列形式
- _1d = _descriptors.kind() == _InputArray::STD_VECTOR && _descriptors.type() == CV_32F;
- if( _1d )
- {
- //所有特征点的描述符连在一起,组成一个矢量
- _descriptors.create(N*dcols, 1, CV_32F);
- descriptors = _descriptors.getMat().reshape(1, N);
- }
- else
- {
- //一个特征点就是一个描述符,一共N个描述符
- _descriptors.create(N, dcols, CV_32F);
- descriptors = _descriptors.getMat();
- }
- }
- // we call SURFInvoker in any case, even if we do not need descriptors,
- // since it computes orientation of each feature.
- //方向角度的分配和描述符的生成,SURFInvoker在后面给出了详细的解释
- parallel_for_(Range(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
- // remove keypoints that were marked for deletion
- //删除那些被标注为无效的特征点
- // i表示特征点删除之前的特征点索引,j表示删除之后的特征点索引
- for( i = j = 0; i < N; i++ )
- {
- if( keypoints[i].size > 0 ) //size大于0,表示该特征点为有效的特征点
- {
- if( i > j ) // i > j表示在i之前有被删除的特征点
- {
- //用后面的特征点依次填补被删除的那些特征点的位置
- keypoints[j] = keypoints[i];
- //相应的描述符的空间位置也要调整
- if( doDescriptors )
- memcpy( descriptors.ptr(j), descriptors.ptr(i), dsize);
- }
- j++; //索引值计数
- }
- }
- //N表示特征点删除之前的总数,j表示删除之后的总数,N > j表明有一些特征点被删除
- if( N > j )
- {
- //重新赋值特征点的总数
- N = j;
- keypoints.resize(N);
- if( doDescriptors )
- {
- //根据描述符的排列形式,重新赋值
- Mat d = descriptors.rowRange(0, N);
- if( _1d )
- d = d.reshape(1, N*dcols);
- d.copyTo(_descriptors);
- }
- }
- }
- }
SURF算法中特征点检测函数fastHessianDetector:
- static void fastHessianDetector( const Mat& sum, const Mat& mask_sum, vector<KeyPoint>& keypoints, int nOctaves, int nOctaveLayers, float hessianThreshold )
- {
- /* Sampling step along image x and y axes at first octave. This is doubled
- for each additional octave. WARNING: Increasing this improves speed,
- however keypoint extraction becomes unreliable. */
- //为了减少运算量,提高计算速度,程序设置了采样间隔。即在第一组的各层图像的每个像素都通过盒状滤波器模块进行滤波处理,而在第二组的各层图像中采用隔点(采样间隔为21)滤波器处理的方式,第三组的采样间隔为22,其他组以此类推。
- //定义初始采样间隔,即第一组各层图像的采样间隔
- const int SAMPLE_STEP0 = 1;
- //总的层数,它等于组数乘以每组的层数
- int nTotalLayers = (nOctaveLayers+2)*nOctaves;
- //中间层数,即进行非最大值抑制的层数
- int nMiddleLayers = nOctaveLayers*nOctaves;
- //各类矢量变量,dets为Hessian矩阵行列式矢量矩阵,traces为Hessian矩阵迹矢量矩阵,sizes为盒状滤波器的尺寸大小矢量,sampleSteps为每一层的采样间隔矢量,middleIndices为中间层(即进行非最大值抑制的层)的索引矢量
- vector<Mat> dets(nTotalLayers);
- vector<Mat> traces(nTotalLayers);
- vector<int> sizes(nTotalLayers);
- vector<int> sampleSteps(nTotalLayers);
- vector<int> middleIndices(nMiddleLayers);
- //清空特征点变量
- keypoints.clear();
- // Allocate space and calculate properties of each layer
- //index为变量nTotalLayers的索引,middleIndex为变量nMiddleLayers的索引,step为采样间隔
- int index = 0, middleIndex = 0, step = SAMPLE_STEP0;
- //分配内存空间,计算各层的数据
- //由于采用了采样间隔,因此各层图像所检测到的行列式、迹等数量会不同,这样就需要为每一层分配不同的空间大小,保存不同数量的数据
- for( int octave = 0; octave < nOctaves; octave++ )
- {
- for( int layer = 0; layer < nOctaveLayers+2; layer++ )
- {
- /* The integral image sum is one pixel bigger than the source image*/
- dets[index].create( (sum.rows-1)/step, (sum.cols-1)/step, CV_32F );
- traces[index].create( (sum.rows-1)/step, (sum.cols-1)/step, CV_32F );
- //SURF_HAAR_SIZE0 = 9表示第一组第一层所使用的盒状滤波器的尺寸大小
- //SURF_HAAR_SIZE_INC = 6表示盒状滤波器尺寸大小增加的像素数量
- //公式11
- sizes[index] = (SURF_HAAR_SIZE0 + SURF_HAAR_SIZE_INC*layer) << octave;
- sampleSteps[index] = step;
- if( 0 < layer && layer <= nOctaveLayers )
- middleIndices[middleIndex++] = index; //标记中间层对应于图像堆中的所在层
- index++; //层的索引值加1
- }
- step *= 2; //采样间隔翻倍
- }
- // Calculate hessian determinant and trace samples in each layer
- //计算每层的Hessian矩阵的行列式和迹,具体实现的函数为SURFBuildInvoker,该函数在后面给出详细的讲解
- //parallel_for_表示这些计算可以进行平行处理(当然在编译的时候要选择TBB才能实现平行计算)。
- parallel_for_( Range(0, nTotalLayers),
- SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
- // Find maxima in the determinant of the hessian
- //检测特征点,具体实现的函数为SURFFindInvoker,该函数在后面给出详细的讲解
- parallel_for_( Range(0, nMiddleLayers),
- SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
- sampleSteps, middleIndices, keypoints,
- nOctaveLayers, hessianThreshold) );
- //特征点排序
- std::sort(keypoints.begin(), keypoints.end(), KeypointGreater());
- }
计算行列式和迹的类:
- // Multi-threaded construction of the scale-space pyramid
- struct SURFBuildInvoker : ParallelLoopBody
- {
- //构造函数
- SURFBuildInvoker( const Mat& _sum, const vector<int>& _sizes,
- const vector<int>& _sampleSteps,
- vector<Mat>& _dets, vector<Mat>& _traces )
- {
- sum = &_sum;
- sizes = &_sizes;
- sampleSteps = &_sampleSteps;
- dets = &_dets;
- traces = &_traces;
- }
- //重载( )运算符
- void operator()(const Range& range) const
- {
- //遍历所有层,计算每层的行列式和迹,calcLayerDetAndTrace函数在后面给出详细介绍
- for( int i=range.start; i<range.end; i++ )
- calcLayerDetAndTrace( *sum, (*sizes)[i], (*sampleSteps)[i], (*dets)[i], (*traces)[i] );
- }
- //定义类的成员变量
- const Mat *sum;
- const vector<int> *sizes;
- const vector<int> *sampleSteps;
- vector<Mat>* dets;
- vector<Mat>* traces;
- };
计算某一层的行列式和迹calcLayerDetAndTrace函数:
- /*
- * Calculate the determinant and trace of the Hessian for a layer of the
- * scale-space pyramid
- */
- static void calcLayerDetAndTrace( const Mat& sum, int size, int sampleStep,
- Mat& det, Mat& trace )
- {
- //NX、NY、NXY分别表示Dxx模板、Dyy模板和Dxy模板中突起部分的数量,即公式5中的变量N
- const int NX=3, NY=3, NXY=4;
- //dx_s变量、dy_s变量、dxy_s变量的意思是分别用二维数组表示9×9的Dxx模板、Dyy模板和Dxy模板,二维数组的第二维代表模板的突起部分,第一维代表突起部分的坐标位置和权值。以模板的左上角为坐标原点,数组第一维中5个元素中的前两个表示突起部分左上角坐标,紧接着的后2个元素表示突起部分的右下角坐标,最后一个元素表示该突起部分的权值
- const int dx_s[NX][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
- const int dy_s[NY][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
- const int dxy_s[NXY][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
- //Dx、Dy和Dxy为尺寸重新调整以后的模板的信息数据
- SurfHF Dx[NX], Dy[NY], Dxy[NXY];
- //判断盒状滤波器的尺寸大小是否超出了输入图像的边界
- if( size > sum.rows-1 || size > sum.cols-1 )
- return;
- //由9×9的初始模板生成其他尺寸大小的盒状滤波器模板,resizeHaarPattern函数在后面给出详细的解释
- resizeHaarPattern( dx_s , Dx , NX , 9, size, sum.cols );
- resizeHaarPattern( dy_s , Dy , NY , 9, size, sum.cols );
- resizeHaarPattern( dxy_s, Dxy, NXY, 9, size, sum.cols );
- /* The integral image 'sum' is one pixel bigger than the source image */
- //由该层的采样间隔计算图像的行和列的采样像素的数量
- int samples_i = 1+(sum.rows-1-size)/sampleStep;
- int samples_j = 1+(sum.cols-1-size)/sampleStep;
- /* Ignore pixels where some of the kernel is outside the image */
- //由该层的模板尺寸和采样间隔计算因为边界效应而需要预留的图像边界的大小
- int margin = (size/2)/sampleStep;
- //遍历该层所有的采样像素
- for( int i = 0; i < samples_i; i++ )
- {
- //在积分图像中,行首地址指针
- const int* sum_ptr = sum.ptr<int>(i*sampleStep);
- //去掉边界预留部分的该行的行列式首地址指针
- float* det_ptr = &det.at<float>(i+margin, margin);
- //去掉边界预留部分的该行的迹首地址指针
- float* trace_ptr = &trace.at<float>(i+margin, margin);
- for( int j = 0; j < samples_j; j++ )
- {
- //利用公式5计算三个盒状滤波器的响应值,calcHaarPattern函数很简单,就不再给出解释
- float dx = calcHaarPattern( sum_ptr, Dx , 3 );
- float dy = calcHaarPattern( sum_ptr, Dy , 3 );
- float dxy = calcHaarPattern( sum_ptr, Dxy, 4 );
- //指向积分图像中该行的下一个像素
- sum_ptr += sampleStep;
- //利用公式6计算行列式
- det_ptr[j] = dx*dy - 0.81f*dxy*dxy;
- //利用公式8计算迹
- trace_ptr[j] = dx + dy;
- }
- }
- }
由9×9的初始模板生成其他尺寸大小的盒状滤波器模板resizeHaarPattern函数:
- // src为源模板,即9×9的模板
- // dst为得到的新模板的信息数据
- // n为该模板的突起部分的数量
- // oldSize为源模板src的尺寸,即为9
- // newSize为新模板dst的尺寸
- // widthStep为积分图像的宽步长,也就是输入图像的宽步长
- static void
- resizeHaarPattern( const int src[][5], SurfHF* dst, int n, int oldSize, int newSize, int widthStep )
- {
- //由新、旧两个模板的尺寸计算它们的比值
- float ratio = (float)newSize/oldSize;
- //遍历模板的所有突起部分
- for( int k = 0; k < n; k++ )
- {
- //由比值计算新模板突起部分的左上角和右下角坐标
- int dx1 = cvRound( ratio*src[k][0] );
- int dy1 = cvRound( ratio*src[k][1] );
- int dx2 = cvRound( ratio*src[k][2] );
- int dy2 = cvRound( ratio*src[k][3] );
- //由突起部分的坐标计算它在积分图像的坐标位置,即图1中的四个点A、B、C、D
- dst[k].p0 = dy1*widthStep + dx1;
- dst[k].p1 = dy2*widthStep + dx1;
- dst[k].p2 = dy1*widthStep + dx2;
- dst[k].p3 = dy2*widthStep + dx2;
- //公式5中的wn/sn
- dst[k].w = src[k][4]/((float)(dx2-dx1)*(dy2-dy1));
- }
- }
检测特征点的类:
- // Multi-threaded search of the scale-space pyramid for keypoints
- struct SURFFindInvoker : ParallelLoopBody
- {
- //构造函数
- SURFFindInvoker( const Mat& _sum, const Mat& _mask_sum,
- const vector<Mat>& _dets, const vector<Mat>& _traces,
- const vector<int>& _sizes, const vector<int>& _sampleSteps,
- const vector<int>& _middleIndices, vector<KeyPoint>& _keypoints,
- int _nOctaveLayers, float _hessianThreshold )
- {
- sum = &_sum; //积分图像
- mask_sum = &_mask_sum; //掩码所需的积分图像
- dets = &_dets; //行列式
- traces = &_traces; //迹
- sizes = &_sizes; //尺寸
- sampleSteps = &_sampleSteps; //采样间隔
- middleIndices = &_middleIndices; //中间层
- keypoints = &_keypoints; //特征点
- nOctaveLayers = _nOctaveLayers; //层
- hessianThreshold = _hessianThreshold; //行列式的阈值
- }
- //成员函数,该函数在后面给出详细解释
- static void findMaximaInLayer( const Mat& sum, const Mat& mask_sum,
- const vector<Mat>& dets, const vector<Mat>& traces,
- const vector<int>& sizes, vector<KeyPoint>& keypoints,
- int octave, int layer, float hessianThreshold, int sampleStep );
- //重载( )运算符
- void operator()(const Range& range) const
- {
- //遍历中间层
- for( int i=range.start; i<range.end; i++ )
- {
- int layer = (*middleIndices)[i]; //提取出中间层所对应的图像堆中的所在层
- int octave = i / nOctaveLayers; //计算该层所对应的组索引
- //检测特征点
- findMaximaInLayer( *sum, *mask_sum, *dets, *traces, *sizes,
- *keypoints, octave, layer, hessianThreshold,
- (*sampleSteps)[layer] );
- }
- }
- //成员变量
- const Mat *sum;
- const Mat *mask_sum;
- const vector<Mat>* dets;
- const vector<Mat>* traces;
- const vector<int>* sizes;
- const vector<int>* sampleSteps;
- const vector<int>* middleIndices;
- vector<KeyPoint>* keypoints;
- int nOctaveLayers;
- float hessianThreshold;
- static Mutex findMaximaInLayer_m;
- };
检测特征点的findMaximaInLayer函数:
- /*
- * Find the maxima in the determinant of the Hessian in a layer of the
- * scale-space pyramid
- */
- void SURFFindInvoker::findMaximaInLayer( const Mat& sum, const Mat& mask_sum,
- const vector<Mat>& dets, const vector<Mat>& traces,
- const vector<int>& sizes, vector<KeyPoint>& keypoints,
- int octave, int layer, float hessianThreshold, int sampleStep )
- {
- // Wavelet Data
- //下面变量在使用掩码时会用到
- const int NM=1;
- const int dm[NM][5] = { {0, 0, 9, 9, 1} };
- SurfHF Dm;
- //该层所用的滤波器模板的尺寸
- int size = sizes[layer];
- // The integral image 'sum' is one pixel bigger than the source image
- //根据采样间隔得到该层图像每行和每列的采样像素数量
- int layer_rows = (sum.rows-1)/sampleStep;
- int layer_cols = (sum.cols-1)/sampleStep;
- // Ignore pixels without a 3x3x3 neighbourhood in the layer above
- //在每组内,上一层所用的滤波器模板要比下一层的模板尺寸大,因此由于边界效应而预留的边界空白区域就要多,所以在进行3×3×3邻域比较时,就要把多出来的这部分区域去掉
- int margin = (sizes[layer+1]/2)/sampleStep+1;
- //如果要用掩码,则需要调整掩码矩阵的大小
- if( !mask_sum.empty() )
- resizeHaarPattern( dm, &Dm, NM, 9, size, mask_sum.cols );
- //计算该层图像的宽步长
- int step = (int)(dets[layer].step/dets[layer].elemSize());
- //遍历该层图像的所有像素
- for( int i = margin; i < layer_rows - margin; i++ )
- {
- //行列式矩阵当前行的首地址指针
- const float* det_ptr = dets[layer].ptr<float>(i);
- //迹矩阵当前行的首地址指针
- const float* trace_ptr = traces[layer].ptr<float>(i);
- for( int j = margin; j < layer_cols-margin; j++ )
- {
- float val0 = det_ptr[j]; //行列式矩阵当前像素
- if( val0 > hessianThreshold ) //行列式值要大于所设置的阈值
- {
- /* Coordinates for the start of the wavelet in the sum image. There
- is some integer division involved, so don't try to simplify this
- (cancel out sampleStep) without checking the result is the same */
- //该点对应于输入图像的坐标位置
- int sum_i = sampleStep*(i-(size/2)/sampleStep);
- int sum_j = sampleStep*(j-(size/2)/sampleStep);
- /* The 3x3x3 neighbouring samples around the maxima.
- The maxima is included at N9[1][4] */
- const float *det1 = &dets[layer-1].at<float>(i, j); //下一层像素
- const float *det2 = &dets[layer].at<float>(i, j); //当前层像素
- const float *det3 = &dets[layer+1].at<float>(i, j); //上一层像素
- //3×3×3区域内的所有27个像素的行列式值
- float N9[3][9] = { { det1[-step-1], det1[-step], det1[-step+1],
- det1[-1] , det1[0] , det1[1],
- det1[step-1] , det1[step] , det1[step+1] },
- { det2[-step-1], det2[-step], det2[-step+1],
- det2[-1] , det2[0] , det2[1],
- det2[step-1] , det2[step] , det2[step+1] },
- { det3[-step-1], det3[-step], det3[-step+1],
- det3[-1] , det3[0] , det3[1],
- det3[step-1] , det3[step] , det3[step+1] } };
- /* Check the mask - why not just check the mask at the center of the wavelet? */
- //去掉掩码没有包含的像素
- if( !mask_sum.empty() )
- {
- const int* mask_ptr = &mask_sum.at<int>(sum_i, sum_j);
- float mval = calcHaarPattern( mask_ptr, &Dm, 1 );
- if( mval < 0.5 )
- continue;
- }
- /* Non-maxima suppression. val0 is at N9[1][4]*/
- //非最大值抑制,当前点与其周围的26个点比较,val0就是N9[1][4]
- if( val0 > N9[0][0] && val0 > N9[0][1] && val0 > N9[0][2] &&
- val0 > N9[0][3] && val0 > N9[0][4] && val0 > N9[0][5] &&
- val0 > N9[0][6] && val0 > N9[0][7] && val0 > N9[0][8] &&
- val0 > N9[1][0] && val0 > N9[1][1] && val0 > N9[1][2] &&
- val0 > N9[1][3] && val0 > N9[1][5] &&
- val0 > N9[1][6] && val0 > N9[1][7] && val0 > N9[1][8] &&
- val0 > N9[2][0] && val0 > N9[2][1] && val0 > N9[2][2] &&
- val0 > N9[2][3] && val0 > N9[2][4] && val0 > N9[2][5] &&
- val0 > N9[2][6] && val0 > N9[2][7] && val0 > N9[2][8] )
- {
- /* Calculate the wavelet center coordinates for the maxima */
- //插值的需要,坐标位置要减0.5
- float center_i = sum_i + (size-1)*0.5f;
- float center_j = sum_j + (size-1)*0.5f;
- //定义该候选特征点
- KeyPoint kpt( center_j, center_i, (float)sizes[layer],
- -1, val0, octave, CV_SIGN(trace_ptr[j]) );
- /* Interpolate maxima location within the 3x3x3 neighbourhood */
- //ds为滤波器模板尺寸的变化率,即当前层尺寸与下一层尺寸的差值
- int ds = size - sizes[layer-1];
- //插值计算,interpolateKeypoint函数在后面给出详细解释
- int interp_ok = interpolateKeypoint( N9, sampleStep, sampleStep, ds, kpt );
- /* Sometimes the interpolation step gives a negative size etc. */
- //如果得到合理的插值结果,保存该特征点
- if( interp_ok )
- {
- /*printf( "KeyPoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
- cv::AutoLock lock(findMaximaInLayer_m);
- keypoints.push_back(kpt); //当前像素为特征点,保存该数据
- }
- }
- }
- }
- }
- }
插值计算interpolateKeypoint函数:
- /*
- * Maxima location interpolation as described in "Invariant Features from
- * Interest Point Groups" by Matthew Brown and David Lowe. This is performed by
- * fitting a 3D quadratic to a set of neighbouring samples.
- *
- * The gradient vector and Hessian matrix at the initial keypoint location are
- * approximated using central differences. The linear system Ax = b is then
- * solved, where A is the Hessian, b is the negative gradient, and x is the
- * offset of the interpolated maxima coordinates from the initial estimate.
- * This is equivalent to an iteration of Netwon's optimisation algorithm.
- *
- * N9 contains the samples in the 3x3x3 neighbourhood of the maxima
- * dx is the sampling step in x
- * dy is the sampling step in y
- * ds is the sampling step in size
- * point contains the keypoint coordinates and scale to be modified
- *
- * Return value is 1 if interpolation was successful, 0 on failure.
- */
- //N9为极值所在的3×3×3邻域
- //dx,dy,ds为横、纵坐标和尺度坐标的变化率(单位步长),dx和dy就是采样间隔
- static int
- interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt )
- {
- //b为公式15中的负值
- Vec3f b(-(N9[1][5]-N9[1][3])/2, // Negative 1st deriv with respect to x
- -(N9[1][7]-N9[1][1])/2, // Negative 1st deriv with respect to y
- -(N9[2][4]-N9[0][4])/2); // Negative 1st deriv with respect to s
- //A为公式14
- Matx33f A(
- N9[1][3]-2*N9[1][4]+N9[1][5], // 2nd deriv x, x
- (N9[1][8]-N9[1][6]-N9[1][2]+N9[1][0])/4, // 2nd deriv x, y
- (N9[2][5]-N9[2][3]-N9[0][5]+N9[0][3])/4, // 2nd deriv x, s
- (N9[1][8]-N9[1][6]-N9[1][2]+N9[1][0])/4, // 2nd deriv x, y
- N9[1][1]-2*N9[1][4]+N9[1][7], // 2nd deriv y, y
- (N9[2][7]-N9[2][1]-N9[0][7]+N9[0][1])/4, // 2nd deriv y, s
- (N9[2][5]-N9[2][3]-N9[0][5]+N9[0][3])/4, // 2nd deriv x, s
- (N9[2][7]-N9[2][1]-N9[0][7]+N9[0][1])/4, // 2nd deriv y, s
- N9[0][4]-2*N9[1][4]+N9[2][4]); // 2nd deriv s, s
- //x=A-1b,公式13的结果,即偏移量
- Vec3f x = A.solve(b, DECOMP_LU);
- //判断偏移量是否合理,不能大于1,否则会偏移到别的特征点位置上
- bool ok = (x[0] != 0 || x[1] != 0 || x[2] != 0) &&
- std::abs(x[0]) <= 1 && std::abs(x[1]) <= 1 && std::abs(x[2]) <= 1;
- if( ok ) //如果偏移量合理
- {
- //由公式16,计算偏移后的位置
- kpt.pt.x += x[0]*dx;
- kpt.pt.y += x[1]*dy;
- //其实这里的size表示的是盒状滤波器模板的尺寸,但由于模板尺寸和图像尺度可以通过公式9转换,所有保存尺寸信息也无妨
- kpt.size = (float)cvRound( kpt.size + x[2]*ds );
- }
- return ok;
- }
方向分配和确定描述符类:
- struct SURFInvoker : ParallelLoopBody
- {
- // ORI_RADIUS表示6s圆邻域的采样半径长,ORI_WIN表示扇形滑动窗口的张角,PATCH_SZ表示20s方邻域的采样边长
- enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 };
- //构造函数
- SURFInvoker( const Mat& _img, const Mat& _sum,
- vector<KeyPoint>& _keypoints, Mat& _descriptors,
- bool _extended, bool _upright )
- {
- keypoints = &_keypoints; //特征点
- descriptors = &_descriptors; //描述符
- img = &_img; //输入图像
- sum = &_sum; //积分图像
- extended = _extended; //描述符是128维还是64维
- upright = _upright; //是否采用U-SURF算法
- // Simple bound for number of grid points in circle of radius ORI_RADIUS
- //以特征点为中心、半径为6s的圆邻域内,以s为采样间隔时所采样的像素数量
- //nOriSampleBound为正方形内的像素数量,在这里是用该变量来限定实际的采样数量
- const int nOriSampleBound = (2*ORI_RADIUS+1)*(2*ORI_RADIUS+1);
- // Allocate arrays
- //分配数组空间
- apt.resize(nOriSampleBound);
- aptw.resize(nOriSampleBound); //aptw为6s圆邻域内的高斯加权系数
- DW.resize(PATCH_SZ*PATCH_SZ); //DW为20s方邻域内的高斯加权系数
- /* Coordinates and weights of samples used to calculate orientation */
- //得到6s圆邻域内的高斯加权函数(方差为2.5s,与Bay原著取值不同)模板的系数,SURF_ORI_SIGMA = 2.5f;
- Mat G_ori = getGaussianKernel( 2*ORI_RADIUS+1, SURF_ORI_SIGMA, CV_32F );
- //实际6s圆邻域内的采样数
- nOriSamples = 0;
- //事先计算好6s圆邻域内所有采样像素的高斯加权系数
- for( int i = -ORI_RADIUS; i <= ORI_RADIUS; i++ )
- {
- for( int j = -ORI_RADIUS; j <= ORI_RADIUS; j++ )
- {
- if( i*i + j*j <= ORI_RADIUS*ORI_RADIUS )
- {
- apt[nOriSamples] = cvPoint(i,j); //相对于圆心的坐标位置
- //高斯加权系数
- aptw[nOriSamples++] = G_ori.at<float>(i+ORI_RADIUS,0) * G_ori.at<float>(j+ORI_RADIUS,0);
- }
- }
- }
- //判断实际的采样数量是否超过了最大限定数量
- CV_Assert( nOriSamples <= nOriSampleBound );
- /* Gaussian used to weight descriptor samples */
- //得到20s方邻域内的高斯加权函数(方差为3.3s)模板的系数,SURF_DESC_SIGMA = 3.3f
- Mat G_desc = getGaussianKernel( PATCH_SZ, SURF_DESC_SIGMA, CV_32F );
- //事先计算好20s方邻域内所有采样像素的高斯加权系数
- for( int i = 0; i < PATCH_SZ; i++ )
- {
- for( int j = 0; j < PATCH_SZ; j++ )
- DW[i*PATCH_SZ+j] = G_desc.at<float>(i,0) * G_desc.at<float>(j,0);
- }
- }
- //重载( )运算符
- void operator()(const Range& range) const
- {
- /* X and Y gradient wavelet data */
- //x方向和y方向的Haar小波响应的突起部分的数量
- const int NX=2, NY=2;
- //dx_s和dy_s的含义与盒状滤波器模板的含义相同,数组中五个元素的前四个元素表示突起部分左上角和右下角的坐标位置,第五个元素为权值
- const int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
- const int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
- // Optimisation is better using nOriSampleBound than nOriSamples for
- // array lengths. Maybe because it is a constant known at compile time
- //以特征点为中心、半径为6s的圆邻域内,以s为采样间隔时所采样的像素数量
- //nOriSampleBound实为正方形内的像素数量,正方形的面积要大于圆形,所以为了保险起见,后面在定义变量的时候,用该值表示变量的数量
- const int nOriSampleBound =(2*ORI_RADIUS+1)*(2*ORI_RADIUS+1);
- //数组X保存6s圆邻域内的Haar小波响应的x方向梯度值,数组Y保存y方向梯度,数组angle保存角度
- float X[nOriSampleBound], Y[nOriSampleBound], angle[nOriSampleBound];
- //定义20s方邻域的二维数组
- uchar PATCH[PATCH_SZ+1][PATCH_SZ+1];
- //DX和DY分别为20s方邻域内采样像素的dx和dy
- float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
- //由于矩阵matX,matY和_angle的rows都为1,所以这三个矩阵变量其实是矢量变量。它们的值分别为X,Y和angle数组的值
- CvMat matX = cvMat(1, nOriSampleBound, CV_32F, X);
- CvMat matY = cvMat(1, nOriSampleBound, CV_32F, Y);
- CvMat _angle = cvMat(1, nOriSampleBound, CV_32F, angle);
- //定义20s方邻域的矩阵变量
- Mat _patch(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
- //确定描述符是128维还是64维
- int dsize = extended ? 128 : 64;
- //k1为0,k2为N,即特征点的总数
- int k, k1 = range.start, k2 = range.end;
- //该变量代表特征点的最大尺度
- float maxSize = 0;
- //遍历所有特征点,找到特征点的最大尺度,其实找到的是盒状滤波器模板的最大尺寸
- for( k = k1; k < k2; k++ )
- {
- maxSize = std::max(maxSize, (*keypoints)[k].size);
- }
- //得到20s方邻域内的最大值,即带入尺度s的最大值
- int imaxSize = std::max(cvCeil((PATCH_SZ+1)*maxSize*1.2f/9.0f), 1);
- //在系统中为20s方邻域开辟一段包括了所有可能的s值的内存空间
- Ptr<CvMat> winbuf = cvCreateMat( 1, imaxSize*imaxSize, CV_8U );
- //遍历所有特征点
- for( k = k1; k < k2; k++ )
- {
- int i, j, kk, nangle;
- float* vec;
- SurfHF dx_t[NX], dy_t[NY];
- KeyPoint& kp = (*keypoints)[k]; //当前特征点
- float size = kp.size; //特征点尺度,其实是盒状滤波器模板的尺寸
- Point2f center = kp.pt; //特征点的位置坐标
- /* The sampling intervals and wavelet sized for selecting an orientation
- and building the keypoint descriptor are defined relative to 's' */
- //由公式9,把模板尺寸转换为图像尺度,得到真正的尺度信息
- float s = size*1.2f/9.0f;
- /* To find the dominant orientation, the gradients in x and y are
- sampled in a circle of radius 6s using wavelets of size 4s.
- We ensure the gradient wavelet size is even to ensure the
- wavelet pattern is balanced and symmetric around its center */
- //得到确定方向时所用的haar小波响应的尺寸,即4s
- int grad_wav_size = 2*cvRound( 2*s );
- //如果haar小波响应的尺寸大于输入图像的尺寸,则进入if语句
- if( sum->rows < grad_wav_size || sum->cols < grad_wav_size )
- {
- /* when grad_wav_size is too big,
- * the sampling of gradient will be meaningless
- * mark keypoint for deletion. */
- //进入该if语句,说明haar小波响应的尺寸太大,在这么大的尺寸下进行任何操作都是没有意义的,所以标注该特征点为无效的特征点,待后面的代码删除该特征点
- kp.size = -1;
- continue; //继续下一个特征点的操作
- }
- float descriptor_dir = 360.f - 90.f;
- if (upright == 0) //不采用U-SURF算法
- {
- //调整两个Haar小波响应的尺寸大小
- resizeHaarPattern( dx_s, dx_t, NX, 4, grad_wav_size, sum->cols );
- resizeHaarPattern( dy_s, dy_t, NY, 4, grad_wav_size, sum->cols );
- //遍历6s圆邻域内的所有采样像素
- for( kk = 0, nangle = 0; kk < nOriSamples; kk++ )
- {
- //x和y为该采样像素在输入图像的坐标位置
- int x = cvRound( center.x + apt[kk].x*s - (float)(grad_wav_size-1)/2 );
- int y = cvRound( center.y + apt[kk].y*s - (float)(grad_wav_size-1)/2 );
- //判断该坐标是否超出了图像的边界
- if( y < 0 || y >= sum->rows - grad_wav_size ||
- x < 0 || x >= sum->cols - grad_wav_size )
- continue; //超出了,则继续下一个采样像素的操作
- //积分图像的像素
- const int* ptr = &sum->at<int>(y, x);
- //分别得到x方向和y方向的Haar小波响应值
- float vx = calcHaarPattern( ptr, dx_t, 2 );
- float vy = calcHaarPattern( ptr, dy_t, 2 );
- //对Haar小波响应值进行高斯加权处理
- X[nangle] = vx*aptw[kk];
- Y[nangle] = vy*aptw[kk];
- nangle++; //为实际采样像素的总数计数
- }
- if( nangle == 0 ) // nangle == 0表明6s圆邻域内没有采样像素
- {
- // No gradient could be sampled because the keypoint is too
- // near too one or more of the sides of the image. As we
- // therefore cannot find a dominant direction, we skip this
- // keypoint and mark it for later deletion from the sequence.
- //这种情况说明该特征点与图像的边界过于接近,因此也要把它删除
- kp.size = -1;
- continue; //继续下一个特征点的操作
- }
- //把matX,matY和_angle的长度重新定义为实际的6s圆邻域内的采样点数
- matX.cols = matY.cols = _angle.cols = nangle;
- //用cvCartToPolar函数(直角坐标转换为极坐标)求梯度坐标系下采样点的角度,结果保存在矩阵_angle对应的angle数组内
- cvCartToPolar( &matX, &matY, 0, &_angle, 1 );
- float bestx = 0, besty = 0, descriptor_mod = 0;
- //SURF_ORI_SEARCH_INC = 5
- //以5度的步长,遍历360度的圆周
- for( i = 0; i < 360; i += SURF_ORI_SEARCH_INC )
- {
- //sumx和sumy分别表示梯度坐标系下扇形滑动窗口内所有采样像素的横、纵坐标之和,temp_mod表示模
- float sumx = 0, sumy = 0, temp_mod;
- //遍历所有的采样像素
- for( j = 0; j < nangle; j++ )
- {
- //得到当前采样点相对于旋转扇区的角度
- int d = std::abs(cvRound(angle[j]) - i);
- //判断d是否在正负30度之间,即判断当前点是否落在该扇形滑动窗口内
- if( d < ORI_WIN/2 || d > 360-ORI_WIN/2 )
- {
- //累计在该滑动窗口内的像素的横、纵坐标之和
- sumx += X[j];
- sumy += Y[j];
- }
- }
- //公式17,求该滑动窗口内的模
- temp_mod = sumx*sumx + sumy*sumy;
- //判断该滑动窗口内的模是否为已计算过的最大值
- if( temp_mod > descriptor_mod )
- {
- //重新给模最大值,横、纵坐标之和赋值
- descriptor_mod = temp_mod;
- bestx = sumx;
- besty = sumy;
- }
- }
- //公式18和公式19,求特征点的方向
- descriptor_dir = fastAtan2( -besty, bestx );
- }
- //为特征点的方向赋值,如果采用U-SURF算法,则它的方向恒为270度;
- kp.angle = descriptor_dir;
- //如果不需要生成描述符,则不进行if后面的操作
- if( !descriptors || !descriptors->data )
- continue;
- //以下部分为描述符的生成
- /* Extract a window of pixels around the keypoint of size 20s */
- // win_size为20s,即20s方邻域的真正尺寸
- int win_size = (int)((PATCH_SZ+1)*s);
- //确保20s方邻域的尺寸不超过事先设置好的内存空间
- CV_Assert( winbuf->cols >= win_size*win_size );
- //定义20s方邻域的矩阵
- Mat win(win_size, win_size, CV_8U, winbuf->data.ptr);
- if( !upright ) //不采用U-SURF算法
- {
- //把特征点的方向角度转换为弧度形式
- descriptor_dir *= (float)(CV_PI/180);
- //方向角度的正弦值和余弦值
- float sin_dir = -std::sin(descriptor_dir);
- float cos_dir = std::cos(descriptor_dir);
- /* Subpixel interpolation version (slower). Subpixel not required since
- the pixels will all get averaged when we scale down to 20 pixels */
- /*
- float w[] = { cos_dir, sin_dir, center.x,
- -sin_dir, cos_dir , center.y };
- CvMat W = cvMat(2, 3, CV_32F, w);
- cvGetQuadrangleSubPix( img, &win, &W );
- */
- float win_offset = -(float)(win_size-1)/2;
- //得到旋转校正以后的20s方邻域的左上角坐标
- float start_x = center.x + win_offset*cos_dir + win_offset*sin_dir;
- float start_y = center.y - win_offset*sin_dir + win_offset*cos_dir;
- //20s方邻域内像素的首地址指针
- uchar* WIN = win.data;
- #if 0
- // Nearest neighbour version (faster)
- for( i = 0; i < win_size; i++, start_x += sin_dir, start_y += cos_dir )
- {
- float pixel_x = start_x;
- float pixel_y = start_y;
- for( j = 0; j < win_size; j++, pixel_x += cos_dir, pixel_y -= sin_dir )
- {
- int x = std::min(std::max(cvRound(pixel_x), 0), img->cols-1);
- int y = std::min(std::max(cvRound(pixel_y), 0), img->rows-1);
- WIN[i*win_size + j] = img->at<uchar>(y, x);
- }
- }
- #else
- int ncols1 = img->cols-1, nrows1 = img->rows-1;
- size_t imgstep = img->step;
- //找到20s方邻域内的所有像素
- for( i = 0; i < win_size; i++, start_x += sin_dir, start_y += cos_dir )
- {
- //得到旋转校正后的20s方邻域的行的首像素坐标
- double pixel_x = start_x;
- double pixel_y = start_y;
- for( j = 0; j < win_size; j++, pixel_x += cos_dir, pixel_y -= sin_dir )
- {
- //得到旋转校正后的20s方邻域的像素坐标
- int ix = cvFloor(pixel_x), iy = cvFloor(pixel_y);
- //判断该像素坐标是否超过了图像的边界
- if( (unsigned)ix < (unsigned)ncols1 &&
- (unsigned)iy < (unsigned)nrows1 ) //没有超过边界
- {
- //计算偏移量
- float a = (float)(pixel_x - ix), b = (float)(pixel_y - iy);
- //提取出图像的灰度值
- const uchar* imgptr = &img->at<uchar>(iy, ix);
- //由线性插值法计算20s方邻域内所对应的像素灰度值
- WIN[i*win_size + j] = (uchar)
- cvRound(imgptr[0]*(1.f - a)*(1.f - b) +
- imgptr[1]*a*(1.f - b) +
- imgptr[imgstep]*(1.f - a)*b +
- imgptr[imgstep+1]*a*b);
- }
- else //超过了图像边界
- {
- //用边界处的灰度值代替超出部分的20s方邻域内的灰度值
- int x = std::min(std::max(cvRound(pixel_x), 0), ncols1);
- int y = std::min(std::max(cvRound(pixel_y), 0), nrows1);
- WIN[i*win_size + j] = img->at<uchar>(y, x);
- }
- }
- }
- #endif
- }
- else //采用U-SURF算法
- {
- // extract rect - slightly optimized version of the code above
- // TODO: find faster code, as this is simply an extract rect operation,
- // e.g. by using cvGetSubRect, problem is the border processing
- // descriptor_dir == 90 grad
- // sin_dir == 1
- // cos_dir == 0
- float win_offset = -(float)(win_size-1)/2;
- //得到20s方邻域的左上角坐标
- int start_x = cvRound(center.x + win_offset);
- int start_y = cvRound(center.y - win_offset);
- uchar* WIN = win.data;
- //找到20s方邻域内的所有像素
- for( i = 0; i < win_size; i++, start_x++ )
- {
- //行的首像素坐标
- int pixel_x = start_x;
- int pixel_y = start_y;
- for( j = 0; j < win_size; j++, pixel_y-- )
- {
- //x和y为像素坐标
- int x = MAX( pixel_x, 0 );
- int y = MAX( pixel_y, 0 );
- //确保坐标不超过图像边界
- x = MIN( x, img->cols-1 );
- y = MIN( y, img->rows-1 );
- WIN[i*win_size + j] = img->at<uchar>(y, x); //赋值
- }
- }
- }
- // Scale the window to size PATCH_SZ so each pixel's size is s. This
- // makes calculating the gradients with wavelets of size 2s easy
- //利用面积相关法,把边长为20s的正方形缩小为边长为20的正方形(尺度s一定是大于1,所以是缩小,而不是扩大),也就是相当于对边长为20s的正方形进行采样间隔为s的等间隔采样
- resize(win, _patch, _patch.size(), 0, 0, INTER_AREA);
- // Calculate gradients in x and y with wavelets of size 2s
- //遍历20s方邻域内的所有采样像素
- for( i = 0; i < PATCH_SZ; i++ )
- for( j = 0; j < PATCH_SZ; j++ )
- {
- float dw = DW[i*PATCH_SZ + j]; //得到高斯加权系数
- //计算加权后的dx和dy
- float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
- float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
- DX[i][j] = vx;
- DY[i][j] = vy;
- }
- // Construct the descriptor
- //描述符矢量变量
- vec = descriptors->ptr<float>(k);
- //描述符矢量变量清零
- for( kk = 0; kk < dsize; kk++ )
- vec[kk] = 0;
- double square_mag = 0; //模的平方
- if( extended ) //128维描述符
- {
- // 128-bin descriptor
- //遍历20s方邻域内的4×4个子区域
- for( i = 0; i < 4; i++ )
- for( j = 0; j < 4; j++ )
- {
- //遍历子区域内的25个采样像素
- for(int y = i*5; y < i*5+5; y++ )
- {
- for(int x = j*5; x < j*5+5; x++ )
- {
- float tx = DX[y][x], ty = DY[y][x]; //得到dx和dy
- if( ty >= 0 ) //dy ≥ 0
- {
- vec[0] += tx; //累加dx
- vec[1] += (float)fabs(tx); //累加|dx|
- } else { //dy < 0
- vec[2] += tx; //累加dx
- vec[3] += (float)fabs(tx); //累加|dx|
- }
- if ( tx >= 0 ) //dx ≥ 0
- {
- vec[4] += ty; //累加dy
- vec[5] += (float)fabs(ty); //累加|dy|
- } else { //dx < 0
- vec[6] += ty; //累加dy
- vec[7] += (float)fabs(ty); //累加|dy|
- }
- }
- }
- for( kk = 0; kk < 8; kk++ )
- square_mag += vec[kk]*vec[kk];
- vec += 8;
- }
- }
- else //64维描述符
- {
- // 64-bin descriptor
- //遍历20s方邻域内的4×4个子区域
- for( i = 0; i < 4; i++ )
- for( j = 0; j < 4; j++ )
- {
- //遍历子区域内的25个采样像素
- for(int y = i*5; y < i*5+5; y++ )
- {
- for(int x = j*5; x < j*5+5; x++ )
- {
- float tx = DX[y][x], ty = DY[y][x]; //得到dx和dy
- //累加子区域内的dx、dy、|dx|和|dy|
- vec[0] += tx; vec[1] += ty;
- vec[2] += (float)fabs(tx); vec[3] += (float)fabs(ty);
- }
- }
- for( kk = 0; kk < 4; kk++ )
- square_mag += vec[kk]*vec[kk];
- vec+=4;
- }
- }
- // unit vector is essential for contrast invariance
- //为满足对比度不变性,即去除光照变化的影响,对描述符进行归一化处理
- vec = descriptors->ptr<float>(k);
- float scale = (float)(1./(sqrt(square_mag) + DBL_EPSILON));
- for( kk = 0; kk < dsize; kk++ )
- vec[kk] *= scale; //公式21
- }
- }
- // Parameters
- //成员变量
- const Mat* img;
- const Mat* sum;
- vector<KeyPoint>* keypoints;
- Mat* descriptors;
- bool extended;
- bool upright;
- // Pre-calculated values
- int nOriSamples;
- vector<Point> apt;
- vector<float> aptw;
- vector<float> DW;
- };