train_cascade 源码阅读之HOG特征

最新推荐文章于 2022-07-01 12:56:38 发布

荪荪

最新推荐文章于 2022-07-01 12:56:38 发布

阅读量912

点赞数

分类专栏：人脸检测

人脸检测专栏收录该内容

50 篇文章 0 订阅

订阅专栏

本文讨论OpenCV train_cascade 级联分类器中的HOG特征实现，HOG特征原理可以参考此文。特征的初始化框架和LBP 特征是一致的，感兴趣可以参考

train_cascade 源码阅读之LBP 特征中的介绍。HOG，即Histogram of Oriented Gradient（方向梯度直方图），是一种常用于人体目标检测的图像描述子，用来表达人体，提取人体外形信息和运动信息，形成丰富的特征集。
生成过程：检测窗口--> 归一化图像--> 计算梯度-->对每一个cell块对梯度直方图进行规定权重的投影 --> 对每个重叠block块内的cell进行对比度归一化 --> 把所有block内的直方图向量一起组合成一个大的HOG特征向量。（参考自 blog.sina.com.cn/s/blog_60e6e3d50101bkpn.html）
在HOG特征的操作中，与前述的Haar特征与LBP特征不同的是，初始时矩阵不再是单一的一个sum，而是一个矩阵向量hist，其中含有9个类似sum一样的矩阵，分别存放每一个方向的结果。

[cpp]view plaincopy 
    
 // Feature evaluator for HOG (Histogram of Oriented Gradients) features.
 // Per-sample data is kept as one integral image per orientation bin (hist)
 // plus one integral image of the gradient magnitude (normSum).
 class CvHOGEvaluator : public CvFeatureEvaluator  
 {  
 public:  
 ……  
     // Computes the integral histograms of img and stores them in row idx of
     // every matrix in hist and of normSum.
     virtual void setImage(const cv::Mat& img, uchar clsLabel, int idx);  
     // NOTE(review): presumably evaluates feature component varIdx on sample
     // sampleIdx; the definition is not part of this excerpt -- confirm.
     virtual float operator()(  
             int varIdx,   
             int sampleIdx) const;  
 ……  
 protected:  
     // Fills `features` with every block (2x2 cells) enumerated for the window.
     virtual void generateFeatures();  
     // Builds one integral image per orientation bin into `histogram` and the
     // integral image of the gradient magnitude into `norm`.
     virtual void integralHistogram(  
             const cv::Mat &img,   
             std::vector<cv::Mat> &histogram,   
             cv::Mat &norm,   
             int nbins) const;  
     // One HOG block made of N_CELLS cells.
     class Feature  
     {  
     public:  
         Feature();  
         // offset: row stride of the integral image; (x, y): top-left corner
         // of the block; cellW / cellH: size of a single cell.
         Feature( int offset, int x, int y, int cellW, int cellH );  
         // Returns the normalized value of component featComponent
         // (cell index * N_BINS + bin index) for the sample stored in row y.
         float calc(   
                 const std::vector<cv::Mat> &_hists,   
                 const cv::Mat &_normSum, size_t y,   
                 int featComponent ) const;  
         void write( cv::FileStorage &fs ) const;  
         void write( cv::FileStorage &fs, int varIdx ) const;  
   
         cv::Rect rect[N_CELLS]; // cell rectangles in window coordinates
   
         // Offsets of the four corners of each cell inside a flattened
         // (row-major) integral image, precomputed by the constructor.
         struct  
         {  
             int p0, p1, p2, p3;  
         } fastRect[N_CELLS];  
     };  
     std::vector<Feature> features;  
   
     cv::Mat normSum; // for normalization calculation (L1 or L2); one sample per row
     std::vector<cv::Mat> hist; // one matrix per orientation bin; one sample per row
 };  

接下来是初始化积分图操作下标的过程。

[cpp]view plaincopy 
    
 void CvHOGEvaluator::generateFeatures()  
 {  
     int offset = winSize.width + 1;  
     Size blockStep;  
     int x, y, t, w, h;  
   
     for (t = 8; t <= winSize.width/2; t+=8)   
         //t = size of a cell. blocksize = 4*cellSize  
     {  
         blockStep = Size(4,4);  
         w = 2*t; //width of a block  
         h = 2*t; //height of a block  
         for (x = 0; x <= winSize.width - w; x += blockStep.width)  
         {  
             for (y = 0; y <= winSize.height - h; y += blockStep.height)  
             {  
                 features.push_back(Feature(offset, x, y, t, t));  
             }  
         }  
         w = 2*t;  
         h = 4*t;  
         for (x = 0; x <= winSize.width - w; x += blockStep.width)  
         {  
             for (y = 0; y <= winSize.height - h; y += blockStep.height)  
             {  
                 features.push_back(Feature(offset, x, y, t, 2*t));  
             }  
         }  
         w = 4*t;  
         h = 2*t;  
         for (x = 0; x <= winSize.width - w; x += blockStep.width)  
         {  
             for (y = 0; y <= winSize.height - h; y += blockStep.height)  
             {  
                 features.push_back(Feature(offset, x, y, 2*t, t));  
             }  
         }  
     }  
   
     numFeatures = (int)features.size();  
 }  

t表示cell的尺寸，一个block含有2×2个cell，因此，t需要不大于winSize.width/2，在这里采用了三种不同的形状，block遍历的step是4×4的。输入给Feature的构造参数是偏移量，左上角坐标点，和cell的宽高。
接下来看Feature的构造。

[cpp]view plaincopy 
    
 CvHOGEvaluator::Feature::Feature( int offset, int x, int y, int cellW, int cellH )  
 {  
     rect[0] = Rect(x, y, cellW, cellH); //cell0  
     rect[1] = Rect(x+cellW, y, cellW, cellH); //cell1  
     rect[2] = Rect(x, y+cellH, cellW, cellH); //cell2  
     rect[3] = Rect(x+cellW, y+cellH, cellW, cellH); //cell3  
   
     for (int i = 0; i < N_CELLS; i++)  
     {  
         CV_SUM_OFFSETS(fastRect[i].p0, fastRect[i].p1, fastRect[i].p2, fastRect[i].p3, rect[i], offset);  
     }  
 }  

分别创建了四个cell矩形，CV_SUM_OFFSETS宏计算的是矩形的四个角点在拉成行向量的积分图中的偏移量。
积分图中的坐标算好了，再看积分图的生成过程。

[cpp]view plaincopy 
    
 void CvHOGEvaluator::  
 setImage(const Mat &img, uchar clsLabel, int idx)  
 {  
     CV_DbgAssert( !hist.empty());  
     CvFeatureEvaluator::setImage( img, clsLabel, idx );  
     vector<Mat> integralHist;  
     for (int bin = 0; bin < N_BINS; bin++)  
     {  
         integralHist.push_back(   
                     Mat(winSize.height + 1,   
                         winSize.width + 1,   
                         hist[bin].type(),   
                         hist[bin].ptr<float>((int)idx)) );  
     }  
     Mat integralNorm(  
                 winSize.height + 1,   
                 winSize.width + 1,   
                 normSum.type(),   
                 normSum.ptr<float>((int)idx));  
     integralHistogram(img, integralHist, integralNorm, (int)N_BINS);  
 }  

与LBP，Haar相同，新建Mat，传入积分图的数据地址，不同的是这里是矩阵向量，保存9个方向的积分图。各方向的积分直方图没有使用OpenCV自带的integral直接计算，而是自行实现了一个（用于归一化的梯度幅值积分图仍然调用了integral）。

[cpp]view plaincopy 
    
 // Computes the integral histogram of `img`.
 //   img       - input image (asserted to be CV_8U or CV_8UC3).
 //   histogram - output; histogram[b] receives the integral image of the
 //               gradient magnitude of the pixels quantized to bin b. The
 //               matrices must already be allocated by the caller.
 //   norm      - output; integral image of the gradient magnitude of all pixels.
 //   nbins     - number of orientation bins.
 void CvHOGEvaluator::integralHistogram(  
         const Mat   &img,   
         vector<Mat> &histogram,   
         Mat         &norm,   
         int         nbins) const  
 {  
     // NOTE(review): CV_8UC3 passes this assert, but the pixel reads below index
     // the row byte-wise by x, which looks like single-channel access -- confirm
     // the intended behavior for 3-channel input.
     CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );  
     int x, y, binIdx;  
   
     Size gradSize(img.size());  
     Size histSize(histogram[0].size());  
     Mat grad(gradSize, CV_32F);   // gradient magnitude per pixel
     Mat qangle(gradSize, CV_8U);  // quantized orientation bin per pixel
   
     // Border lookup tables. Both are addressed with indices -1 .. size, hence
     // the "+ 1" / "+ 2" pointer shifts and the 4 extra slots in the buffer.
     AutoBuffer<int> mapbuf(gradSize.width + gradSize.height + 4);  
     int* xmap = (int*)mapbuf + 1;  
     int* ymap = xmap + gradSize.width + 2;  
   
     const int borderType = (int)BORDER_REPLICATE;  
   
     for( x = -1; x < gradSize.width + 1; x++ )  
         xmap[x] = borderInterpolate(x, gradSize.width, borderType);  
     for( y = -1; y < gradSize.height + 1; y++ )  
         ymap[y] = borderInterpolate(y, gradSize.height, borderType);  
   
     // One scratch buffer holding four rows of `width` floats each:
     // dx, dy, magnitude and angle of the image row being processed.
     int width = gradSize.width;  
     AutoBuffer<float> _dbuf(width*4);  
     float* dbuf = _dbuf;  
     Mat Dx(1, width, CV_32F, dbuf);  
     Mat Dy(1, width, CV_32F, dbuf + width);  
     Mat Mag(1, width, CV_32F, dbuf + width*2);  
     Mat Angle(1, width, CV_32F, dbuf + width*3);  
   
     // Converts an angle in radians to bin units (nbins bins per pi radians).
     float angleScale = (float)(nbins/CV_PI);  
   
     for( y = 0; y < gradSize.height; y++ )  
     {  
         const uchar* currPtr = img.data + img.step*ymap[y];  
         const uchar* prevPtr = img.data + img.step*ymap[y-1];  
         const uchar* nextPtr = img.data + img.step*ymap[y+1];  
         float* gradPtr = (float*)grad.ptr(y);  
         uchar* qanglePtr = (uchar*)qangle.ptr(y);  
   
         // Central differences: dx from the left/right neighbours, dy from the
         // rows above/below, with borders resolved through xmap / ymap.
         for( x = 0; x < width; x++ )  
         {  
             dbuf[x] = (float)(currPtr[xmap[x+1]] - currPtr[xmap[x-1]]);  
             dbuf[width + x] = (float)(nextPtr[xmap[x]] - prevPtr[xmap[x]]);  
         }  
         // Magnitude and angle for the whole row; `false` requests radians.
         cartToPolar( Dx, Dy, Mag, Angle, false );  
         for( x = 0; x < width; x++ )  
         {  
             float mag = dbuf[x+width*2];  
             float angle = dbuf[x+width*3];  
             angle = angle*angleScale - 0.5f;  
             int bidx = cvFloor(angle);  
             // NOTE(review): the fractional part left in `angle` is not read
             // again in this function (no interpolation between bins here).
             angle -= bidx;  
             // Wrap the bin index once into [0, nbins). Assuming angles lie in
             // [0, 2*pi), this maps opposite gradient directions to the same bin.
             if( bidx < 0 )  
                 bidx += nbins;  
             else if( bidx >= nbins )  
                 bidx -= nbins;  
   
             qanglePtr[x] = (uchar)bidx;  
             gradPtr[x] = mag;  
         }  
     }  
     // Integral image of the gradient magnitude, written to `norm`.
     integral(grad, norm, grad.depth());  
   
     float* histBuf;  
     const float* magBuf;  
     const uchar* binsBuf;  
   
     // Row strides expressed in elements rather than bytes.
     int binsStep = (int)( qangle.step / sizeof(uchar) );  
     int histStep = (int)( histogram[0].step / sizeof(float) );  
     int magStep = (int)( grad.step / sizeof(float) );  
     // Hand-written integral image per bin: only pixels whose quantized bin
     // equals binIdx contribute their magnitude.
     for( binIdx = 0; binIdx < nbins; binIdx++ )  
     {  
         histBuf = (float*)histogram[binIdx].data;  
         magBuf = (const float*)grad.data;  
         binsBuf = (const uchar*)qangle.data;  
   
         // Zero the first row, then move to row 1 / column 1 of the integral image.
         memset( histBuf, 0, histSize.width * sizeof(histBuf[0]) );  
         histBuf += histStep + 1;  
         for( y = 0; y < qangle.rows; y++ )  
         {  
             histBuf[-1] = 0.f;   // first column of this integral row
             float strSum = 0.f;  // running sum along the current image row
             for( x = 0; x < qangle.cols; x++ )  
             {  
                 if( binsBuf[x] == binIdx )  
                     strSum += magBuf[x];  
                 // value from the row above plus the running sum of this row
                 histBuf[x] = histBuf[-histStep + x] + strSum;  
             }  
             histBuf += histStep;  
             binsBuf += binsStep;  
             magBuf += magStep;  
         }  
     }  
 }  

看完了代码才知道，以前对HOG特征的理解是有偏差的，尤其是在梯度的计算上，犯了严重的想当然的错误。逐像素的梯度幅值和方向计算完成后，调用integral计算梯度幅值的积分图（写入norm，用于归一化；注意这里并不是平方积分图），再根据量化后的方向，将幅值累加到各个方向对应的积分直方图中。
最后通过如下方式调用计算HOG特征，并进行归一化。

[cpp]view plaincopy 
    
 inline float CvHOGEvaluator::Feature::calc(   
         const std::vector<cv::Mat>& _hists,   
         const cv::Mat& _normSum, size_t y,   
         int featComponent ) const  
 {  
     float normFactor;  
     float res;  
   
     int binIdx = featComponent % N_BINS;  
     int cellIdx = featComponent / N_BINS;  
   
     const float *phist = _hists[binIdx].ptr<float>((int)y);  
     res = phist[fastRect[cellIdx].p0]   
             - phist[fastRect[cellIdx].p1]   
             - phist[fastRect[cellIdx].p2]   
             + phist[fastRect[cellIdx].p3];  
   
     const float *pnormSum = _normSum.ptr<float>((int)y);  
     normFactor = (float)(pnormSum[fastRect[0].p0]   
             - pnormSum[fastRect[1].p1]   
             - pnormSum[fastRect[2].p2]   
             + pnormSum[fastRect[3].p3]);  
     res = (res > 0.001f) ? ( res / (normFactor + 0.001f) ) : 0.f;   
     //for cutting negative values, which apper due to floating precision  
   
     return res;  
 }  

荪荪

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
train _cascade 源码阅读之HOG特征

本文讨论OpenCV train_cascade 级联分类器中的HOG特征实现，HOG特征原理可以参考此文。特征的初始化框架和LBP 特征是一致的，感兴趣可以参考train_cascade 源码阅读之LBP 特征中的介绍。HOG，即Histogram of Oriented Gradient 方向梯度直方图，常用于解决人体目标的检测的图像描述子，用来表达人体，提取人体外形信息
复制链接

扫一扫