基于HOG特征的Adaboost行人检测

最新推荐文章于 2020-10-05 19:57:31 发布

ChenLee_1

最新推荐文章于 2020-10-05 19:57:31 发布

阅读量1w

点赞数 2

分类专栏： CV相关

CV相关专栏收录该内容

281 篇文章 58 订阅

订阅专栏

申明，本文非笔者原创，原文转载自：http://blog.csdn.net/van_ruin/article/details/9166591

1.方向梯度直方图（Histogram of Oriented Gradient, HOG）特征是一种在计算机视觉和图像处理中用来进行物体检测的特征描述子。它通过计算和统计图像局部区域的梯度方向直方图来构成特征。基本知识可以参考博客：http://blog.csdn.net/zouxy09/article/details/7929348

2.Adaboost的基础知识可以参考书籍：统计学习方法，第八章-提升方法adaboost。

这里利用HOG来训练Adaboost行人检测。在Haar-Adaboost算法中，弱分类器仅对一维分类。但是在Hog特征中，特征是每个block的串联。如果仅对一维分类（一个cell的其中一个方向的权值），就不能有效利用block的归一化效果。所以我们使用logistic弱分类器对每个block进行分类（实验中，每个block包含4个cell，每个cell有9个bin，即36维特征）。

本实验需要注意的地方：

1. adaboost误差率需要计算权重

2. logistic回归需要使用带权重的logistic分类器

3. logistic分类可能与数据分布相反。需要计算两次。（相反的情况下，拟合没有意义，需要将数据反转（1->0,0->1））

发现总结与问题

1. 公理：对于任何数据的二值分类，总能得到正确率大于等于 0.5 的线性分类器。

2. 推论：对于任何带权重数据的二值分类，总能得到加权正确率大于等于 0.5 的线性分类器。

3. 推论（待证）：对于任何带权重数据的 n 值分类，总能得到加权正确率大于等于 1/n 的线性分类器。

4. 对于与 logistic 函数分布相反的数据，应该如何处理？（本实验的处理方式如前面所述）。

实验结果后的猜想

猜想 1：Adaboost 弱分类器所选取的特征仍然要保持一定的颗粒度，像素级的特征是无效的。

实验结果与分析

训练集 : 500/500 ;测试集 : 19/22（200个弱分类器）

测试数据较少，但是训练集的高正确率至少证明其能够由弱分类器（错误率普遍在 0.25 左右）提高样本数据集的精度。

17 张图片中，有部分图片较模糊，行人影像较小，可能导致难以分辨。

下面给出代码，希望各位能够指正错误。说明：本代码全部由自己编写，所用函数未调用OpenCV实用库函数，及机器学习库函数（基本数据除外）。

[cpp]view plaincopy 
    
 /***********************************************************/  
 /** Copyright by Weidi Xu, S.C.U.T in Guangzhou, Guangdong**/  
 /***********************************************************/  
   
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <ctime>

 #include <opencv2\opencv.hpp>
   
 using std::clock_t;  
 using std::clock;  
 using namespace cv;  
   
 // Compile-time parameters of the data set and of the HOG layout.
 // Number of negative (non-pedestrian) training images.
 const int NUM_NEGIMAGE = 1000;
 // Number of positive (pedestrian) training images.
 const int NUM_POSIMAGE = 500;
 // Total number of training images; hard-coded, must equal NUM_POSIMAGE + NUM_NEGIMAGE.
 const int NUM_IMAGE = 1500;
 // Number of test images read by main().
 const int NUM_TESTIMAGE = 22;
 // Not referenced anywhere in the visible code.
 // NOTE(review): 3781 matches 15*7*36 + 1 (all block features plus one bias term) -- confirm intent.
 const int MAX_DIMENSION = 3781;
 // Expected image height and width in pixels.
 const int IMAGE_ROWS = 128;
 const int IMAGE_COLS = 64;
 // Side length of a square cell, in pixels.
 const int CELLSIZE = 8;
 // Side length of a square block, in pixels.
 const int BLOCKSIZE = 16;
 // Step between neighbouring block positions, in pixels.
 const int MOVELENGTH = 8;
 // Number of orientation bins in each cell histogram.
 const int BINSIZE = 9;
 // pi, obtained as 2*acos(0).
 const double PI = 2*acos(0.0);
 // Tolerance used for floating-point comparisons.
 const double eps = 1e-8;

 // Derived parameters.
 // Number of block positions along the rows and along the columns of an image.
 const int NUM_BLOCK_ROWS = (IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH+1;
 const int NUM_BLOCK_COLS = (IMAGE_COLS-BLOCKSIZE)/MOVELENGTH+1;
 const int NUM_BLOCK_FEATURES = (BLOCKSIZE/CELLSIZE)*(BLOCKSIZE/CELLSIZE)*BINSIZE+1;// +1: index 0 holds the constant feature paired with theta[0]

 // Data extracted from the training images.
 // Each weak classifier works on a single block, so the features of one image
 // are kept as [block row][block column][feature inside the block].
 double features[NUM_IMAGE][NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES];
 double type[NUM_IMAGE]; // label for the logistic regression: 1 - pos, 0 - neg
 double y[NUM_IMAGE]; // label for the AdaBoost weight update: 1 - pos, -1 - neg

 // Number of weak classifiers to train (varied between experiments).
 const int NUM_WEAKCLASSIFIER = 100;

 // AdaBoost sample weights, one per training image.
 double weight[NUM_IMAGE];
   
 //logistic function(dimension is given by NUM_BLOCK_FEATURES(37 in this setting))  
 double logistic(double theta[], double x[])  
 {  
     double ans = 0;  
     for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)  
     {  
         ans += theta[i]*x[i];  
     }  
     return 1/(1+std::exp(-ans));  
 }  
   
 struct WeakClassifier  
 {  
     double _theta[NUM_BLOCK_FEATURES]; //threshold classifier  
     int _index_row;  //classify by the features in block[_block_row][_block_y]  
     int _index_col;  
     int _isreverse; //1 for (> := pos, < := neg); 0 for (< := pos, >:= neg)  
     double _alpha;   
     double _error;  
     void clear()  
     {  
         memset(_theta, 0.0, NUM_BLOCK_FEATURES*sizeof(double));  
         _alpha = 0.0;  
         _error = 1;  
         _index_row = -1;  
         _index_col = -1;  
         _isreverse = true;  
     }  
   
     //return 1 or -1  
     int cal(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])  
     {  
         int ans = logistic(_theta, x[_index_row][_index_col]);  
         if(ans > 0.5)  
         {  
             if(_isreverse)  
                 return -1;  
             else  
                 return 1;  
         }  
         else  
         {  
             if(_isreverse)  
                 return 1;  
             else  
                 return -1;  
         }  
     }  
   
     void print()  
     {  
         //theta  
         for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)  
         printf("%lf ", _theta[i]);  
         printf("\n");  
   
         //int _index_row;  
         printf("%d ",_index_row);  
   
         //int _index_col;  
         printf("%d ",_index_col);  
   
         //int _isreverse; //1 for (> := pos, < := neg); 0 for (< := pos, >:= neg)  
         printf("%d ",_isreverse);  
   
         //double _alpha;  
         printf("%lf ",_alpha);  
           
         //double _error;  
         printf("%lf \n",_error);  
     }  
 }weakClassifier[NUM_WEAKCLASSIFIER];  
   
 //Util Function  
 double arc2angle(double arc)  
 {  
     return arc/PI*180.0;  
 }  
   
 double angle2arc(double angle)  
 {  
     return angle/180.0*PI;  
 }  
   
 // Writes the relative path of positive training image number i into filename.
 // filename must point to a buffer large enough for the formatted path.
 void posfilename(int i, char* filename)
 {
     static const char pattern[] = "pos/pos (%d).png";
     sprintf(filename, pattern, i);
 }
   
 // Writes the relative path of negative training image number i into filename.
 // filename must point to a buffer large enough for the formatted path.
 void negfilename(int i, char* filename)
 {
     static const char pattern[] = "neg/neg (%d).png";
     sprintf(filename, pattern, i);
 }
   
 // Writes the relative path of test image number i into filename.
 // filename must point to a buffer large enough for the formatted path.
 void testfilename(int i, char* filename)
 {
     static const char pattern[] = "test_pos/test (%d).png";
     sprintf(filename, pattern, i);
 }
   
 //I(x,y) = sqrt(I(x,y))  
 void normalizeImage(Mat& inputImage)  
 {  
     // accept only char type matrices    
     CV_Assert(inputImage.depth() != sizeof(uchar));    
     int channels = inputImage.channels();    
     int nRows = inputImage.rows ;    
     int nCols = inputImage.cols* channels;    
     if (inputImage.isContinuous())    
     {    
         nCols *= nRows;    
         nRows = 1;    
     }    
     int i,j;    
     uchar* p;    
     for( i = 0; i < nRows; ++i)    
     {    
         p = inputImage.ptr<uchar>(i);    
         for ( j = 0; j < nCols; ++j)    
         {    
             p[j] = int(sqrt(p[j]*1.0));    
         }    
     }  
     return;  
 }  
   
 //I(x,y) 第一维的梯度为xGradient  
 void calGredient(const Mat& inputImage, double xGradient[IMAGE_ROWS][IMAGE_COLS], double yGradient[IMAGE_ROWS][IMAGE_COLS])  
 {  
     uchar* dataptr = inputImage.data;  
     int nrows = inputImage.rows;  
     int ncols = inputImage.cols;  
   
     //cal xgradient  
     for(int i = 1 ; i < nrows - 1; i++)  
     {  
         for(int j = 0 ; j < ncols; j++)  
         {  
             xGradient[i][j] = inputImage.at<uchar>(i+1,j) - inputImage.at<uchar>(i-1,j);  
         }  
     }  
   
     //cal margin  
     for(int i = 0 ; i < ncols; i++)  
     {  
         xGradient[0][i] = (inputImage.at<uchar>(1,i) - inputImage.at<uchar>(0,i))*2;  
         xGradient[nrows-1][i] = (inputImage.at<uchar>(nrows-1,i) - inputImage.at<uchar>(nrows-2,i))*2;  
     }  
   
     //cal ygradient  
     for(int i = 0 ; i < nrows ; i++)  
     {  
         for(int j = 1 ; j < ncols - 1; j++)  
         {  
             yGradient[i][j] = inputImage.at<uchar>(i,j+1) - inputImage.at<uchar>(i,j-1);  
         }  
     }  
   
     //cal margin  
     for(int i = 0 ; i < nrows; i++)  
     {  
         xGradient[i][0] = (inputImage.at<uchar>(i,1) - inputImage.at<uchar>(i,0))*2;  
         xGradient[i][ncols-1] = (inputImage.at<uchar>(i,ncols-1) - inputImage.at<uchar>(i,ncols-2))*2;  
     }  
 }  
   
 //cal the HogFeatures by block  
 void calHogFeatures(Mat& inputImage, double outputFeature[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])  
 {  
     int nrows = inputImage.rows;  
     int ncols = inputImage.cols;  
     int type = inputImage.type();  
   
     if(nrows != IMAGE_ROWS || ncols != IMAGE_COLS)  
         abort();  
   
     //cal x,yGradient  
     double xGradient[IMAGE_ROWS][IMAGE_COLS];  
     double yGradient[IMAGE_ROWS][IMAGE_COLS];  
     calGredient(inputImage, xGradient, yGradient);  
   
     //computation median  
     double gradient[IMAGE_ROWS][IMAGE_COLS];  
     double direction[IMAGE_ROWS][IMAGE_COLS];  
   
     for(int i = 0 ; i < nrows; i++)  
     {  
         for(int j = 0 ; j < ncols; j++)  
         {  
             double gx = xGradient[i][j];  
             double gy = yGradient[i][j];  
             gradient[i][j] = sqrt(gx*gx + gy*gy);  
             direction[i][j] = arc2angle(atan2(gy, gx));  
         }  
     }  
   
     //compute cellinfo 8*8  
     double cellinfo[IMAGE_ROWS/CELLSIZE][IMAGE_COLS/CELLSIZE][BINSIZE];  
     memset(cellinfo, 0, sizeof(cellinfo));  
   
     for(int i = 0; i < IMAGE_ROWS/CELLSIZE; i++)  
     {  
         for(int j = 0 ; j < IMAGE_COLS/CELLSIZE; j++)  
         {  
             double* cell = cellinfo[i][j];  
               
             //cal single cellinfo of 8*8  
             for(int ci = 0 ; ci < CELLSIZE; ci++)  
             {  
                 for(int cj = 0; cj < CELLSIZE; cj++)  
                 {  
                     //find org pix;  
                     int px = i*CELLSIZE + ci;  
                     int py = j*CELLSIZE + cj;  
   
                     int binindex = int((direction[px][py]+180.0)/(360.0/BINSIZE));  
                     //handle bound   
                     if(fabs(direction[px][py]-180) < eps)  
                     {  
                         binindex = BINSIZE-1;  
                     }  
                     if(fabs(direction[px][py]+180) < eps)  
                     {  
                         binindex = 0;  
                     }  
                     if(binindex < 0 || binindex >= BINSIZE)  
                     {  
                         printf("Wrong binindex: %d %lf %lf %lf", binindex, xGradient[px][py], yGradient[px][py], direction[px][py]);  
                         abort();  
                     }  
   
                     cell[binindex] += gradient[px][py];  
                 }  
             }  
         }  
     }  
   
     /*double blockinfo[(IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH+1][(IMAGE_COLS-BLOCKSIZE)/MOVELENGTH+1][(BLOCKSIZE/CELLSIZE)*(BLOCKSIZE/CELLSIZE)*BINSIZE];*/  
   
     if(MOVELENGTH%CELLSIZE != 0)  
     {  
         printf("MOVELENGTH%CELLSIZE != 0");  
         abort();  
     }  
   
     //cal blockinfo  
     for(int i = 0 ; i < (IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH + 1; i++)  
     {  
         for(int j = 0 ; j < (IMAGE_COLS-BLOCKSIZE)/MOVELENGTH + 1; j++)  
         {  
             int bfindex = 0; outputFeature[i][j][bfindex++] = 1;  
   
             //cal the position of this block  
             for(int c1 = 0; c1 < BLOCKSIZE/CELLSIZE; c1++)  
             {  
                 for(int c2 = 0 ; c2 < BLOCKSIZE/CELLSIZE; c2++)  
                 {  
                     //cal the index of cell  
                     int cx = i*MOVELENGTH/CELLSIZE+c1;  
                     int cy = j*MOVELENGTH/CELLSIZE+c2;  
   
                     for(int binindex = 0 ; binindex < BINSIZE; binindex++)  
                     {  
                         outputFeature[i][j][bfindex++] = cellinfo[cx][cy][binindex];  
                     }  
                 }  
             }  
         }  
     }  
     return;  
 }  
   
 //use global variables  
 void trainLogisticRegression(int block_row,int block_col, double theta[], double& errorrate, int& isreverse)  
 {  
     double theta1[NUM_BLOCK_FEATURES], theta2[NUM_BLOCK_FEATURES];  
     memset(theta1, 0, NUM_BLOCK_FEATURES*sizeof(double));  
     memset(theta2, 0, NUM_BLOCK_FEATURES*sizeof(double));  
     double errorrate1 = 0;  
     double errorrate2 = 0;  
     double rightnum1 = 0;  
     double rightnum2 = 0;  
     isreverse = 0;  
   
     //cal parameter thetas  
     for(int k = 0 ; k < 100000; k++)  
     {  
         int i = rand()%NUM_IMAGE;  
         int j = rand()%NUM_BLOCK_FEATURES;  
         theta1[j] = theta1[j] + weight[i]*0.01*(type[i] - logistic(theta1, features[i][block_row][block_col]))*features[i][block_row][block_col][j];  
     }  
   
     for(int i = 0 ; i < NUM_IMAGE; i++)  
     {  
         double tmp = logistic(theta1, features[i][block_row][block_col]);  
         if(tmp > 0.5 && fabs(type[i] - 1) < eps)  
             rightnum1 += 1.0*weight[i];  
         if(tmp < 0.5 && fabs(type[i] - 0) < eps)  
             rightnum1 += 1.0*weight[i];  
     }  
     errorrate1 = 1 - rightnum1;  
   
     //calreverse  
     for(int k = 0 ; k < 100000; k++)  
     {  
         int i = rand()%NUM_IMAGE;  
         int j = rand()%NUM_BLOCK_FEATURES;  
         theta2[j] = theta2[j] + weight[i]*0.01*(1- type[i] - logistic(theta2, features[i][block_row][block_col]))*features[i][block_row][block_col][j];  
     }  
   
     for(int i = 0 ; i < NUM_IMAGE; i++)  
     {  
         double tmp = logistic(theta2, features[i][block_row][block_col]);  
         if(tmp > 0.5 && fabs(type[i] - 0) < eps)  
             rightnum2 += 1.0*weight[i];  
         if(tmp < 0.5 && fabs(type[i] - 1) < eps)  
             rightnum2 += 1.0*weight[i];  
     }  
     errorrate2 = 1 - rightnum2;  
   
     if(errorrate1 < errorrate2)  
     {  
         for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)  
         {  
             theta[i] = theta1[i];  
         }  
         isreverse = 0;  
         errorrate = errorrate1 + eps;  
     }  
     else  
     {  
         for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)  
         {  
             theta[i] = theta2[i];  
         }  
         isreverse = 1;  
         errorrate = errorrate2 + eps;  
     }  
     return;  
 }  
   
 WeakClassifier trainClassifier()  
 {  
     WeakClassifier ansclassifier;  
     double theta[NUM_BLOCK_FEATURES];  
     double errorrate = 1;  
     int isreverse = 0;  
     double best_theta[NUM_BLOCK_FEATURES];  
     double best_errorrate = 1;  
     int best_row = -1;  
     int best_col = -1;  
     int best_isreverse = 0;  
   
     //select best weak classifier  
     for(int i = 0 ; i < NUM_BLOCK_ROWS; i++)  
     {  
         for(int j = 0 ; j < NUM_BLOCK_COLS; j++)  
         {  
             trainLogisticRegression(i,j,theta,errorrate, isreverse);  
               
             if(errorrate < 0)  
             {  
                 printf("Wrong errorrate < 0 : %lf", errorrate);  
                 abort();  
             }  
   
             if(errorrate < best_errorrate)  
             {  
                 for(int tempi = 0 ; tempi < NUM_BLOCK_FEATURES; tempi++)  
                 {  
                     best_theta[tempi] = theta[tempi];  
                 }  
                 best_errorrate = errorrate;  
                 best_row = i;  
                 best_col = j;  
                 best_isreverse = isreverse;  
             }  
         }  
     }  
   
     if(best_errorrate > 0.5)  
     {  
         printf("The best_errorrate is greater than 0.5.\n");  
         abort();  
     }  
   
     //set parameters;  
     ansclassifier._alpha = 1.0/2*std::log((1-best_errorrate)/best_errorrate);  
     ansclassifier._error = best_errorrate;  
     ansclassifier._index_col = best_col;  
     ansclassifier._index_row = best_row;  
     ansclassifier._isreverse = best_isreverse;  
     for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) ansclassifier._theta[i] = best_theta[i];  
   
     return ansclassifier;  
 }  
   
 int calByStrongClassifier(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])  
 {  
     double ans = 0;  
     for(int i = 0 ; i < NUM_WEAKCLASSIFIER; i++)  
     {  
         ans += weakClassifier[i]._alpha * weakClassifier[i].cal(x);  
     }  
     if(ans > 0)  
         return 1;  
     else  
         return -1;  
 }  
   
   
   
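 /*
 Image size: 128*64;
 type: CV_8UC1;
 Block size: 16*16 (BLOCKSIZE);
 Cell size: 8*8 (CELLSIZE);
 Block step inside the detection window: 8*8 (MOVELENGTH);
 The gradient histogram of one cell has 9 bins (BINSIZE);
 Step of the sliding window over a test picture: the original note gave 6*6,
 but the code in this file classifies whole 128*64 images and slides no window.
 */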
   
 int main()  
 {  
     char filename[100];  
     IplImage* inputImage = NULL;  
     clock_t timecount = clock();  
   
     //load posimage  
     for(int i = 0 ; i < NUM_POSIMAGE; i++)  
     {  
         posfilename(i+1 ,filename);  
           
         //load grey image: set the parameter to 0;  
         inputImage = cvLoadImage(filename, 0);  
           
           
         //cal features;  
         Mat inputMat(inputImage);  
         calHogFeatures(inputMat, features[i]);  
         type[i] = 1;  
         y[i] = 1;  
         //printf("%d \n", inputMat.cols);  
   
         //release memory  
         inputMat.release();  
         cvReleaseImage(&inputImage);  
         inputImage = NULL;  
     }  
   
     printf("The feature process of pos-image have done in %d second.\n", (clock()-timecount)/1000);  
     timecount = clock();  
   
     //load neg images  
     for(int i = 0; i < NUM_NEGIMAGE; i++)  
     {  
         negfilename(i+1, filename);  
   
         //load grey image: set the parameter to 0;  
         inputImage = cvLoadImage(filename, 0);  
         type[NUM_POSIMAGE+i] = 0;  
         y[NUM_POSIMAGE+i] = -1;  
   
         Mat inputMat(inputImage);  
         calHogFeatures(inputMat, features[NUM_POSIMAGE+i]);  
           
         //release memory  
         inputMat.release();  
         cvReleaseImage(&inputImage);  
         inputImage = NULL;  
     }  
   
     printf("The feature process of neg-image have done in %d second.\n", (clock()-timecount)/1000);  
     timecount = clock();  
   
     //init weight array  
     for(int i = 0 ; i < NUM_IMAGE; i++)  
     {  
         weight[i] = 1.0/NUM_IMAGE;  
     }  
   
     //freopen  
     freopen("HOG_CLASSIFIER.txt", "w", stdout);  
   
     //print number of weakclassifiers;  
     printf("%d\n", NUM_WEAKCLASSIFIER);  
   
     //adaboost framework  
     for(int classifierindex = 0 ; classifierindex < NUM_WEAKCLASSIFIER; classifierindex++)  
     {  
         weakClassifier[classifierindex] = trainClassifier();  
   
         double error = weakClassifier[classifierindex]._error;  
         double alpha = weakClassifier[classifierindex]._alpha;  
   
         //printf("%d classifier: %lf ====\n",classifierindex, error);  
         //printf("_index_row %d _index_col %d\n", weakClassifier[classifierindex]._index_row, weakClassifier[classifierindex]._index_col);  
   
         double identitysum = 0;  
         for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++)  
         {  
             weight[sampleindex] *= std::exp(-alpha*y[sampleindex]*weakClassifier[classifierindex].cal(features[sampleindex]));  
             identitysum += weight[sampleindex];  
         }  
   
         //reweight  
         for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++)  
         {  
             weight[sampleindex] /= identitysum;  
         }  
   
         weakClassifier[classifierindex].print();  
     }  
   
     freopen("CON", "w", stdout);  
     int rightnum = 0;  
     for(int testindex = 0 ;testindex < NUM_TESTIMAGE; testindex ++)  
     {  
         //posfilename(testindex+1, filename);  
         testfilename(testindex+1, filename);  
         inputImage = cvLoadImage(filename, 0);  
   
         double testfeatures[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES];  
         memset(testfeatures, 0, sizeof(testfeatures));  
   
         Mat inputMat(inputImage);  
         calHogFeatures(inputMat, testfeatures);  
   
         if(calByStrongClassifier(testfeatures) == 1)  
         {  
             rightnum++;  
             //printf("Yes\n");  
         }  
         else  
             //printf("No\n");  
   
         inputMat.release();  
     }  
     printf("Accuracy: %d\n", rightnum);  
 }  

//测试数据是网上流行的128*64灰度行人图像数据。

ChenLee_1

关注

2
点赞
踩
24

收藏

觉得还不错? 一键收藏
7
评论
基于HOG特征的Adaboost行人检测

申明，本文非笔者原创，原文转载自：http://blog.csdn.net/van_ruin/article/details/9166591 1.方向梯度直方图（Histogram of Oriented Gradient, HOG）特征是一种在计算机视觉和图像处理中用来进行物体检测的特征描述子。它通过计算和统计图像局部区域的梯度方向直方图来构成特征。基本知识可以参考博客：http
复制链接

扫一扫