OpenCV源码分析（一）级联分类器haartraining_已训练好的开源的级联分类器-CSDN博客

本文链接：https://blog.csdn.net/hj199241/article/details/51798909

OpenCV手册里讲到OpenCV_haartraining和OpenCV_traincascade都可以训练级联分类器。

区别是：traincascade同时支持Haar和LBP特征，并且更易于增加其他特征；两者输出的分类器文件格式不同，traincascade支持新旧两种格式，并且可以将训练好的级联分类器以旧格式导出。

与训练相关的辅助程序

createsamples.cpp：用来准备训练用的正样本数据和测试数据，输出.vec的文件，能被两种程序使用，文件以二进制方式存储图像。

performance.cpp：用来评估haartraining输出的分类器，它读入一组标注好的图像，运行分类器，并报告性能，如检测到的物体数目、漏检数目、误检数目和其他信息。

首先训练Haar分类器的步骤：

1.准备正负样本；

2.用createsamples程序建立正样本集合；

3.用haartraining程序训练，得到最终的分类器模型xml文件。

详细过程可参考该链接：http://www.doc88.com/p-686754097427.html

为了深入理解训练过程，下面对每个函数逐一进行详细分析。虽然haartraining比traincascade更老，但还是先从haartraining看起。

/*
 * haartraining.cpp
 * 训练级联分类器
 * Train cascade classifier
 */

#include <cstdio>
#include <cstring>
#include <cstdlib>

using namespace std;

#include "cvhaartraining.h"

/*
 * Returns the value that follows the option at argv[*index] and moves *index
 * onto that value. If the option is the last argument on the command line,
 * the problem is reported on stderr and the program terminates with exit
 * code 1 instead of reading past the end of the argument list.
 */
static char* icvRequireOptionValue( int argc, char* argv[], int* index )
{
    if( *index + 1 >= argc )
    {
        fprintf( stderr, "Option %s requires a value\n", argv[*index] );
        exit( 1 );
    }

    *index += 1;
    return argv[*index];
}

/*
 * Entry point of the haartraining tool.
 *
 * Without arguments it prints the usage text (including the built-in
 * defaults) and returns 0. Otherwise it parses the command line, echoes the
 * effective settings to stdout and passes them to
 * cvCreateTreeCascadeClassifier(). Arguments that are not recognized as
 * options are skipped silently. An option that needs a value but is given as
 * the last argument terminates the program with exit code 1.
 */
int main( int argc, char* argv[] )
{
    int i = 0;
    const char* nullname = "(NULL)";

    char* vecname = NULL;
    char* dirname = NULL;
    char* bgname  = NULL;

    bool bg_vecfile = false;
    int npos    = 2000;
    int nneg    = 2000;
    int nstages = 14;
    int mem     = 200;
    int nsplits = 1;
    float minhitrate     = 0.995F;
    float maxfalsealarm  = 0.5F;
    float weightfraction = 0.95F;
    int mode         = 0;
    int symmetric    = 1;
    int equalweights = 0;
    int width  = 24;
    int height = 24;
    const char* boosttypes[] = { "DAB", "RAB", "LB", "GAB" };
    int boosttype = 3;
    const char* stumperrors[] = { "misclass", "gini", "entropy" };
    int stumperror = 0;
    int maxtreesplits = 0;
    int minpos = 500;

    if( argc == 1 )
    {
        /* Command-line options used for training; see the parameter notes of
           cvCreateCascadeClassifier in cvhaartraining.h for details. */
        printf( "Usage: %s\n  -data <dir_name>\n"   /* directory of the cascade to be created */
                "  -vec <vec_file_name>\n"          /* .vec file holding the positive samples */
                "  -bg <background_file_name>\n"    /* description file of the background images */
                "  [-bg-vecfile]\n"                 /* treat the background file as a vec file */
                "  [-npos <number_of_positive_samples = %d>]\n"  /* positive samples used per stage */
                "  [-nneg <number_of_negative_samples = %d>]\n"  /* negative samples used per stage */
                "  [-nstages <number_of_stages = %d>]\n"         /* number of stages to train */
                "  [-nsplits <number_of_splits = %d>]\n"         /* binary splits per weak classifier: 1 = stump, 2 or more = tree */
                "  [-mem <memory_in_MB = %d>]\n"
                "  [-sym (default)] [-nonsym]\n"                 /* if non-zero, vertical symmetry is assumed */
                "  [-minhitrate <min_hit_rate = %f>]\n"          /* minimum hit rate required per stage */
                "  [-maxfalsealarm <max_false_alarm_rate = %f>]\n" /* maximum false alarm rate allowed per stage */
                "  [-weighttrimming <weight_trimming = %f>]\n"   /* weight trimming parameter */
                "  [-eqw]\n"                                     /* if non-zero, all samples start with equal weights */
                "  [-mode <BASIC (default) | CORE | ALL>]\n"     /* 0 - BASIC = Viola, 1 - CORE = all upright, 2 - ALL = all features */
                "  [-w <sample_width = %d>]\n"                   /* sample width */
                "  [-h <sample_height = %d>]\n"                  /* sample height */
                "  [-bt <DAB | RAB | LB | GAB (default)>]\n"     /* boosting type: 0 - Discrete AdaBoost, 1 - Real AdaBoost, 2 - LogitBoost, 3 - Gentle AdaBoost */
                "  [-err <misclass (default) | gini | entropy>]\n" /* error type applied when Discrete AdaBoost is used */
                "  [-maxtreesplits <max_number_of_splits_in_tree_cascade = %d>]\n" /* maximum number of splits in the tree cascade */
                "  [-minpos <min_number_of_positive_samples_per_cluster = %d>]\n", /* minimum number of positive samples per cluster */
                argv[0], npos, nneg, nstages, nsplits, mem,
                minhitrate, maxfalsealarm, weightfraction, width, height,
                maxtreesplits, minpos );

        return 0;
    }

    for( i = 1; i < argc; i++ )
    {
        if( !strcmp( argv[i], "-data" ) )
        {
            dirname = icvRequireOptionValue( argc, argv, &i );
        }
        else if( !strcmp( argv[i], "-vec" ) )
        {
            vecname = icvRequireOptionValue( argc, argv, &i );
        }
        else if( !strcmp( argv[i], "-bg" ) )
        {
            bgname = icvRequireOptionValue( argc, argv, &i );
        }
        else if( !strcmp( argv[i], "-bg-vecfile" ) )
        {
            bg_vecfile = true;
        }
        else if( !strcmp( argv[i], "-npos" ) )
        {
            npos = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-nneg" ) )
        {
            nneg = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-nstages" ) )
        {
            nstages = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-nsplits" ) )
        {
            nsplits = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-mem" ) )
        {
            mem = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-sym" ) )
        {
            symmetric = 1;
        }
        else if( !strcmp( argv[i], "-nonsym" ) )
        {
            symmetric = 0;
        }
        else if( !strcmp( argv[i], "-minhitrate" ) )
        {
            minhitrate = (float) atof( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-maxfalsealarm" ) )
        {
            maxfalsealarm = (float) atof( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-weighttrimming" ) )
        {
            weightfraction = (float) atof( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-eqw" ) )
        {
            equalweights = 1;
        }
        else if( !strcmp( argv[i], "-mode" ) )
        {
            char* tmp = icvRequireOptionValue( argc, argv, &i );

            /* Any name other than CORE or ALL selects BASIC. */
            if( !strcmp( tmp, "CORE" ) )
            {
                mode = 1;
            }
            else if( !strcmp( tmp, "ALL" ) )
            {
                mode = 2;
            }
            else
            {
                mode = 0;
            }
        }
        else if( !strcmp( argv[i], "-w" ) )
        {
            width = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-h" ) )
        {
            height = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-bt" ) )
        {
            char* tmp = icvRequireOptionValue( argc, argv, &i );

            /* Any name other than DAB, RAB or LB selects GAB. */
            if( !strcmp( tmp, boosttypes[0] ) )
            {
                boosttype = 0;
            }
            else if( !strcmp( tmp, boosttypes[1] ) )
            {
                boosttype = 1;
            }
            else if( !strcmp( tmp, boosttypes[2] ) )
            {
                boosttype = 2;
            }
            else
            {
                boosttype = 3;
            }
        }
        else if( !strcmp( argv[i], "-err" ) )
        {
            char* tmp = icvRequireOptionValue( argc, argv, &i );

            /* Any name other than misclass or gini selects entropy. */
            if( !strcmp( tmp, stumperrors[0] ) )
            {
                stumperror = 0;
            }
            else if( !strcmp( tmp, stumperrors[1] ) )
            {
                stumperror = 1;
            }
            else
            {
                stumperror = 2;
            }
        }
        else if( !strcmp( argv[i], "-maxtreesplits" ) )
        {
            maxtreesplits = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-minpos" ) )
        {
            minpos = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
    }

    /* Echo the effective configuration before training starts. */
    printf( "Data dir name: %s\n", ((dirname == NULL) ? nullname : dirname ) );
    printf( "Vec file name: %s\n", ((vecname == NULL) ? nullname : vecname ) );
    printf( "BG  file name: %s, is a vecfile: %s\n", ((bgname == NULL) ? nullname : bgname ), bg_vecfile ? "yes" : "no" );
    printf( "Num pos: %d\n", npos );
    printf( "Num neg: %d\n", nneg );
    printf( "Num stages: %d\n", nstages );
    printf( "Num splits: %d (%s as weak classifier)\n", nsplits,
        (nsplits == 1) ? "stump" : "tree" );
    printf( "Mem: %d MB\n", mem );
    printf( "Symmetric: %s\n", (symmetric) ? "TRUE" : "FALSE" );
    printf( "Min hit rate: %f\n", minhitrate );
    printf( "Max false alarm rate: %f\n", maxfalsealarm );
    printf( "Weight trimming: %f\n", weightfraction );
    printf( "Equal weights: %s\n", (equalweights) ? "TRUE" : "FALSE" );
    printf( "Mode: %s\n", ( (mode == 0) ? "BASIC" : ( (mode == 1) ? "CORE" : "ALL") ) );
    printf( "Width: %d\n", width );
    printf( "Height: %d\n", height );
    //printf( "Max num of precalculated features: %d\n", numprecalculated );
    printf( "Applied boosting algorithm: %s\n", boosttypes[boosttype] );
    printf( "Error (valid only for Discrete and Real AdaBoost): %s\n",
            stumperrors[stumperror] );

    printf( "Max number of splits in tree cascade: %d\n", maxtreesplits );
    printf( "Min number of positive samples per cluster: %d\n", minpos );

    /* Build the tree-structured cascade classifier with the settings above. */
    cvCreateTreeCascadeClassifier( dirname, vecname, bgname,
                               npos, nneg, nstages, mem,
                               nsplits,
                               minhitrate, maxfalsealarm, weightfraction,
                               mode, symmetric,
                               equalweights, width, height,
                               boosttype, stumperror,
                               maxtreesplits, minpos, bg_vecfile );

    return 0;
}