OpenCV手册中提到,opencv_haartraining和opencv_traincascade这两个程序都可以用来训练级联分类器。
两者的区别是:traincascade同时支持Haar和LBP特征,并且更易于扩展其他特征;两者输出的分类器文件格式不同,traincascade支持新旧两种格式,并可以将训练好的级联分类器以旧格式导出。
与训练相关的辅助程序
createsamples.cpp:用来准备训练用的正样本数据和测试数据,输出.vec的文件,能被两种程序使用,文件以二进制方式存储图像。
performance.cpp:用来评估haartraining输出的分类器,它读入一组标注好的图像,运行分类器,并报告性能,如检测到的物体数目、漏检数目、误检数目和其他信息。
首先训练Haar分类器的步骤:
1.准备正负样本;
2.用createsamples程序建立正样本集合;
3.用haartraining程序训练,得到最终的分类器模型xml文件。
详细过程可参考该链接:http://www.doc88.com/p-686754097427.html
为了深入理解,下面对每个函数逐一进行详细分析。虽然haartraining相比traincascade更老,但还是先从haartraining看起。
/*
 * haartraining.cpp
 * 训练级联分类器
 * Train cascade classifier
 */
#include <cstdio>
#include <cstring>
#include <cstdlib>
using namespace std;
#include "cvhaartraining.h"
/*
 * Fetches the value of the command-line option located at argv[*index].
 *
 * On success *index is advanced onto the value and the value is returned.
 * When the option is the last argument there is no value to read, so an
 * error is reported on stderr and the process exits with EXIT_FAILURE
 * instead of reading argv[argc] (a null pointer).
 */
static char* icvRequireOptionValue( int argc, char* argv[], int* index )
{
    if( *index + 1 >= argc )
    {
        fprintf( stderr, "Option %s requires a value\n", argv[*index] );
        exit( EXIT_FAILURE );
    }
    *index += 1;
    return argv[*index];
}

/*
 * Entry point of the haartraining tool.
 *
 * Parses the command line into the training parameters, echoes the
 * effective settings to stdout and hands them to
 * cvCreateTreeCascadeClassifier().
 *
 * argc, argv - standard command-line arguments. With no arguments the
 *              usage text (including the built-in defaults) is printed.
 *
 * Returns 0 after printing the usage text or after the training call
 * (the result of the training call is not inspected). Exits with
 * EXIT_FAILURE when an option that needs a value is given without one.
 * Unrecognized arguments are ignored.
 */
int main( int argc, char* argv[] )
{
    int i = 0;
    char* nullname = (char*)"(NULL)";
    char* vecname = NULL;
    char* dirname = NULL;
    char* bgname = NULL;

    bool bg_vecfile = false;
    int npos = 2000;
    int nneg = 2000;
    int nstages = 14;
    int mem = 200;
    int nsplits = 1;
    float minhitrate = 0.995F;
    float maxfalsealarm = 0.5F;
    float weightfraction = 0.95F;
    int mode = 0;
    int symmetric = 1;
    int equalweights = 0;
    int width = 24;
    int height = 24;
    const char* boosttypes[] = { "DAB", "RAB", "LB", "GAB" };
    int boosttype = 3;
    const char* stumperrors[] = { "misclass", "gini", "entropy" };
    int stumperror = 0;
    int maxtreesplits = 0;
    int minpos = 500;

    if( argc == 1 )
    {
        /* Command-line parameters for training; the original note refers to the
           parameter description of cvCreateCascadeClassifier in cvhaartraining.h. */
        printf( "Usage: %s\n -data <dir_name>\n"  // directory of the cascade classifier to create
                " -vec <vec_file_name>\n"  // .vec file with the positive sample images
                " -bg <background_file_name>\n"  // description file of the background images
                " [-bg-vecfile]\n"
                " [-npos <number_of_positive_samples = %d>]\n"  // positive samples used to train each stage
                " [-nneg <number_of_negative_samples = %d>]\n"  // negative samples used to train each stage
                " [-nstages <number_of_stages = %d>]\n"  // number of stages to train
                " [-nsplits <number_of_splits = %d>]\n"  // binary splits per weak classifier: 1 = stump, 2 or more = tree
                " [-mem <memory_in_MB = %d>]\n"
                " [-sym (default)] [-nonsym]\n"  // if non-zero, vertical symmetry is assumed
                " [-minhitrate <min_hit_rate = %f>]\n"  // minimum hit rate required for each stage
                " [-maxfalsealarm <max_false_alarm_rate = %f>]\n"  // maximum false alarm rate allowed for each stage
                " [-weighttrimming <weight_trimming = %f>]\n"  // weight trimming parameter
                " [-eqw]\n"  // if non-zero, all samples start with equal weights
                " [-mode <BASIC (default) | CORE | ALL>]\n"  // mode: 0-BASIC=Viola, 1-CORE=ALL upright, 2-ALL=ALL features
                " [-w <sample_width = %d>]\n"  // sample width
                " [-h <sample_height = %d>]\n"  // sample height
                " [-bt <DAB | RAB | LB | GAB (default)>]\n"  // boosting algorithm: 0 - Discrete AdaBoost, 1 - Real AdaBoost, 2 - LogitBoost, 3 - Gentle AdaBoost
                " [-err <misclass (default) | gini | entropy>]\n"  // error type used when Discrete AdaBoost is applied
                " [-maxtreesplits <max_number_of_splits_in_tree_cascade = %d>]\n"  // maximum number of splits in the tree cascade
                " [-minpos <min_number_of_positive_samples_per_cluster = %d>]\n",  // minimum number of positive samples per cluster
                argv[0], npos, nneg, nstages, nsplits, mem,
                minhitrate, maxfalsealarm, weightfraction, width, height,
                maxtreesplits, minpos );

        return 0;
    }

    for( i = 1; i < argc; i++ )
    {
        if( !strcmp( argv[i], "-data" ) )
        {
            dirname = icvRequireOptionValue( argc, argv, &i );
        }
        else if( !strcmp( argv[i], "-vec" ) )
        {
            vecname = icvRequireOptionValue( argc, argv, &i );
        }
        else if( !strcmp( argv[i], "-bg" ) )
        {
            bgname = icvRequireOptionValue( argc, argv, &i );
        }
        else if( !strcmp( argv[i], "-bg-vecfile" ) )
        {
            bg_vecfile = true;
        }
        else if( !strcmp( argv[i], "-npos" ) )
        {
            npos = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-nneg" ) )
        {
            nneg = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-nstages" ) )
        {
            nstages = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-nsplits" ) )
        {
            nsplits = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-mem" ) )
        {
            mem = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-sym" ) )
        {
            symmetric = 1;
        }
        else if( !strcmp( argv[i], "-nonsym" ) )
        {
            symmetric = 0;
        }
        else if( !strcmp( argv[i], "-minhitrate" ) )
        {
            minhitrate = (float) atof( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-maxfalsealarm" ) )
        {
            maxfalsealarm = (float) atof( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-weighttrimming" ) )
        {
            weightfraction = (float) atof( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-eqw" ) )
        {
            equalweights = 1;
        }
        else if( !strcmp( argv[i], "-mode" ) )
        {
            /* Anything other than CORE or ALL selects BASIC. */
            char* tmp = icvRequireOptionValue( argc, argv, &i );

            if( !strcmp( tmp, "CORE" ) )
            {
                mode = 1;
            }
            else if( !strcmp( tmp, "ALL" ) )
            {
                mode = 2;
            }
            else
            {
                mode = 0;
            }
        }
        else if( !strcmp( argv[i], "-w" ) )
        {
            width = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-h" ) )
        {
            height = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-bt" ) )
        {
            /* Any name other than DAB, RAB or LB selects GAB (index 3). */
            char* tmp = icvRequireOptionValue( argc, argv, &i );
            int k;

            boosttype = 3;
            for( k = 0; k < 3; k++ )
            {
                if( !strcmp( tmp, boosttypes[k] ) )
                {
                    boosttype = k;
                    break;
                }
            }
        }
        else if( !strcmp( argv[i], "-err" ) )
        {
            /* Any name other than misclass or gini selects entropy (index 2),
               which differs from the misclass default used when -err is absent. */
            char* tmp = icvRequireOptionValue( argc, argv, &i );
            int k;

            stumperror = 2;
            for( k = 0; k < 2; k++ )
            {
                if( !strcmp( tmp, stumperrors[k] ) )
                {
                    stumperror = k;
                    break;
                }
            }
        }
        else if( !strcmp( argv[i], "-maxtreesplits" ) )
        {
            maxtreesplits = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
        else if( !strcmp( argv[i], "-minpos" ) )
        {
            minpos = atoi( icvRequireOptionValue( argc, argv, &i ) );
        }
    }

    /* Echo the effective configuration before training starts. */
    printf( "Data dir name: %s\n", ((dirname == NULL) ? nullname : dirname ) );
    printf( "Vec file name: %s\n", ((vecname == NULL) ? nullname : vecname ) );
    printf( "BG file name: %s, is a vecfile: %s\n", ((bgname == NULL) ? nullname : bgname ), bg_vecfile ? "yes" : "no" );
    printf( "Num pos: %d\n", npos );
    printf( "Num neg: %d\n", nneg );
    printf( "Num stages: %d\n", nstages );
    printf( "Num splits: %d (%s as weak classifier)\n", nsplits,
            (nsplits == 1) ? "stump" : "tree" );
    printf( "Mem: %d MB\n", mem );
    printf( "Symmetric: %s\n", (symmetric) ? "TRUE" : "FALSE" );
    printf( "Min hit rate: %f\n", minhitrate );
    printf( "Max false alarm rate: %f\n", maxfalsealarm );
    printf( "Weight trimming: %f\n", weightfraction );
    printf( "Equal weights: %s\n", (equalweights) ? "TRUE" : "FALSE" );
    printf( "Mode: %s\n", ( (mode == 0) ? "BASIC" : ( (mode == 1) ? "CORE" : "ALL") ) );
    printf( "Width: %d\n", width );
    printf( "Height: %d\n", height );
    printf( "Applied boosting algorithm: %s\n", boosttypes[boosttype] );
    printf( "Error (valid only for Discrete and Real AdaBoost): %s\n",
            stumperrors[stumperror] );
    printf( "Max number of splits in tree cascade: %d\n", maxtreesplits );
    printf( "Min number of positive samples per cluster: %d\n", minpos );

    /* Create the tree-structured cascade classifier. */
    cvCreateTreeCascadeClassifier( dirname, vecname, bgname,
                                   npos, nneg, nstages, mem,
                                   nsplits,
                                   minhitrate, maxfalsealarm, weightfraction,
                                   mode, symmetric,
                                   equalweights, width, height,
                                   boosttype, stumperror,
                                   maxtreesplits, minpos, bg_vecfile );

    return 0;
}