【原文:http://blog.youtueye.com/work/opencv-hog-peopledetector-trainning.html】
opencv2.0 以上版本提供了行人检测的方法,以opencv2.2为例,该示例为opencv安装目录下的:/samples/cpp/peopledetect.cpp,测试效果如下:
我们先看看示例代码的内容:
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
using
namespace
cv;
using
namespace
std;
// Print usage instructions for the peopledetect demo to stdout.
void help()
{
    printf("\nDemonstrate the use of the HoG descriptor using\n"
           " HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
           "Usage:\n"
           "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
}
/*
 * Demo entry point: run OpenCV's built-in HOG people detector either on a
 * single image, or on every image listed in a text file (one path per line;
 * lines starting with '#' are skipped).
 *
 * argv[1] - an image file, or a <image_list>.txt file of image paths.
 * Returns 0 normally, -1 if the list file cannot be opened.
 */
int main(int argc, char** argv)
{
    Mat img;
    FILE* f = 0;
    char _filename[1024];

    if( argc == 1 )
    {
        printf("Usage: peopledetect (<image_filename> | <image_list>.txt)\n");
        return 0;
    }
    img = imread(argv[1]);
    if( img.data )
    {
        // argv[1] is itself an image: process only this one file.
        // snprintf (rather than the original strcpy) cannot overflow the
        // 1024-byte buffer on an overlong path.
        snprintf(_filename, sizeof(_filename), "%s", argv[1]);
    }
    else
    {
        // Otherwise treat argv[1] as a text file listing image paths.
        f = fopen(argv[1], "rt");
        if(!f)
        {
            fprintf( stderr, "ERROR: the specified file could not be loaded\n");
            return -1;
        }
    }

    // The default people-detector coefficients are compiled into OpenCV;
    // no external model file is loaded here.
    HOGDescriptor hog;
    hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
    namedWindow("people detector", 1);

    for(;;)
    {
        char* filename = _filename;
        if(f)
        {
            // Fetch the next path from the list; stop at EOF.
            if(!fgets(filename, (int)sizeof(_filename)-2, f))
                break;
            //while(*filename && isspace(*filename))
            //  ++filename;
            if(filename[0] == '#')
                continue;
            // Strip trailing whitespace (including the '\n' left by fgets).
            // The unsigned char cast keeps isspace() well-defined for
            // bytes >= 0x80 (e.g. non-ASCII path characters).
            size_t l = strlen(filename);
            while(l > 0 && isspace((unsigned char)filename[l-1]))
                --l;
            filename[l] = '\0';
            img = imread(filename);
        }
        printf("%s:\n", filename);
        if(!img.data)
            continue;

        fflush(stdout);
        vector<Rect> found, found_filtered;
        double t = (double)getTickCount();
        // run the detector with default parameters. to get a higher hit-rate
        // (and more false alarms, respectively), decrease the hitThreshold and
        // groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
        hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
        t = (double)getTickCount() - t;
        // Fixed: the pasted code had "tdetection" — the upstream OpenCV
        // sample uses a tab escape "\t" at the start of this format string.
        printf("\tdetection time = %gms\n", t*1000./cv::getTickFrequency());

        // Drop any detection that is entirely contained in another one
        // (a cheap O(n^2) non-maximum suppression; n is small here).
        size_t i, j;
        for( i = 0; i < found.size(); i++ )
        {
            Rect r = found[i];
            for( j = 0; j < found.size(); j++ )
                if( j != i && (r & found[j]) == r)
                    break;
            if( j == found.size() )
                found_filtered.push_back(r);
        }
        for( i = 0; i < found_filtered.size(); i++ )
        {
            Rect r = found_filtered[i];
            // the HOG detector returns slightly larger rectangles than the real objects.
            // so we slightly shrink the rectangles to get a nicer output.
            r.x += cvRound(r.width*0.1);
            r.width = cvRound(r.width*0.8);
            r.y += cvRound(r.height*0.07);
            r.height = cvRound(r.height*0.8);
            rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);
        }
        imshow("people detector", img);

        // 'q'/'Q' quits; single-image mode exits after the first key press.
        int c = waitKey(0) & 255;
        if( c == 'q' || c == 'Q' || !f)
            break;
    }
    if(f)
        fclose(f);
    return 0;
}
|
你会发现代码中没有类似人脸检测时的分类器(模型)的载入过程,而是用
|
1
|
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
|
载入默认模型。找到hog的源码,在/modules/objdetect/hog.cpp,从第907行开始的代码如下:
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
vector<
float
> HOGDescriptor::getDefaultPeopleDetector()
{
static
const
float
detector[] = {
0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
-0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
-0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
-3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,....
|
显然,行人检测所用到的模型数据被作为常量写在源代码中,但这些模型数据是如何得来的呢?如果我要用我自己的样本training,然后用得到的model去detect该如何操作呢?别急,我们从opencv行人检测所用到的算法说起。
opencv行人检测所用到的算法源自Navneet Dalal和Bill Triggs在2005's CVPR上的文章Histograms of Oriented Gradients for Human Detection.这里是英文和中文的介绍。作者所在的研究机构(INRIA:French National Institute for Research in Computer Science and Control,法国国家计算机技术和控制研究所)发布了这套算法的源码:INRIA Object Detection and Localization Toolkit.
那么,opencv中的PeopleDetector是否就原封不动地使用了这套算法呢,为了求证这一问题,笔者首先比较了两者的模型数据:下载OLTbinaries.zip,找到/HOG/model_4BiSVMLight.alt文件(二进制形式的数据),用代码将其中数据保存为文本格式(这个要根据上面的源码learcode.zip),其内容如下:
|
1
2
3
4
5
6
7
8
9
10
11
12
|
0.05359386 -0.14721455 -0.05532170 0.05077307 0.11547081 -0.04268804 0.04635834 -0.05468199 0.08232084 0.10424068
-0.02294518 0.01108519 0.01378693 0.11193510 0.01268418 0.08528346 -0.06309239 0.13054632 0.08100729 -0.05209739
-0.04315529 0.09341384 0.11035026 -0.07596217 -0.05517511 -0.04465296 0.02947334 0.04555536 -0.00355954 0.07818956
0.07730991 0.07890716 0.06222893 0.09001381 -0.03574381 0.03414327 0.05677258 -0.04773581 0.03746637 -0.03521175
0.06955440 -0.03849038 0.01052293 0.01736112 0.10867710 0.08748853 0.00329740 0.10907028 0.07913758 0.10393069
0.02091867 0.11594022 0.13182420 0.09879354 0.05362710 -0.06745391 -0.00701261 0.00524702 0.03236255 0.01407916
0.02207983 0.02537322 0.04547948 0.07200756 0.03129894 -0.06274468 0.02107014 0.06035208 0.08636236 0.00453164
0.02193363 0.02309801 0.05568166 -0.02645093 0.04448695 0.02837519 0.08975695 0.04461517 0.08975355 0.07514391
0.02306982 0.10410084 0.06368385 0.05943465 0.00458421 0.05220337 0.06675851 0.08358569 0.06712102 0.06559004
-0.03930481 -0.00915937 -0.05897915 0.02816453 0.05032348 0.06780671 0.03377650 -0.00060942 -0.01795146 -0.03083684
-0.01302475 -0.02972313 0.00788707 -0.03525961 -0.00250398 0.05245084 0.11791293 -0.02167498 0.05299332 0.06640524
0.05190265 -0.00827317 0.03033127 0.05842173 -0.00401050 -0.00625106 0.05862958 -0.02465461 0.05546781 -0.08228195 ....
|
数值居然同HOGDescriptor::getDefaultPeopleDetector()中的是一样!那么,你就可以用上面的OLT去trainning你的样本,然后把得到的model数据替换getDefaultPeopleDetector()中的数据,你便可以去进行你要的目标检测啦!
为了验证这一想法的正确性和可行性,笔者做了些实验,在Ubuntu10.4 g++4.4.5环境中,步骤如下:
- 下载release版的程序:OLTbinaries.zip
- 下载样本:INRIAPerson.tar
- 在目录OLTbinaries/下建立两个文件夹test, train. 将INRIAperson/Test/neg拷贝到test/下,INRIAperson/Train/neg拷贝到train/下;将INRIAperson/test_64x128_H96拷贝到test/下重命名为pos,将INRIAperson/train_64x128_H96拷贝到train/下重命名为pos;
- 将test/neg , test/pos各自文件夹中的所有图片文件名分别输出到neg.list, pos.list, 并放置在test/下; 同样地操作在train/。
<pre class="brush: shell; gutter: true">amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./neg > neg.list amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./pos > pos.list</pre>
- 到这里,样本数据便准备好了,那么,只要修改OLTbinaries/runall.sh相关参数然后运行此脚本,一小时左右的时间,便会在OLTbinaries/HOG/下产生一个model_4BiSVMLight.alt文件,你的模型数据便保存在这里面。到这里,你便成功training了一个model。
注意事项:
- runall.sh中第5行,按你的正负样本数目修改:
1
HardOption=" --poscases 2416 --negcases 1218 " - runall.sh中第21行,按你的样本文件夹所在(InDir)及输出文件所在(OutDir)修改:
1234
OutDir=./HOG
InDir=./
OutFile=$OutDir/record
CMDLINE=$OutDir/record - training过程中会产生两个G左右的临时文件在OutDir(=./HOG)中,所以要确保硬盘空间足够,结束时删除,只留model_4BiSVMLight.alt。
- 整个training过程分4步,有4条屏幕输出信息,最可能出现的错误是样本文件路径不对,可在pos.list neg.list 中用图像文件的绝对路径。
1234
First iteration complete
Hard examples created
Doing second learning
Second iteration complete - 如果你用的是自己的样本,注意修改其他参数(待究),如正样本的大小:
12
WIDTH=64; export WIDTH
HEIGHT=128; export HEIGHT
有了模型,怎么去做目标检测呢?你可以做以下的试验:
- 使用bin在OLTbinaries/bin/中classify_rhog: classify_rhog [待检测图片] [目标位置数据结果保存的文本文件] [模型文件] -i [位置画在图像文件]
amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/bin$ ./classify_rhog person-1.jpg result.txt model_4BiSVMLight.alt -i result.jpg
- 使用lib在OLTbinaries/lib/中:
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
/** =============================================** Filename: lib-detector.cpp** Description: Code to detect object** Compiler: gcc** Author: Amadeu zou* URL: www.youtueye.com** =============================================*/#include <cv.h>#include <highgui.h>#include <string>#include <iostream>#include <algorithm>#include <iterator>#include <X11/Xlib.h>#include <Imlib2.h>#include <lear/interface/windetect.h>// change this path as appropriate.usingnamespacestd;std::list<DetectedRegion> detector(char* imgf, WinDetectClassify windetect, LinearClassify* classifier){std::list<DetectedRegion> detections;// read imageImlib_Image image = imlib_load_image(imgf);// if the load was successfulif(image){// set the image we loaded as the current context image to work onimlib_context_set_image(image);}else{//std::cerr << "Unable to read image: " << argv[1] << std::endl;returndetections;}intwidth = imlib_image_get_width(),height = imlib_image_get_height();typedefunsignedcharuchar;DATA32* data = imlib_image_get_data_for_reading_only();uchar* imagedata =newuchar[3*width*height*sizeof(uchar)];for(intj= 0; j< height; ++j)for(inti= 0; i< width; ++i) {uchar* pixel = imagedata+(i+j*width)*3;intargb = data[i+j*width];pixel[0] =static_cast<uchar>((argb & 0x00FF0000)>>16);pixel[1] =static_cast<uchar>((argb & 0x0000FF00)>> 8);pixel[2] =static_cast<uchar>((argb & 0x000000FF) );}imlib_free_image();// now get detectionswindetect.test(*classifier, detections, imagedata, width, height);delete[] imagedata;returndetections;}intmain(intargc,char** argv) {if(argc != 4) {std::cout <<"Error"<< std::endl;return0;}charmodelpath[256];strcpy(modelpath,argv[2]);string model_file(modelpath) ;// initialize the person detector. 
All default parameters are set for person detector.WinDetectClassify windetect;// use default person detector.RHOGDenseParam desc;LinearClassify* classifier = NULL;// initialize it to 64x128 person detector.classifier =newLinearClassify(model_file, 0);windetect.init(&desc);// initialize the descriptor computationstd::list<DetectedRegion> detections;detections = detector(argv[1], windetect, classifier);//draw region in imageIplImage* img = cvLoadImage(argv[1],1);for(list<DetectedRegion>::iterator itor=detections.begin();itor!=detections.end();++itor){cvRectangle(img,cvPoint(itor->x,itor->y),cvPoint(itor->x+itor->width,itor->y+itor->height),cvScalar(0,0,255),2);}cvSaveImage(argv[3],img);cvReleaseImage(&img);//print detectionsstd::copy(detections.begin(), detections.end(), std::ostream_iterator<DetectedRegion>(std::cout,"\n"));return0;}编译:
1g++ `pkg-config --cflags --libs opencv` -O3 -o lib-detector lib-detector.cpp -I. -I/usr/include-L. -lcmdline -lcvip -lclassifier -llearutil -lboost_filesystem-gcc -lblitz -L/usr/lib-lImlib2 -lfreetype -lz -L/usr/X11R6/lib-lX11 -lXext -ldl -lm结果:
1234amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/lib$ ./lib-detectorperson-1.jpg model_4BiSVMLight.alt result.jpg298 215 145 290 2.2674 1.10256009e-0113 9 237 475 3.71704 1.31164089e-01234 -7 230 460 3.59693 1.35627717e-01 - 使用源码在learcode/app/中classify_rhog.cpp:配置及使用见learcode/README
- 将模型数据copy到opencv的getDefaultPeopleDetector(),但要重新编译objdetect链接,没必要这么搞。从model中读取数据如下(共classifier->length() = 3780 + 1数据,1为classifier->linearbias):
1234567891011121314151617181920
intmain(intargc,char** argv) {string model_file ="model_4BiSVMLight.alt";LinearClassify* classifier = NULL;classifier =newLinearClassify(model_file, 0);cout<<"classifier->length() : "<<classifier->length()<<endl;FILE* fptr =fopen("hog.txt","w");for(inti = 0;i<classifier->length();i++){fprintf(fptr,"%1.8f ",classifier->linearwt[i]);if(0 == (i+1) % 10)fprintf(fptr,"%s","\n");}fprintf(fptr,"%1.8f ",classifier->linearbias);fclose(fptr);return0;} - opencv直接载入model: 将peopeldetect.cpp第49行hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector())里面的vector换成你的模板数据,模板数据从model_4BiSVMLight.alt中载入,方法参照learcode/lib/windetect.cpp第1175行处LinearClassify::LinearClassify(std::string& modelfile, const int verbose):
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
/** =====================================================================================** Filename: model-detector.cpp** Description: Code to detect object** Compiler: gcc** Author: Amadeu zou* URL: www.youtueye.com** =====================================================================================*/#include <fstream>#include <iostream>#include <vector>#include <stdio.h>#include <string.h>#include <ctype.h>#include "opencv2/imgproc/imgproc.hpp"#include "opencv2/objdetect/objdetect.hpp"#include "opencv2/highgui/highgui.hpp"usingnamespacestd;usingnamespacecv;vector<float> load_lear_model(constchar* model_file){vector<float> detector;FILE*modelfl;if((modelfl =fopen(model_file,"rb")) == NULL){cout<<"Unable to open the modelfile"<<endl;returndetector;}charversion_buffer[10];if(!fread(&version_buffer,sizeof(char),10,modelfl)){cout<<"Unable to read version"<<endl;returndetector;}if(strcmp(version_buffer,"V6.01")){cout<<"Version of model-file does not match version of svm_classify!"<<endl;returndetector;}// read version numberintversion = 0;if(!fread(&version,sizeof(int),1,modelfl)){cout<<"Unable to read version number"<<endl;returndetector;}if(version < 200){cout<<"Does not support model file compiled for light version"<<endl;returndetector;}longkernel_type;fread(&(kernel_type),sizeof(long),1,modelfl);{// ignore theselongpoly_degree;fread(&(poly_degree),sizeof(long),1,modelfl);doublerbf_gamma;fread(&(rbf_gamma),sizeof(double),1,modelfl);doublecoef_lin;fread(&(coef_lin),sizeof(double),1,modelfl);doublecoef_const;fread(&(coef_const),sizeof(double),1,modelfl);longl;fread(&l,sizeof(long),1,modelfl);char* custom =newchar[l];fread(custom,sizeof(char),l,modelfl);delete[] custom;}longtotwords;fread(&(totwords),sizeof(long),1,modelfl);{// ignore theselongtotdoc;fread(&(totdoc),sizeof(long),1,modelfl);longsv_num;fread(&(sv_num),sizeof(long),1,modelfl);}doublelinearbias = 0.0;fread(&linearbias,sizeof(double),1,modelfl);if(kernel_type == 0) {/* linear kernel *//* save linear 
wts also */double* linearwt =newdouble[totwords+1];intlength = totwords;fread(linearwt,sizeof(double),totwords+1,modelfl);for(inti = 0;i<totwords;i++){floatterm = linearwt[i];detector.push_back(term);}floatterm = -linearbias;detector.push_back(term);delete[] linearwt;}else{cout<<"Only supports linear SVM model files"<<endl;}fclose(modelfl);returndetector;}voidhelp(){printf("\nDemonstrate the use of the HoG descriptor using\n"" HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n""Usage:\n""./peopledetect (<image_filename> | <image_list>.txt)\n\n");}intmain(intargc,char** argv){Mat img;FILE* f = 0;char_filename[1024];if( argc != 3 ){cout<<"ERROR"<<endl;return0;}img = imread(argv[1]);if( img.data ){strcpy(_filename, argv[1]);}else{f =fopen(argv[1],"rt");if(!f){fprintf( stderr,"ERROR: the specified file could not be loaded\n");return-1;}}HOGDescriptor hog;//hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());vector<float> detector = load_lear_model(argv[2]);hog.setSVMDetector(detector);namedWindow("people detector", 1);for(;;){char* filename = _filename;if(f){if(!fgets(filename, (int)sizeof(_filename)-2, f))break;//while(*filename && isspace(*filename))// ++filename;if(filename[0] =='#')continue;intl =strlen(filename);while(l > 0 &&isspace(filename[l-1]))--l;filename[l] ='\0';img = imread(filename);}printf("%s:\n", filename);if(!img.data)continue;fflush(stdout);vector<Rect> found, found_filtered;doublet = (double)getTickCount();// run the detector with default parameters. 
to get a higher hit-rate// (and more false alarms, respectively), decrease the hitThreshold and// groupThreshold (set groupThreshold to 0 to turn off the grouping completely).hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);t = (double)getTickCount() - t;printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());size_ti, j;for( i = 0; i < found.size(); i++ ){Rect r = found[i];for( j = 0; j < found.size(); j++ )if( j != i && (r & found[j]) == r)break;if( j == found.size() )found_filtered.push_back(r);}for( i = 0; i < found_filtered.size(); i++ ){Rect r = found_filtered[i];// the HOG detector returns slightly larger rectangles than the real objects.// so we slightly shrink the rectangles to get a nicer output.r.x += cvRound(r.width*0.1);r.width = cvRound(r.width*0.8);r.y += cvRound(r.height*0.07);r.height = cvRound(r.height*0.8);rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);}imshow("people detector", img);intc = waitKey(0) & 255;if( c =='q'|| c =='Q'|| !f)break;}if(f)fclose(f);return0;}结果:
1amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/lib$ ./model-detect person-1.jpg model_4BiSVMLight.alt
到这里,opencv peopledetect从模型训练到目标检测的过程便完成了,训练过程用的是算法作者的程序,不知opencv是否已将这一过程融入。
下一篇我将介绍opencv latent svm detect从模型训练到目标检测的过程。


743

被折叠的 条评论
为什么被折叠?



