OpenCV HOG+SVM行人检测：从训练到检测

最新推荐文章于 2021-02-24 21:08:57 发布

冰__蓝

最新推荐文章于 2021-02-24 21:08:57 发布

阅读量7.5k

点赞数 4

分类专栏：图像处理 C++

C++ 同时被 2 个专栏收录

8 篇文章 0 订阅

订阅专栏

图像处理

5 篇文章 0 订阅

订阅专栏

本文为转载，并对原文格式进行了修整。

以现在使用的OpenCV 2.4.10为例，行人检测的Demo在“D:\opencv\sources\samples\cpp\peopledetect.cpp”下，源代码如下所示：

#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"

#include <stdio.h>
#include <string.h>
#include <ctype.h>

using namespace cv;
using namespace std;

// static void help()
// {
//     printf(
//             "\nDemonstrate the use of the HoG descriptor using\n"
//             "  HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
//             "Usage:\n"
//             "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
// }

// Entry point of the people-detection demo.
//
// argv[1] is either an image file or a text file listing one image path per
// line (lines whose first character is '#' are skipped). Each image is run
// through the HOG descriptor configured with OpenCV's default people detector,
// the filtered detections are drawn as green rectangles, and the result is
// shown in a window.
//
// Returns 0 on normal exit (including the usage message) and -1 when argv[1]
// can be opened neither as an image nor as a list file.
int main(int argc, char** argv)
{
    Mat img;
    FILE* f = 0;
    char _filename[1024];

    if( argc == 1 )
    {
        printf("Usage: peopledetect (<image_filename> | <image_list>.txt)\n");
        return 0;
    }
    img = imread(argv[1]);

    if( img.data )
    {
        // argv[1] may be longer than the buffer, so copy with a bound and
        // terminate explicitly. The image is already loaded at this point;
        // the name is only printed below, so truncation is harmless.
        strncpy(_filename, argv[1], sizeof(_filename) - 1);
        _filename[sizeof(_filename) - 1] = '\0';
    }
    else
    {
        // Not readable as an image: treat the argument as a list of image paths.
        f = fopen(argv[1], "rt");
        if(!f)
        {
            fprintf( stderr, "ERROR: the specified file could not be loaded\n");
            return -1;
        }
    }

    HOGDescriptor hog;
    hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
    namedWindow("people detector", 1);

    for(;;)
    {
        char* filename = _filename;
        if(f)
        {
            // List mode: fetch the next path; stop at end of file.
            if(!fgets(filename, (int)sizeof(_filename)-2, f))
                break;
            //while(*filename && isspace(*filename))
            //  ++filename;
            if(filename[0] == '#')
                continue;
            // Strip the trailing newline / whitespace left by fgets. The cast to
            // unsigned char is required: passing a negative char value (e.g. a
            // byte of a non-ASCII path) to isspace() is undefined behaviour.
            int l = (int)strlen(filename);
            while(l > 0 && isspace(static_cast<unsigned char>(filename[l-1])))
                --l;
            filename[l] = '\0';
            img = imread(filename);
        }
        printf("%s:\n", filename);
        if(!img.data)
            continue; // unreadable list entry: move on to the next line

        fflush(stdout);
        vector<Rect> found, found_filtered;
        double t = (double)getTickCount();
        // run the detector with default parameters. to get a higher hit-rate
        // (and more false alarms, respectively), decrease the hitThreshold and
        // groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
        hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
        t = (double)getTickCount() - t;
        printf("\tdetection time = %gms\n", t*1000./cv::getTickFrequency());

        // Keep only rectangles that are not fully contained in another detection:
        // (r & found[j]) == r holds exactly when r lies inside found[j].
        size_t i, j;
        for( i = 0; i < found.size(); i++ )
        {
            Rect r = found[i];
            for( j = 0; j < found.size(); j++ )
                if( j != i && (r & found[j]) == r)
                    break;
            if( j == found.size() )
                found_filtered.push_back(r);
        }
        for( i = 0; i < found_filtered.size(); i++ )
        {
            Rect r = found_filtered[i];
            // the HOG detector returns slightly larger rectangles than the real objects.
            // so we slightly shrink the rectangles to get a nicer output.
            r.x += cvRound(r.width*0.1);
            r.width = cvRound(r.width*0.8);
            r.y += cvRound(r.height*0.07);
            r.height = cvRound(r.height*0.8);
            rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);
        }
        imshow("people detector", img);
        // Wait for a key; 'q'/'Q' quits, and single-image mode (no list file)
        // always exits after the first image.
        int c = waitKey(0) & 255;
        if( c == 'q' || c == 'Q' || !f)
            break;
    }
    if(f)
        fclose(f);
    return 0;
}

在该Demo里，没有类似人脸检测时的分类器（模型）的载入过程，而是用下面的语句载入默认模型：

hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

该函数的源码位于 D:\opencv\sources\modules\objdetect\src\hog.cpp 文件中第 1081 行开始，截取片段如下所示：

vector<float> HOGDescriptor::getDefaultPeopleDetector()
{
    static const float detector[] = {
       0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
       0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
       0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
       0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
       -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
       -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
       -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
       0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
       0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
       0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
       0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,

从上可知，行人检测所用到的模型数据被作为常量写在源代码里面，有如下问题：

1）这些模型数据是如何得来的？

2）如果使用我自己的样本进行 training，该如何使用得到的 model 去 detect？

首先，从OpenCV所使用到的行人检测算法说起。OpenCV行人检测所用到的算法源自 Navneet Dalal 和 Bill Triggs 在2005年 CVPR 上的文章 Histograms of Oriented Gradients for Human Detection。作者所在的研究机构（INRIA：French National Institute for Research in Computer Science and Control，法国国家计算机技术和控制研究所）发布了这套算法的源码 INRIA Object Detection and Localization Toolkit。

那么，OpenCV是否原封不动的使用了这套算法呢？为了求证该问题，笔者比较了两者的模型数据，下载OLTbinaries.zip 找到 /HOG/model_4BiSVMLight.alt 文件（二进制形式的数据），用代码将其中数据保存为文本格式（这个要根据上面的源码learncode），其内容如下：

      0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
       0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
       0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
       0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
       -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
       -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
       -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
       0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
       0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
       0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
       0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,

由此可见，两者的数据一模一样。那么你使用 OLT 去 training 你的样本，把得到的 model 替换 getDefaultPeopleDetector() 中的数据，就可以进行你自己的目标检测了！

---------------------------------------------------------------------------------------------------------

训练样本

为了验证这一想法的正确性和可行性，笔者进行了实验，使用的环境为 Ubuntu10.4 g++ 4.4.5:

具体实现步骤如下：

1）下载release版本的程序：OLTbinaries.zip

2）下载样本：INRIAPerson.tar

3）在OLTbinaries/ 下建立两个文件夹 test, train，将INRIAperson/Test/neg拷贝到 test/ 下，INRIAperson/Train/neg拷贝到 train/ 下；将INRIAperson/test_64x128_H96拷贝到 test/ 下，重命名为 pos， INRIAperson/train_64x128_H96 拷贝到 train/ 下，重命名为 pos;

4）将 test/neg， test/pos 各自文件夹中的所有图片文件名分别输出到 neg.list, pos.list，并放置在 test/ 下，同样的操作在 train/。

amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./neg > neg.list
amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./pos > pos.list

5）到这里，样本数据便准备好了，只要修改 OLTbinaries/runall.sh 相关参数，然后运行这些脚本，一个小时左右的时间，便会在 OLTbinaries/HOG 下产生一个 model_4BiSVMLight.alt 文件，模型数据便保存在这里。到这里，你便成功 training 了一个 model。

注意事项：

runall.sh 中第 5 行，按你的正负样本数目修改：

HardOption=" --poscases 2416 --negcases 12180 "

runall.sh 中第 21 行，样本文件夹及输出文件夹所在位置：

OutDir=HOG

OutFile=$OutDir/record
CMDLINE=$OutDir/record

training 过程中会产生 2 个 G 左右的临时文件在 OutDir（=./HOG）中，结束时删除，只保留 model_4BiSVMLight.alt。
整个 training 过程分为4步，有4条屏幕输出信息：

First iteration complete

Hard examples created

Doing second learning

Second iteration complete

如果使用的是自己的样本，注意修改其他参数（待纠），如正样本的大小：

WIDTH=64; export WIDTH
HEIGHT=128; export HEIGHT

检测过程待补

冰__蓝

关注

4
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录