人脸检测【dlib源码分析】

最新推荐文章于 2024-10-08 21:48:03 发布

chiukeung

最新推荐文章于 2024-10-08 21:48:03 发布

阅读量5.3k

点赞数

分类专栏：图像处理

本文链接：https://blog.csdn.net/chiukeung/article/details/79923558

版权

本文详细分析了dlib开源库中的人脸检测过程，包括图像金字塔构建、HOG特征提取、SVM分类以及一些关键技巧。通过构建图像金字塔，使用双线性插值放大图像，提取HOG特征并利用预训练的SVM模型进行人脸检测。在特征提取阶段，计算HOG特征时采用了快速确定梯度方向的方法，并进行了归一化处理。

摘要由CSDN通过智能技术生成

本文梳理dlib开源库中人脸检测示例代码（dlib_face_detection.cpp）的执行流程，涉及图像金字塔（双线性插值）、HOG特征提取、SVM分类，以及其中用到的一些技巧（trick）。

整体流程：构建图像金字塔 → 对每一级图像提取HOG特征 → SVM分类（判断是否存在人脸）

在dlib_face_detection.cpp的main函数开始，

执行 frontal_face_detector detector = get_frontal_face_detector(); 调用 frontal_face_detection.h 19:

// The face detector type: an object_detector whose scanner is a
// scan_fhog_pyramid built on pyramid_down<6>.
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;
    // Declared here, defined elsewhere: provides the serialized model as a string.
    inline const std::string get_serialized_frontal_faces();
    // Fetch the already-trained model, which is stored in string form;
    // deserializing that string yields the ready-to-use detector.
    inline frontal_face_detector get_frontal_face_detector()
    {
        // Wrap the serialized string in an input stream so deserialize() can read it.
        std::istringstream sin(get_serialized_frontal_faces());
        frontal_face_detector detector;
        // Populate the default-constructed detector from the stream.
        deserialize(detector, sin); 
        return detector;
    }

执行 deserialize(detector, sin); 调用 object_detector.h 240:

// Deserializes an object_detector from `in`.
// The stream starts with an int version number which selects the layout
// of the data that follows (1 or 2).
// Throws serialization_error for any other version.
template <typename T>
    void deserialize (
        object_detector<T>& item,
        std::istream& in 
    )
    {
        int version = 0;
        deserialize(version, in);
        if (version == 1)
        {
            // Version 1 layout: scanner, exactly one entry of item.w, then boxes_overlap.
            deserialize(item.scanner, in);
            item.w.resize(1);
            deserialize(item.w[0].w, in);
            // init() is called with the scanner only after the entry's w has been read.
            item.w[0].init(item.scanner);
            deserialize(item.boxes_overlap, in);
        }
        else if (version == 2)
        {
            // Version 2 layout: scanner, boxes_overlap, a detector count,
            // then that many entries of item.w.
            deserialize(item.scanner, in);
            deserialize(item.boxes_overlap, in);
            unsigned long num_detectors = 0;
            deserialize(num_detectors, in);
            item.w.resize(num_detectors);
            for (unsigned long i = 0; i < item.w.size(); ++i)
            {
                // Read each entry's w, then initialize it against the shared scanner.
                deserialize(item.w[i].w, in);
                item.w[i].init(item.scanner);
            }
        }
        else 
        {
            // Unknown format version: refuse to interpret the rest of the stream.
            throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
        }
    }

执行 object_detector.h 第253行的 deserialize(item.scanner, in); 时，调用 scan_fhog_pyramid.h 第495行：

// Deserializes a scan_fhog_pyramid from `in`.
// Only format version 1 is accepted; any other version throws serialization_error.
// A serialization_error is also thrown when the dimension count stored in the
// stream differs from item.get_num_dimensions().
template <typename T, typename U>
    void deserialize (
        scan_fhog_pyramid<T,U>& item,
        std::istream& in 
    )
    {
        int version = 0;
        deserialize(version, in);
        if (version != 1)
            throw serialization_error("Unsupported version found when deserializing a scan_fhog_pyramid object.");

        // Fields are read in a fixed order; it must match the order in which
        // they were written to the stream.
        deserialize(item.fe, in);
        deserialize(item.feats, in);
        deserialize(item.cell_size, in);
        deserialize(item.padding, in);
        deserialize(item.window_width, in);
        deserialize(item.window_height, in);
        deserialize(item.max_pyramid_levels, in);
        deserialize(item.min_pyramid_layer_width, in);
        deserialize(item.min_pyramid_layer_height, in);
        deserialize(item.nuclear_norm_regularization_strength, in);

        // When developing some feature extractor, it's easy to accidentally change its
        // number of dimensions and then try to deserialize data from an older version of
        // your extractor into the current code.  This check is here to catch that kind of
        // user error.
        long dims;
        deserialize(dims, in);
        if (item.get_num_dimensions() != dims)
            throw serialization_error("Number of dimensions in serialized scan_fhog_pyramid doesn't match the expected number.");
    }

这一步反序列化的是扫描器的基本参数：fe、feats、cell_size、padding、window_width、window_height 等。

执行 object_detector.h 第254行的 deserialize(item.boxes_overlap, in); 时，调用 box_overlap_testing.h 第136行：

// Deserializes a test_box_overlap from `in` by reading two double thresholds
// and rebuilding the object from them.
inline void deserialize (
        test_box_overlap& item,
        std::istream& in 
    )
    {
        double percent_covered_thresh, iou_thresh;
        // Read order differs from the declaration order above:
        // iou_thresh is read first, then percent_covered_thresh.
        deserialize(iou_thresh, in);
        deserialize(percent_covered_thresh, in);
        // Replace `item` with a new object constructed from the two thresholds.
        item = test_box_overlap(iou_thresh, percent_covered_thresh);
    }

此处 num_detectors = 5，对应人脸的五个朝向：front（正面）、left（左侧）、right（右侧）、front left rotated（正面左旋）、front right rotated（正面右旋）。

// For every entry of item.w: read its w from the stream, then call init()
// on that entry with the shared scanner.
for (unsigned long i = 0; i < item.w.size(); ++i)
            {
                deserialize(item.w[i].w, in);
                item.w[i].init(item.scanner);
            }

上述循环依次对 item.w 的每个元素进行赋值，此时 w 的 size 为 5。

//初始化部分结束

然后加载图像:

load_image(img,argv[i]);

做图像金字塔:

pyramid_up(img);

构建图像金字塔时，首先通过 pyramid_up 将图像放大为原来的2倍；之后每一级的图像尺寸缩小为上一级的5/6（对应 pyramid_down<6>），直到图像尺寸小于64*64为止。

调用 interpolation.h 1602

// Excerpt of the upsampling routine's body (its signature is not shown here).
// An input whose image_size is 0 yields a 0x0 output and an immediate return.
if (image_size(in_img) == 0)
        {
            set_image_size(out_img, 0, 0);
            return;
        }

        // Take the input image's rectangle and ask the pyramid object `pyr`
        // for the corresponding rectangle one level up.
        rectangle rect = get_rect(in_img);
        rectangle uprect = pyr.rect_up(rect);
        // An empty upsampled rectangle also yields a 0x0 output.
        if (uprect.is_empty())
        {
            set_image_size(out_img, 0, 0);
            return;
        }
        // Size the output from the upsampled rectangle's bottom/right edges (+1);
        // presumably the arguments are (rows, cols) -- TODO confirm.
        set_image_size(out_img, uprect.bottom()+1, uprect.right()+1);

        // Fill out_img by resizing in_img with the supplied interpolator `interp`.
        resize_image(in_img, out_img, interp);

//图像扩大至原来的2倍结束

然后进行人脸检测

std::vector<rectangle> dets = detector(img);

调用object_detector.h 505

    // Convenience overload of the detector call operator.
    // Runs the overload that fills a std::vector<rect_detection>, then returns
    // only the `rect` member of each detection, in the same order.
    // `adjust_threshold` is forwarded unchanged to that overload.
    template <
        typename image_scanner_type
        >
    template <
        typename image_type
        >
    std::vector<rectangle> object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
        double adjust_threshold
    ) 
    {
        // Delegate the actual detection work to the rect_detection overload.
        std::vector<rect_detection> dets;
        (*this)(img,dets,adjust_threshold);

        // Copy out just the rectangles, one per detection.
        std::vector<rectangle> final_dets(dets.size());
        for (unsigned long i = 0; i < dets.size(); ++i)
            final_dets[i] = dets[i].rect;

        return final_dets;
    }

执行507 (*this)(img,dets,adjus