本文梳理dlib开源库中人脸检测部分代码(在dlib_face_detection.cpp中)的执行流程,其中涉及图像金字塔(双线性插值)、HOG特征提取、SVM分类,以及其中的一些技巧(trick)。
整体流程:构建图像金字塔 → 对每一级图像提取HOG特征 → 用SVM分类(判断是否存在人脸)
在dlib_face_detection.cpp的main函数开始,
执行 frontal_face_detector detector = get_frontal_face_detector(); 调用 frontal_face_detection.h 19:
// frontal_face_detector is an object_detector whose image scanner is a
// scan_fhog_pyramid parameterised with pyramid_down<6>.
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > frontal_face_detector;
// Forward declaration only; the definition is not part of this excerpt. The
// returned string is what get_frontal_face_detector() feeds to deserialize().
inline const std::string get_serialized_frontal_faces();
// Returns a ready-to-use frontal face detector.
//
// The already-trained model is obtained in string form from
// get_serialized_frontal_faces(); wrapping that string in an input stream and
// deserializing it yields the detector object.
inline frontal_face_detector get_frontal_face_detector()
{
    std::istringstream model_stream(get_serialized_frontal_faces());

    frontal_face_detector loaded_detector;
    deserialize(loaded_detector, model_stream);

    return loaded_detector;
}
执行 deserialize(detector, sin); 调用 object_detector.h 240:
// Restores an object_detector<T> from the stream `in`.
//
// The stream starts with an integer format version:
//   version 1: scanner, one weight vector, boxes_overlap
//   version 2: scanner, boxes_overlap, detector count, then one weight vector
//              per detector
// After each weight vector is read, it is initialised against item.scanner.
//
// Throws serialization_error for any other version number.
template <typename T>
void deserialize (
    object_detector<T>& item,
    std::istream& in
)
{
    int format_version = 0;
    deserialize(format_version, in);

    // Reject unknown formats before touching any field of item.
    if (format_version != 1 && format_version != 2)
    {
        throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
    }

    // Both supported formats store the scanner first.
    deserialize(item.scanner, in);

    if (format_version == 1)
    {
        // Legacy layout: exactly one detector, overlap tester stored last.
        item.w.resize(1);
        deserialize(item.w[0].w, in);
        item.w[0].init(item.scanner);
        deserialize(item.boxes_overlap, in);
        return;
    }

    // Version 2 layout: overlap tester, detector count, then the detectors.
    deserialize(item.boxes_overlap, in);

    unsigned long detector_count = 0;
    deserialize(detector_count, in);
    item.w.resize(detector_count);

    for (unsigned long k = 0; k < item.w.size(); ++k)
    {
        deserialize(item.w[k].w, in);
        item.w[k].init(item.scanner);
    }
}
执行253 deserialize(item.scanner, in); 调用 scan_fhog_pyramid.h 495:
// Restores a scan_fhog_pyramid<T,U> from the stream `in`.
//
// Only format version 1 is accepted; any other version raises
// serialization_error. The fields are read in the fixed order listed below,
// followed by a stored dimension count that is checked against the value the
// freshly loaded object reports.
template <typename T, typename U>
void deserialize (
scan_fhog_pyramid<T,U>& item,
std::istream& in
)
{
int version = 0;
deserialize(version, in);
if (version != 1)
throw serialization_error("Unsupported version found when deserializing a scan_fhog_pyramid object.");
// Scanner parameters, read one after another from the stream.
deserialize(item.fe, in);
deserialize(item.feats, in);
deserialize(item.cell_size, in);
deserialize(item.padding, in);
deserialize(item.window_width, in);
deserialize(item.window_height, in);
deserialize(item.max_pyramid_levels, in);
deserialize(item.min_pyramid_layer_width, in);
deserialize(item.min_pyramid_layer_height, in);
deserialize(item.nuclear_norm_regularization_strength, in);
// When developing some feature extractor, it's easy to accidentally change its
// number of dimensions and then try to deserialize data from an older version of
// your extractor into the current code. This check is here to catch that kind of
// user error.
long dims;
deserialize(dims, in);
// Compare the stored count with what the loaded configuration reports.
if (item.get_num_dimensions() != dims)
throw serialization_error("Number of dimensions in serialized scan_fhog_pyramid doesn't match the expected number.");
}
这一步反序列化得到scanner的基本参数:fe、feats、cell_size、padding、window_width、window_height等。
执行254 deserialize(item.boxes_overlap, in); 调用 box_overlap_testing.h 136
// Restores a test_box_overlap from the stream `in`.
//
// Two doubles are read, the IoU threshold first and the percent-covered
// threshold second, and item is replaced by a test_box_overlap built from them.
inline void deserialize (
    test_box_overlap& item,
    std::istream& in
)
{
    // The read order below is the order the values appear in the stream.
    double iou_limit;
    deserialize(iou_limit, in);

    double covered_limit;
    deserialize(covered_limit, in);

    item = test_box_overlap(iou_limit, covered_limit);
}
num_detectors = 5,即w中共有5个检测器,分别对应人脸的五个朝向:front(正面)、left(左侧)、right(右侧)、front left rotated(正面左旋)、front right rotated(正面右旋)。
// Excerpt from the version-2 branch of the object_detector deserializer:
// for every entry of item.w, read its weight vector from the stream and then
// initialise that entry against item.scanner.
for (unsigned long i = 0; i < item.w.size(); ++i)
{
deserialize(item.w[i].w, in);
item.w[i].init(item.scanner);
}
对w进行赋值, w的size = 5
//初始化部分结束
然后加载图像:
load_image(img,argv[i]);
做图像金字塔:
pyramid_up(img);
这里先用pyramid_up将图像放大为原来的2倍;之后在图像金字塔中,每一级图像的尺寸为上一级的5/6(对应frontal_face_detector定义中的pyramid_down<6>),直到图像尺寸小于64*64为止。
调用 interpolation.h 1602
// Excerpt from the body of the upsampling routine (the function header is not
// part of this excerpt).

// An empty input produces an empty output.
if (image_size(in_img) == 0)
{
set_image_size(out_img, 0, 0);
return;
}
// Map the input image's rectangle through pyr.rect_up to get the upsampled extent.
rectangle rect = get_rect(in_img);
rectangle uprect = pyr.rect_up(rect);
// If the mapped rectangle is empty, the output is empty as well.
if (uprect.is_empty())
{
set_image_size(out_img, 0, 0);
return;
}
// Size the output from the mapped rectangle's bottom/right edges (+1 each),
// then fill it by resizing the input with the interpolator interp.
set_image_size(out_img, uprect.bottom()+1, uprect.right()+1);
resize_image(in_img, out_img, interp);
//图像扩大至原来的2倍结束
然后进行人脸检测
std::vector<rectangle> dets = detector(img);
调用object_detector.h 505
// Runs the detector on img and returns only the bounding rectangles.
//
// The work is delegated to the overload that fills a vector of rect_detection;
// this wrapper then copies out each detection's rect, preserving order.
// adjust_threshold is forwarded unchanged to that overload.
template <
    typename image_scanner_type
    >
template <
    typename image_type
    >
std::vector<rectangle> object_detector<image_scanner_type>::
operator() (
    const image_type& img,
    double adjust_threshold
)
{
    std::vector<rect_detection> scored;
    (*this)(img, scored, adjust_threshold);

    // Keep just the rectangle of every detection, in the same order.
    std::vector<rectangle> boxes;
    boxes.reserve(scored.size());
    for (unsigned long k = 0; k < scored.size(); ++k)
    {
        boxes.push_back(scored[k].rect);
    }
    return boxes;
}
执行507 (*this)(img,dets,adjus