dlib人脸检测c语言具体实现

最新推荐文章于 2024-08-22 09:10:14 发布

置顶奔跑熊

最新推荐文章于 2024-08-22 09:10:14 发布

阅读量3.4k

点赞数 2

分类专栏：图像处理基础算法机器视觉人脸算法文章标签： dlib facedetect

本文链接：https://blog.csdn.net/yongjiankuang/article/details/82559574

版权

图像处理基础算法同时被 3 个专栏收录

24 篇文章 10 订阅

订阅专栏

机器视觉

5 篇文章 0 订阅

订阅专栏

人脸算法

2 篇文章 0 订阅

订阅专栏

dlib是一个很不错的视觉库，相比opencv，dlib里面的很多算法更接近工业实际，当然这只是个人的感觉。opencv也很不错，涉及图像处理的方方面面，为图像开发者带来很多便利。最近一段时间在做hog这一块的检测算法的研究，发现opencv上实现的hog检测效果不如dlib的检测效果。一方面，dlib采用的并非最原始的hog，而是fhog（Felzenszwalb HOG）：与hog相比，fhog是以cell为单元提取最终的向量，而hog是以block为单元提取最终的向量；另一方面，dlib在目标检测的训练过程中，考虑了目标检测框到特征空间的映射，以及特征空间反映射回来的框与原始框的IOU，这就在训练的过程中考虑了缩放层级的问题，从而使得提取的特征更为准确。dlib的这一优势也使得标签制作的要求比较严格，因为dlib训练要求所有层级中mapped_rect与truth_rect的IOU一定要大于0.5，不然会报错；其实这也很容易理解，如果mapped_rect与truth_rect的IOU小于0.5，那么提取的特征肯定会比较差。好了，说了一大堆的文字，可能没看过源码的人会有些不理解，望见谅。本文主要实现的是dlib目标检测的前向过程，代码中做了一些修改，实际效果和原始的fhog基本一致。

首先是fhog特征提取

#ifndef _FHOG_H_
#define _FHOG_H_

#include "common.h"
#include "image.h"
#include "point.h"

// Compute the FHOG-style feature map of `img`. The returned image is allocated
// inside the function and must be released by the caller.
image_t* extract_fhog(image_t *img);

// Map a point given in HOG (cell) coordinates back to image (pixel) coordinates.
// The two padding arguments describe the filter window size in cells and both
// default to 12.
point_t fhog_to_image(point_t p, int filter_rows_padding = 12, int filter_cols_padding = 12);

#endif

#include "fhog.h"


// Label the quadrant of the gradient vector (dx, dy).
// Returns 1 for dx > 0 && dy >= 0, 2 for dx < 0 && dy >= 0,
// 3 for dx < 0 && dy < 0, 4 for dx > 0 && dy < 0, and 0 whenever dx == 0.
static int quadrant(int dx, int dy)
{
	// A purely vertical (or zero) gradient has no quadrant label.
	if (dx == 0)
		return 0;

	// Upper half-plane, including the horizontal axis.
	if (dy >= 0)
		return (dx > 0) ? 1 : 2;

	// Strictly lower half-plane.
	return (dx < 0) ? 3 : 4;
}


// Return the smaller of the two values; `a` is returned when they compare equal.
static float cmp_min(float a, float b)
{
	return (a > b) ? b : a;
}

// Extract FHOG-style gradient-orientation features from an 8-bit single-channel image.
//
// Processing steps:
//   1. Central-difference gradients are computed for every interior pixel; each
//      pixel votes with weight |dx| + |dy| into the histograms of the four
//      surrounding cells (bilinear weights) using 2 * bins signed orientations.
//   2. A per-cell energy term is accumulated from the histogram with opposite
//      directions folded together.
//   3. Every interior cell is normalised against its four 2x2 cell
//      neighbourhoods, clipped at 0.2, and written as 16 signed planes followed
//      by 8 unsigned planes.
//
// The output has (cell_nr - 2 + cell_per_win - 1) rows and
// (cell_nc - 2 + cell_per_win - 1) columns and is stored plane by plane; the
// computed cells are shifted by (cell_per_win - 1) / 2 so the border stays zero.
//
// The caller owns the returned image. NULL is returned when the scratch buffers
// cannot be allocated.
// NOTE(review): 24 planes are written, so dims is assumed to be >= 24 -- confirm.
image_t* extract_fhog(image_t *img)
{
	// Number of cells along each axis, rounded to nearest.
	const int cell_nr = (int)((float)img->nr / (float)(cell_size)+0.5);
	const int cell_nc = (int)((float)img->nc / (float)(cell_size)+0.5);

	if (cell_nr == 0 || cell_nc == 0)
		printf("cell num invalid\n");

	const int bins = 8;
	const int cell_count = cell_nr * cell_nc;

	// hist holds 2 * bins planes of cell_count floats; norm holds one plane.
	float *hist = (float*)malloc(cell_count * 2 * bins * sizeof(float));
	float *norm = (float*)malloc(cell_count * sizeof(float));

	// malloc(0) may legitimately return NULL, so only treat NULL as a failure
	// when there is something to allocate.
	if (cell_count > 0 && (hist == NULL || norm == NULL))
	{
		printf("fhog alloc failed\n");
		free(hist);
		free(norm);
		return NULL;
	}

	for (int k = 0; k < cell_count * 2 * bins; k++)
		hist[k] = 0;
	for (int k = 0; k < cell_count; k++)
		norm[k] = 0;

	// Border cells only contribute to their neighbours' normalisation.
	const int hog_nr = max(cell_nr - 2, 0);
	const int hog_nc = max(cell_nc - 2, 0);
	if (hog_nr == 0 || hog_nc == 0)
		printf("hog size invalid\n");

	const int padding_rows_offset = (cell_per_win - 1) / 2;
	const int padding_cols_offset = (cell_per_win - 1) / 2;
	const int hog_h = hog_nr + cell_per_win - 1;
	const int hog_w = hog_nc + cell_per_win - 1;

	image_t* hog = image_alloc(hog_w, hog_h, dims, IM_32FC1);
	float* feat = hog->data_f32;

	for (int k = 0; k < dims * hog_h * hog_w; k++)
		feat[k] = 0;

	// Last pixel row/column for which a central difference can still be formed.
	const int visible_nr = (min(cell_nr * cell_size, img->nr)) - 1;
	const int visible_nc = (min(cell_nc * cell_size, img->nc)) - 1;

	for (int y = 1; y < visible_nr; y++)
	{
		for (int x = 1; x < visible_nc; x++)
		{
			const int tempx = img->data_u8[y * img->nc + x + 1] - img->data_u8[y * img->nc + x - 1];
			const int tempy = img->data_u8[(y + 1) * img->nc + x] - img->data_u8[(y - 1) * img->nc + x];

			const float absx = fabs((float)tempx);
			const float absy = fabs((float)tempy);

			// L1 gradient magnitude is used as the vote weight.
			const float v = absx + absy;

			// |dy| is compared against |dx| * tan(angle) to pick the angular
			// sector without calling atan2.
			const float Dtan22 = absx * tan22;
			const float Dtan45 = absx * tan45;
			const float Dtan67 = absx * tan67;

			int flag = 0;
			int best_o = 0;
			if (tempx == 0)
			{
				// NOTE(review): a vertical gradient always lands in bin 3 and is
				// never moved to the signed half (flag stays 0) -- confirm this
				// is intended.
				best_o = (tempy == 0) ? 0 : 3;
			}
			else
			{
				int sector = 3;
				if (absy <= Dtan22)
					sector = 0;
				else if (absy <= Dtan45)
					sector = 1;
				else if (absy <= Dtan67)
					sector = 2;

				// Quadrants 1 and 3 count the sector upwards from bin 0,
				// quadrants 2 and 4 mirror it downwards from bin 7.
				flag = quadrant(tempx, tempy);
				best_o = (flag == 1 || flag == 3) ? sector : 7 - sector;
			}

			// Gradients pointing into the lower half-plane use the second set of bins.
			if (flag == 3 || flag == 4)
				best_o = best_o + bins;

			// Continuous cell coordinates of the pixel centre. The divisor must be
			// cell_size (the same quantity that defined cell_nr / cell_nc);
			// otherwise the cell index can run past the histogram.
			float xp = ((float)x + 0.5) / (float)cell_size - 0.5;
			float yp = ((float)y + 0.5) / (float)cell_size - 0.5;
			int ixp = (int)std::floor(xp);
			int iyp = (int)std::floor(yp);

			float vx0 = xp - ixp;
			float vy0 = yp - iyp;
			float vx1 = 1.0 - vx0;
			float vy1 = 1.0 - vy0;

			float *plane = hist + best_o * cell_nr * cell_nc;

			// Bilinear vote into the (up to) four neighbouring cells.
			if (ixp >= 0 && iyp >= 0)
			{
				*(plane + iyp * cell_nc + ixp) += ((vx1 * vy1 * v));
			}

			if (ixp + 1 < cell_nc && iyp >= 0 && ixp + 1 >= 0)
			{
				*(plane + iyp * cell_nc + (ixp + 1)) += ((vx0 * vy1 * v));
			}

			if (ixp >= 0 && iyp + 1 < cell_nr && iyp + 1 >= 0)
			{
				*(plane + (iyp + 1) * cell_nc + ixp) += ((vx1 * vy0 * v));
			}

			if (ixp + 1 < cell_nc && iyp + 1 < cell_nr && ixp + 1 >= 0 && iyp + 1 >= 0)
			{
				*(plane + (ixp + 1) + (iyp + 1) * cell_nc) += ((vx0 * vy0 * v));
			}
		}
	}

	// Per-cell energy: squared sum of each pair of opposite orientations.
	for (int o = 0; o < bins; o++)
	{
		f32 *p1 = hist + o * cell_nr * cell_nc;
		f32 *p2 = hist + (o + bins) * cell_nr * cell_nc;
		f32 *dst = norm;
		f32 *end = norm + cell_nr * cell_nc;
		while (dst < end)
		{
			*(dst++) += (*p1 + *p2) * (*p1 + *p2);
			p1++;
			p2++;
		}
	}

	// Compute features.
	float up_limit = 0.2;
	float temp_value[16];
	for (int y = 0; y < hog_nr; y++)
	{
		const int yy = y + padding_rows_offset;
		for (int x = 0; x < hog_nc; x++)
		{
			const int xx = x + padding_cols_offset;

			float *dst = feat + yy * hog_w + xx;
			float *src, *p;
			float n1, n2, n3, n4;

			// Inverse norms of the four 2x2 cell blocks that contain cell (y + 1, x + 1).
			// NOTE(review): no epsilon is added, so a completely flat block
			// divides by zero -- confirm the resulting values are acceptable.
			p = norm + (y + 1) * cell_nc + x + 1;
			n1 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
			p = norm + (y + 1) * cell_nc + x;
			n2 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
			p = norm + y * cell_nc + x + 1;
			n3 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
			p = norm + y * cell_nc + x;
			n4 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));

			// 16 signed-direction features.
			src = hist + (y + 1) * cell_nc + x + 1;
			for (int o = 0; o < 2 * bins; o++)
			{
				// Clip each normalised value.
				float h1 = min(*src * n1, up_limit);
				float h2 = min(*src * n2, up_limit);
				float h3 = min(*src * n3, up_limit);
				float h4 = min(*src * n4, up_limit);

				float sum_data = (h1 + h2 + h3 + h4) * 0.5;
				*dst = sum_data;
				temp_value[o] = sum_data;

				dst += hog_w * hog_h;
				src += cell_nr * cell_nc;
			}

			// 8 unsigned-direction features, built from the signed features above.
			for (int o = 0; o < bins; o++)
			{
				f32 sum = temp_value[o] + temp_value[o + bins];
				float h = min(sum, up_limit);

				*dst = (h + h + h + h) * 0.5;

				dst += hog_w * hog_h;
			}
		}
	}

	free(hist);
	free(norm);

	return hog;
}




// Map a coordinate in HOG (cell) space back to image (pixel) space.
// The point is shifted by one cell, the half-window padding is removed, the
// result is scaled by cell_size, and finally half a cell is added in the
// direction away from the origin on each axis.
point_t fhog_to_image(point_t p, int filter_rows_padding, int filter_cols_padding)
{
	point_t unit = point_init(1,1);
	point_t half_window = point_init((filter_cols_padding - 1) / 2, (filter_rows_padding - 1) / 2);
	point_t half_cell;

	p = point_add(p, unit);
	p = point_sub(p, half_window);
	p = point_mul(p, cell_size);
	p = point_add(p, unit);

	// The half-cell offset takes the sign of each coordinate.
	if (p.x >= 0)
	{
		if (p.y >= 0)
			half_cell = point_init(cell_size / 2, cell_size / 2);
		else
			half_cell = point_init(cell_size / 2, -cell_size / 2);
	}
	else
	{
		if (p.y >= 0)
			half_cell = point_init(-cell_size / 2, cell_size / 2);
		else
			half_cell = point_init(-cell_size / 2, -cell_size / 2);
	}

	return point_add(p, half_cell);
}

其次就是对滤波后的特征图进行svm系数加权

#ifndef _FILTER_H_
#define _FILTER_H_


#include "common.h"
#include "image.h"


// Correlate every channel of in_img with the matching channel of filter and add
// the responses into out_img. Returns the rectangle of positions at which the
// filter window lies completely inside in_img.
rect_t spatially_filter_image(image_t* in_img, image_t* out_img, image_t *filter);



#endif

#include "filter.h"


// Slide `filter` over every channel of `in_img` and accumulate the per-channel
// responses into `out_img` (which is indexed with in_img's width and is only
// added to, never cleared, here).
//
// Only positions where the whole filter window fits inside the image are
// evaluated; that region is returned with `r` / `b` as exclusive bounds.
// Filter taps are read with a row stride of cell_per_win.
rect_t spatially_filter_image(image_t* in_img, image_t* out_img, image_t *filter)
{
	const int img_w = in_img->nc;
	const int img_h = in_img->nr;
	const int f_rows = filter->nr;
	const int f_cols = filter->nc;

	const int row_begin = f_rows / 2;
	const int col_begin = f_cols / 2;
	const int row_end = img_h - ((f_rows - 1) / 2);
	const int col_end = img_w - ((f_cols - 1) / 2);

	// Region of valid responses.
	rect_t valid;
	valid.l = col_begin;
	valid.t = row_begin;
	valid.r = col_end;
	valid.b = row_end;

	for (int ch = 0; ch < dims; ch++)
	{
		f32 *plane = in_img->data_f32 + ch * img_w * img_h;
		f32 *taps = filter->data_f32 + ch * cell_per_win * cell_per_win;

		for (int r = row_begin; r < row_end; r++)
		{
			for (int c = col_begin; c < col_end; c++)
			{
				// Three independent accumulators, combined once per output pixel.
				f32 acc0 = 0, acc1 = 0, acc2 = 0;

				for (int m = 0; m < f_rows; m++)
				{
					const int base = (r - row_begin + m) * img_w + c - col_begin;
					f32 *src_row = plane + base;
					f32 *tap_row = taps + m * cell_per_win;

					int n = 0;
					while (n < f_cols - 2)
					{
						acc0 += src_row[n] * tap_row[n];
						acc1 += src_row[n + 1] * tap_row[n + 1];
						acc2 += src_row[n + 2] * tap_row[n + 2];
						n += 3;
					}

					// Leftover columns when the filter width is not a multiple of 3.
					while (n < f_cols)
					{
						acc0 += src_row[n] * tap_row[n];
						n++;
					}
				}

				acc0 += acc1 + acc2;

				out_img->data_f32[r * img_w + c] += acc0;
			}
		}
	}

	return valid;
}

然后就是检测函数

#ifndef _DETECT_H_
#define _DETECT_H_

#include "common.h"
#include "image.h"
#include "fhog.h"
#include "filter.h"
#include "point.h"



// Run the multi-scale detector on img using the given filter weights, write the
// detections that survive overlap suppression into rects, and return how many
// were written.
int detection(image_t *img, image_t *filter, rect_detection_t *rects);



#endif

#include "detect.h"


// Sort the first `num` detections in place by descending detection_confidence
// using a simple exchange sort.
static void sort_box(rect_detection_t *rects,int num)
{
	for (int head = 0; head < num; head++)
	{
		// Comparing an element with itself never swaps, so start one past head.
		for (int probe = head + 1; probe < num; probe++)
		{
			if (rects[head].detection_confidence < rects[probe].detection_confidence)
			{
				rect_detection_t held = rects[head];
				rects[head] = rects[probe];
				rects[probe] = held;
			}
		}
	}
}

// Decide whether two detections overlap enough to be treated as duplicates.
// Returns non-zero when either box is degenerate, when intersection / bounding
// area exceeds match_thresh, or when the intersection covers more than
// overlap_thresh of either box. Returns zero when the boxes do not intersect.
static int boxes_overlap(rect_detection_t a, rect_detection_t b)
{
	// A box with inverted edges is reported as overlapping.
	if (a.box.top > a.box.bottom || a.box.left > a.box.right)
		return true;
	if (b.box.top > b.box.bottom || b.box.left > b.box.right)
		return true;

	// Intersection rectangle.
	box_t inter;
	inter.left = max(a.box.left, b.box.left);
	inter.top = max(a.box.top, b.box.top);
	inter.right = min(a.box.right, b.box.right);
	inter.bottom = min(a.box.bottom, b.box.bottom);

	if (inter.top > inter.bottom || inter.left > inter.right)
		return false;

	// Smallest rectangle containing both boxes.
	box_t hull;
	hull.left = min(a.box.left, b.box.left);
	hull.top = min(a.box.top, b.box.top);
	hull.right = max(a.box.right, b.box.right);
	hull.bottom = max(a.box.bottom, b.box.bottom);

	// Edges are inclusive, hence the +1 on each side length.
	float a_area = (a.box.right - a.box.left + 1) * (a.box.bottom - a.box.top + 1);
	float b_area = (b.box.right - b.box.left + 1) * (b.box.bottom - b.box.top + 1);
	float inner_area = (inter.right - inter.left + 1) * (inter.bottom - inter.top + 1);
	float outer_area = (hull.right - hull.left + 1) * (hull.bottom - hull.top + 1);

	if (inner_area / outer_area > match_thresh)
		return true;
	if (inner_area / a_area > overlap_thresh)
		return true;
	if (inner_area / b_area > overlap_thresh)
		return true;

	return false;
}

// Return 1 if `rect` overlaps any of the first `num` entries of `final_rects`
// according to boxes_overlap, otherwise 0.
static int overlaps_any_box(rect_detection_t *final_rects, rect_detection_t rect, int num)
{
	int idx = 0;
	while (idx < num)
	{
		if (boxes_overlap(final_rects[idx], rect))
			return 1;
		idx++;
	}

	return 0;
}


// Multi-scale sliding-window detection.
//
// An image pyramid with a 5/6 scale step is built from `img`. On every level the
// FHOG features are extracted and filtered with `filter`; every response that
// reaches `thresh` becomes a candidate box mapped back to the coordinates of the
// original image. Candidates are then sorted by confidence and greedily kept
// unless they overlap an already accepted box.
//
// img         8-bit single-channel input image.
// filter      filter weights passed to spatially_filter_image.
// final_rects output buffer; at most 2000 candidates are collected, so it must
//             have room for 2000 entries.
//
// Returns the number of boxes written to final_rects (0 if the working buffers
// cannot be allocated).
int detection(image_t *img, image_t *filter, rect_detection_t *final_rects)
{
	// Capacity of the candidate buffer; collection stops once it is full so the
	// buffer can never be overrun.
	const int max_candidates = 2000;

	int count = 0;
	const int w = img->nc;
	const int h = img->nr;
	int levels = 0;

	float *scale = (float*)malloc(max_pyramid_levels * sizeof(float));
	rect_detection_t* rects = (rect_detection_t*)malloc(max_candidates * sizeof(rect_detection_t));
	if (scale == NULL || rects == NULL)
	{
		free(scale);
		free(rects);
		return 0;
	}

	// Count the pyramid levels: a level exists while the previous layer is still
	// at least the minimum size. scale[k] holds (5/6)^(k+1).
	float scalef = 5./6;
	float tempw = w, temph = h;
	while (tempw >= min_pyramid_layer_width && temph >= min_pyramid_layer_height && levels < max_pyramid_levels)
	{
		tempw = w * scalef;
		temph = h * scalef;
		scale[levels] = scalef;
		levels++;
		scalef *= 5./6;
	}

	// Detect level by level.
	for (int i = 0; i < levels && count < max_candidates; i++)
	{
		image_t *scale_img = NULL;
		image_t *level_img = img;

		if (i > 0)
		{
			// Level 0 is the input itself; deeper levels are resampled copies.
			const int scalew = (int)((float)w * scale[i - 1]);
			const int scaleh = (int)((float)h * scale[i - 1]);

			scale_img = image_alloc(scalew, scaleh, 1, IM_8UC1);
			scale_bilinear(img,scale_img);
			level_img = scale_img;
		}

		image_t *feat = extract_fhog(level_img);
		image_t *out_img = image_alloc(feat->nc, feat->nr, 1, IM_32FC1);

		// spatially_filter_image accumulates, so the response map starts at zero.
		for (int k = 0; k < feat->nr * feat->nc; k++)
			out_img->data_f32[k] = 0;

		rect_t area = spatially_filter_image(feat, out_img, filter);

		// Keep every response that reaches the threshold.
		for (int r = area.t; r < area.b && count < max_candidates; r++)
		{
			for (int c = area.l; c < area.r && count < max_candidates; c++)
			{
				const float score = out_img->data_f32[r * out_img->nc + c];
				if (score >= thresh)
				{
					point_t p = point_init(c,r);
					point_t p_tl, p_br;

					// Window corners in HOG space, excluding the padding cells.
					p_tl.x = (p.x - (cell_per_win - 2 * padding) / 2);
					p_tl.y = (p.y - (cell_per_win - 2 * padding) / 2);
					p_br.x = (p_tl.x + (cell_per_win - 2 * padding) - 1);
					p_br.y = (p_tl.y + (cell_per_win - 2 * padding) - 1);

					// HOG space -> pixel space of this level.
					p_tl = fhog_to_image(p_tl, cell_per_win, cell_per_win);
					p_br = fhog_to_image(p_br, cell_per_win, cell_per_win);

					// Pixel space of this level -> pixel space of the original
					// image (presumably what point_up does for level i).
					p_tl = point_up(p_tl,i);
					p_br = point_up(p_br,i);

					rects[count].box.left = p_tl.x;
					rects[count].box.top = p_tl.y;
					rects[count].box.right = p_br.x;
					rects[count].box.bottom = p_br.y;
					rects[count].detection_confidence = score;
					count++;
				}
			}
		}

		free(feat->data_f32);
		free(feat);

		free(out_img->data_f32);
		free(out_img);

		if (scale_img != NULL)
		{
			// Released the same way as the other image_alloc results above.
			free(scale_img->data_u8);
			free(scale_img);
		}
	}

	// Highest confidence first.
	sort_box(rects,count);

	// Non-maximum suppression: accept a box only if it does not overlap one
	// that has already been accepted.
	int find_index = 0;
	for (int i = 0; i < count; i++)
	{
		if (overlaps_any_box(final_rects, rects[i],find_index))
			continue;

		final_rects[find_index++] = rects[i];
	}

	free(rects);
	free(scale);

	return find_index;
}

最后就是主函数：

#include "common.h"
#include "image.h"
#include "detect.h"
#include "detect_hog.h"
#include "io.h"
#include <fstream>
#include <iostream>
using namespace std;


// Recursively collect the files below `path` whose names end with `format`
// (for example ".jpg") and append their full paths to `files`.
// A name must be strictly longer than `format` to be accepted.
void GetAllFiles(string path, std::vector<string>& files, string format)
{
	struct _finddata_t entry;	// receives the information of each directory entry
	int suffix_len = format.length();
	string full, name;

	// Start the enumeration; nothing to do if the directory cannot be listed.
	long long handle = _findfirst(full.assign(path).append("\\*").c_str(), &entry);
	if (handle == -1)
		return;

	do
	{
		if ((entry.attrib &  _A_SUBDIR))
		{
			// Descend into sub-directories, skipping the "." and ".." entries.
			if (strcmp(entry.name, ".") == 0 || strcmp(entry.name, "..") == 0)
				continue;

			GetAllFiles(full.assign(path).append("\\").append(entry.name), files, format);
		}
		else
		{
			// Plain file: keep it when the name ends with the requested suffix.
			name = entry.name;
			if (name.length()>suffix_len && name.compare(name.length() - suffix_len, suffix_len, format) == 0)
				files.push_back(full.assign(path).append("\\").append(entry.name));
		}

	} while (_findnext(handle, &entry) == 0);

	_findclose(handle);
}

// Copy the first byte plane of an OpenCV matrix into a newly allocated
// single-channel 8-bit image_t. Rows are read using the matrix row stride, so
// padded matrices are handled. The caller frees data_u8 and the struct.
// NOTE(review): assumes src holds 8-bit single-channel data -- confirm at call sites.
image_t* load_image(Mat src)
{
	const int rows = src.rows;
	const int cols = src.cols;

	image_t* out = (image_t*)malloc(sizeof(image_t));
	out->type = IM_8UC1;
	out->ch = 1;
	out->nr = rows;
	out->nc = cols;
	out->size = cols * rows * sizeof(u8);
	out->data_u8 = (u8*)malloc(cols * rows * sizeof(u8));

	// The destination is tightly packed; the source advances by src.step per row.
	u8 *dst = out->data_u8;
	for (int y = 0; y < rows; y++)
	{
		const size_t row_start = y * src.step;
		for (int x = 0; x < cols; x++)
		{
			*dst++ = src.data[row_start + x];
		}
	}

	return out;
}

#define PIC 1
#define VIDEO 0

void main()
{
#if PIC
	
	image_t* img_gray ;
	image_t* img_filter = image_alloc(cell_per_win, cell_per_win, dims, IM_32FC1);
	rect_detection_t* result = (rect_detection_t*)malloc(2000 * sizeof(rect_detection_t));

	FILE *fp = fopen("weights3.txt", "rb");
	if (fread(img_filter->data_f32, cell_per_win * cell_per_win * dims * sizeof(float), 1, fp) != 1)
		printf("read file error\n");
	fclose(fp);

	int box_num = 0;

	std::vector<string> file_list;
	string file_path = "test1";

	GetAllFiles(file_path, file_list, ".jpg");

	int count = 0;
	char info[200];

	for (int i = 0; i < file_list.size(); i++)
	{
		Mat src = imread(file_list[i]);
		Mat gray = imread(file_list[i], 0);
		img_gray = load_image(gray);

		box_num = detection(img_gray, img_filter,result);
		for (int i = 0; i < box_num; i++)
		{
			Rect r;
			r.x = result[i].box.left;
			r.y = result[i].box.top;
			r.width = result[i].box.right - result[i].box.left + 1;
			r.height = result[i].box.bottom - result[i].box.top + 1;
			rectangle(src, r, Scalar(0, 255, 0), 2, 8, 0);
		}

		imshow("src", src);
		waitKey(2);

		sprintf(info,"result/%d.jpg",count++);
		imwrite(info,src);

		free(img_gray -> data_u8);
		free(img_gray);
	}

#endif

#if VIDEO

	Mat src_img;
	Mat gray_img;
	int box_num = 0;

	VideoCapture cap("0.mp4");


	
	image_t *img_gray;
	image_t* img_filter = image_alloc(cell_per_win, cell_per_win, dims, IM_32FC1);
	rect_detection_t* result = (rect_detection_t*)malloc(2000 * sizeof(rect_detection_t));

	FILE *fp = fopen("weights1.txt", "rb");
	if (fread(img_filter->data_f32, cell_per_win * cell_per_win * dims * sizeof(float), 1, fp) != 1)
		printf("read file error\n");
	fclose(fp);

	//float thresh = 1.4150455669441055;// 1.1907374298637405;

	int count = 0;
	char info[200];

	while (1)
	{
		cap >> src_img;
		//resize(src_img, src_img, Size(2 * src_img.cols, 2 * src_img.rows * 2));
		cvtColor(src_img, gray_img, CV_BGR2GRAY);
		
		img_gray = load_image(gray_img);
		box_num = detection(img_gray, img_filter, thresh, result);

		for (int i = 0; i < box_num; i++)
		{
			Rect r;
			r.x = result[i].box.left;
			r.y = result[i].box.top;
			r.width = result[i].box.right - result[i].box.left + 1;
			r.height = result[i].box.bottom - result[i].box.top + 1;
			rectangle(src_img, r, Scalar(0, 255, 0), 2, 8, 0);
		}

		imshow("src", src_img);
		waitKey(2);

		sprintf(info,"video3/%d.jpg",count++);
		imwrite(info,src_img);

		box_num = 0;

		free(img_gray->data_u8);
		free(img_gray);
	}

#endif
	int kk = 1;

}

附上一张效果图

文中的测试模型并不是最好的模型，大家可以自己通过训练得到更好的模型。如果想要得到与dlib原始fhog完全一样的特征，可以自己稍加修改；不过本文的特征我亲自实验了几次，效果基本差不多。关于训练的内容需要自己多探索，我也是刚开始摸索不久。由于dlib的测试程序太慢了，无奈之下只好自己动手复现了一遍。水平有限，若有不当之处，请指教，谢谢！

源码下载：https://download.csdn.net/download/yongjiankuang/10662941

github: