人头检测代码示例:SVM+HOG

最近在做人头统计方面的研究,尝试了多种办法,其中包括Adaboost+Haar特征、HOG特征+线性SVM两大模型。SVM+HOG的模型之前多数被应用于行人检测,我在做人头统计的过程中考虑到人头的边缘形状比较明显,图像梯度比较容易提取,所以将该方法搬到人头统计上来,效果还不错。不足之处是多尺度的HOG计算太慢了,难以达到实时性要求,所以我采用的多数是单尺度检测(64*64的固定窗口大小)。

我现在的工作只是做了个入门,本文意在抛砖引玉,希望感兴趣的小伙伴能够一起研究。


1、预处理

从视频中采集了1416个人头正样本,以及1957个负样本。正样本主要就是人头部(头发)的图像,负样本是不包括人头目标的图像。然后将它们统一归一化到64*64的大小(我这里为了简化训练过程,将人头图像的大小设置为和窗口大小一致)。


2、人头训练

开发环境是winxp+vs2008+opencv2.3.1。训练代码主要包括MySVM.h,global.h和global.cpp三个文件。其中MySVM.h是SVM类定义文件,global.h和global.cpp分别是全局函数声明和定义文件。

(1)MySVM.h如下:

#pragma once

#ifndef  _MYSVM_H_
#define _MYSVM_H_

#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
#include <opencv2/highgui/highgui.hpp>

using namespace cv;

class MySVM: public CvSVM
{
public:
	int get_alpha_count()
	{
		return this->sv_total;
	}

	int get_sv_dim()
	{
		return this->var_all;
	}

	int get_sv_count()
	{
		return this->decision_func->sv_count;
	}

	double* get_alpha()
	{
		return this->decision_func->alpha;
	}

	float** get_sv()
	{
		return this->sv;
	}

	float get_rho()
	{
		return this->decision_func->rho;
	}
};

#endif
(2)global.h如下:
#include "MySVM.h"

#include <fstream>
#include <iostream>
#include <ctime>
using namespace std;

//Function: Train
//Purpose: computes the HOG feature of every sample image and trains an SVM on them
//Parameters:
//const char* positivePath: path prefix of the positive samples (sample i is read from "<positivePath><i>.bmp")
//int pCount: number of positive samples
//const char* negativePath: path prefix of the negative samples (sample i is read from "<negativePath><i>.bmp")
//int nCount: number of negative samples
//const char* classifierSavePath: where the trained SVM classifier is saved
//const char* detectorSavePath: where the HOG detector (text file of weights followed by rho) is saved
//Returns bool: whether training succeeded (true: success, false: failure)
bool Train(const char* positivePath, int pCount, const char* negativePath, int nCount, 
		   const char* classifierSavePath, const char* detectorSavePath);

//Function: CalDimension
//Purpose: computes the HOG feature dimension of one detection window
//Parameters:
//CvSize winSize: window size
//CvSize blockSize: block size
//CvSize blockStride: block stride
//CvSize cellSize: cell size
//int nbins: number of histogram bins per cell
//Returns int: HOG feature dimension
//           = (blocks per window) * (cells per block) * nbins
//Details of the computation: http://blog.csdn.net/carson2005/article/details/7782726
//Details of the parameters: http://blog.csdn.net/raodotcong/article/details/6239431
int CalDimension(CvSize winSize, CvSize blockSize, CvSize blockStride,	CvSize cellSize, int nbins);

//Function: DetectMulti
//Purpose: runs multi-scale detection (HOGDescriptor::detectMultiScale) with the SVM+HOG detector on every frame of a video
//Parameters:
//const char* detectorSavePath: path of the saved detector file
//const char* testPath: path of the test video
//Returns bool: whether detection succeeded (true: success, false: failure)
bool DetectMulti(const char* detectorSavePath, const char* testPath);

//Function: DetectSingle
//Purpose: runs single-scale detection (HOGDescriptor::detect) with the SVM+HOG detector on every frame of a video
//Parameters:
//const char* detectorSavePath: path of the saved detector file (the text file written by Train, not the SVM xml)
//const char* testPath: path of the test video
//Returns bool: whether detection succeeded (true: success, false: failure)
bool DetectSingle(const char* detectorSavePath, const char* testPath);
(3)global.cpp如下:

#include "global.h"

///Parameter settings///

CvSize winSize = cvSize(64, 64);	//equal to the size of the training sample images
CvSize blockSize = cvSize(16, 16);	//block size
CvSize blockStride = cvSize(8, 8);	//block stride
CvSize winStride = cvSize(8, 8);	//window stride
CvSize cellSize = cvSize(8, 8);		//cell size
int nbins = 9;	//usually 9 gradient orientations are used

///函数定义///
//Computes the length of the HOG descriptor of a single detection window:
//(number of blocks in the window) * (number of cells in a block) * (bins per cell).
int CalDimension(CvSize winSize, CvSize blockSize, CvSize blockStride, CvSize cellSize, int nbins)
{
	//How many block positions fit into the window along each axis when the
	//block slides by blockStride.
	const int blocksDown = (winSize.height - blockSize.height) / blockStride.height + 1;
	const int blocksAcross = (winSize.width - blockSize.width) / blockStride.width + 1;
	const int blocksPerWindow = blocksDown * blocksAcross;

	//How many cells make up one block.
	const int cellsDown = blockSize.height / cellSize.height;
	const int cellsAcross = blockSize.width / cellSize.width;
	const int cellsPerBlock = cellsDown * cellsAcross;

	//Every cell contributes one histogram of nbins values.
	return blocksPerWindow * cellsPerBlock * nbins;
}

bool Train(const char* positivePath, int pCount, const char* negativePath, int nCount, 
		   const char* classifierSavePath, const char* detectorSavePath)
{
	cout<<"******************** Train ********************"<<endl;

	//首先计算图像的HOG特征维度
	int dim = CalDimension(winSize, blockSize, blockStride, cellSize, nbins);
	int totalCount = pCount + nCount;

	cout<<"1: Start trainning for SVM:"<<endl;
	cout<<"total samples: "<<totalCount<<endl;
	cout<<"positive samples: "<<pCount<<endl;
	cout<<"negative samples: "<<nCount<<endl;
	cout<<"feature dimension is: "<<dim<<endl<<endl;

	//训练正样本
	cout<<"2: Start to train positive samples:"<<endl;

	CvMat *sampleFeaturesMat = cvCreateMat(totalCount , dim, CV_32FC1);
	//64*128的训练样本,该矩阵将是totalSample*3780
	//64*64的训练样本,该矩阵将是totalSample*1764
	cvSetZero(sampleFeaturesMat);  
	CvMat *sampleLabelMat = cvCreateMat(totalCount, 1, CV_32FC1);//样本标识  
	cvSetZero(sampleLabelMat);  

	char positiveImgPath[256];
	for(int i=0; i<pCount; i++)  
	{  
		//载入图像
		sprintf(positiveImgPath, "%s%d.bmp", positivePath, i);
		string strPosPath(positiveImgPath);

		cv::Mat img = cv::imread(strPosPath);
		if( img.data == NULL )
		{
			cout<<"positive image sample load error: "<<i<<" "<<strPosPath<<endl;
			//return false;
			//system("pause");
			continue;
		}

		cv::HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
		vector<float> featureVec; 

		hog.compute(img, featureVec, winStride);  //计算HOG特征向量
		int featureVecSize = featureVec.size();

		//加上类标,转化为CvMat
		for (int j=0; j<featureVecSize; j++)  
		{  		
			CV_MAT_ELEM( *sampleFeaturesMat, float, i, j ) = featureVec[j]; 
		}  
		sampleLabelMat->data.fl[i] = 1;
	}
	cout<<"End of training for positive samples."<<endl<<endl;

	//训练负样本
	cout<<"3: Start to train negative samples: "<<endl;
	char negativeImgPath[256];
	for (int i=0; i<nCount; i++)
	{  
		//载入图像
		sprintf(negativeImgPath, "%s%d.bmp", negativePath, i);
		string strNegPath(negativeImgPath);

		cv::Mat img = cv::imread(strNegPath);
		if(img.data == NULL)
		{
			cout<<"negative image sample load error: "<<strNegPath<<endl;
			//return false;
			//system("pause");
			continue;
		}

		cv::HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
		vector<float> featureVec; 

		hog.compute(img,featureVec, winStride);//计算HOG特征向量
		int featureVecSize = featureVec.size();  

		for ( int j=0; j<featureVecSize; j ++)  
		{  
			CV_MAT_ELEM( *sampleFeaturesMat, float, i + pCount, j ) = featureVec[ j ];
		}  

		sampleLabelMat->data.fl[ i + pCount ] = -1;
	}  

	cout<<"End of training for negative samples."<<endl<<endl;

	//SVM训练
	cout<<"4: Start to train SVM classifier: "<<endl;
	//设置SVM参数
	CvSVMParams params;
	int iteration = 1000;
	double penaltyFactor = 0.01;
	params.svm_type = CvSVM::C_SVC;
	params.kernel_type = CvSVM::LINEAR;
	params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, iteration, FLT_EPSILON);
	params.C = penaltyFactor;
	//print
	cout<<"svm_type: C_SVC\nkernel_type: LINEAR\ntermination type: CV_TERMCRIT_ITER"
		<<"\ntermination iteration: "<<iteration<<"\ntermination epsilon: "<<FLT_EPSILON
		<<"\npenalty factor: "<<penaltyFactor<<endl;

	MySVM svm;
	svm.train( sampleFeaturesMat, sampleLabelMat, NULL, NULL, params ); //用线性SVM分类器训练
	svm.save(classifierSavePath);		//将SVM训练完的数据保存到指定的文件中

	cvReleaseMat(&sampleFeaturesMat);
	cvReleaseMat(&sampleLabelMat);

	int supportVectorSize = svm.get_support_vector_count();
	cout<<"\nsupport vector size of SVM:"<<supportVectorSize<<endl;
	cout<<"End of training SVM classifier."<<endl<<endl;

	//保存用于检测的HOG特征
	cout<<"5. Save SVM detector file: "<<endl;
	CvMat *sv,*alp,*re;//所有样本特征向量 
	sv  = cvCreateMat(supportVectorSize , dim, CV_32FC1);
	alp = cvCreateMat(1 , supportVectorSize, CV_32FC1);
	re  = cvCreateMat(1 , dim, CV_32FC1);
	CvMat *res  = cvCreateMat(1 , 1, CV_32FC1);

	cvSetZero(sv);
	cvSetZero(re);

	for(int i=0; i<supportVectorSize; i++)
	{
		memcpy( (float*)(sv->data.fl+i*dim), svm.get_support_vector(i), dim*sizeof(float));	
	}

	double* alphaArr = svm.get_alpha();
	int alphaCount = svm.get_alpha_count();

	for(int i=0; i<supportVectorSize; i++)
	{
		alp->data.fl[i] = alphaArr[i];
	}
	cvMatMul(alp, sv, re);

	int posCount = 0;
	for (int i=0; i<dim; i++)
	{
		re->data.fl[i] *= -1;
	}

	//保存为文本文件
	FILE* fp = fopen(detectorSavePath,"wb");
	if( NULL == fp )
	{
		return false;
	}
	for(int i=0; i<dim; i++)
	{
		fprintf(fp,"%f \n",re->data.fl[i]);
	}
	float rho = svm.get_rho();
	fprintf(fp, "%f", rho);
	fclose(fp);
	cout<<"Save "<<detectorSavePath<<" OK!"<<endl;

	return true;
}

//使用detectMultiScale检测
bool DetectMulti(const char* detectorSavePath, const char* testPath)
{
	cout<<"\n******************** Detection Multi********************"<<endl;

	CvCapture* cap = cvCreateFileCapture(testPath);
	if (!cap)
	{
		cout<<"avi file load error..."<<endl;
		return false;
	}

	vector<float> x;
	ifstream fileIn(detectorSavePath, ios::in);
	float val = 0.0f;
	while(!fileIn.eof())
	{
		fileIn>>val;
		x.push_back(val);
	}
	fileIn.close();

	vector<cv::Rect>  found;
	cv::HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
	hog.setSVMDetector(x);

	IplImage* img = NULL;
	cvNamedWindow("img", 0);
	cvNamedWindow("video", 0);

	int frameCount = 0;
	double timeSum = 0.0;
	while(img=cvQueryFrame(cap))
	{
		cvShowImage("video", img);
		frameCount++;

		double begin = clock();
		hog.detectMultiScale(img, found, 0, winStride, cv::Size(0,0), 1.05, 2);
		double end = clock();
		double diff = (end-begin)/CLOCKS_PER_SEC*1000;
		timeSum += diff;
		cout<< "Detection time is: "<<diff<<"ms"<<endl;

		if (found.size() > 0)
		{
			for (int i=0; i<found.size(); i++)
			{
				CvRect tempRect = cvRect(found[i].x, found[i].y, found[i].width, found[i].height);

				cvRectangle(img, cvPoint(tempRect.x,tempRect.y),
					cvPoint(tempRect.x+tempRect.width,tempRect.y+tempRect.height),CV_RGB(255,0,0), 2);
			}
		}
		cvShowImage("img", img);
		if (cvWaitKey(1) == 27)
		{
			break;
		}
	}
	cvReleaseCapture(&cap);

	cout<< "Average detection time is: "<<timeSum / frameCount<<"ms"<<endl;
	return true;
}

//使用detect检测
bool DetectSingle(const char* detectorSavePath, const char* testPath)
{
	cout<<"\n******************** Detection Single********************"<<endl;

	CvCapture* cap = cvCreateFileCapture(testPath);
	if (!cap)
	{
		cout<<"avi file load error..."<<endl;
		return false;
	}

	vector<float> x;
	ifstream fileIn(detectorSavePath, ios::in);
	float val = 0.0f;
	while(!fileIn.eof())
	{
		fileIn>>val;
		x.push_back(val);
	}
	fileIn.close();

	vector<cv::Point>  found;
	cv::HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
	hog.setSVMDetector(x);

	IplImage* img = NULL;
	cvNamedWindow("img", 0);
	cvNamedWindow("video", 0);

	int frameCount = 0;
	double timeSum = 0.0;
	while(img=cvQueryFrame(cap))
	{
		cvShowImage("video", img);
		frameCount++;

		double begin = clock();
		//检测:found为检测目标的左上角坐标点
		hog.detect(img, found, 0, winStride, cvSize(0,0));
		double end = clock();
		double diff = (end-begin)/CLOCKS_PER_SEC*1000;
		timeSum += diff;
		cout<< "Detection time is: "<<diff<<"ms"<<endl;

		if (found.size() > 0)
		{
			for (int i=0; i<found.size(); i++)
			{
				CvRect tempRect = cvRect(found[i].x, found[i].y, winSize.width, winSize.height);

				cvRectangle(img, cvPoint(tempRect.x,tempRect.y),
					cvPoint(tempRect.x+tempRect.width,tempRect.y+tempRect.height),CV_RGB(255,0,0), 2);

			}
		}
		cvShowImage("img", img);
		if (cvWaitKey(1) == 27)
		{
			break;
		}
	}
	cvReleaseCapture(&cap);

	cout<< "Average detection time is: "<<timeSum / frameCount<<"ms"<<endl;
	return true;
}

3、人头检测

主文件main.cpp如下:

#include "global.h"

//SVM classifier file
const char* classifierSavePath = ".\\HOG_SVM.xml";
//HOG detector file
const char* detectorSavePath = ".\\HogDetector.txt";
//Directories holding the positive and negative samples
const char* positivePath = ".\\pos_64_64\\";
const char* negativePath = ".\\neg_64_64\\";
//Number of positive and negative samples
const int pCount = 1416;
const int nCount = 1957;

//Path of the test video file
const char* testVideoPath = ".\\test.avi";

int main(int argc, char* argv[])
{
	bool flag;

	训练
	flag = Train(positivePath, pCount, negativePath, nCount, classifierSavePath, detectorSavePath);
	if (!flag)
	{
		cout<<"Train error!\n";
		return -1;
	}

	检测-单尺度///
	flag = DetectSingle(detectorSavePath, testVideoPath);
	if (!flag)
	{
		cout<<"Detection error!\n";
		return -1;
	}

	检测-多尺度///
	//flag = DetectMulti(detectorSavePath, testVideoPath);
	//if (!flag)
	//{
	//	cout<<"Detection error!\n";
	//	return -1;
	//}

	system("pause");
	return 0;
}


4、结果

采集528幅图像使用xml分类器文件进行分类,一般识别准确率在99%左右;但是在对视频(CIF)做检测的时候,由于是滑动窗口的检测机制,准确率达不到这么高,大概有95%上下,每帧时间大概是31ms上下。除此之外,对于高分辨率视频的检测速度以及误检是个问题:误检主要是将静止物体识别为人头,或者是阴影的干扰,将非人头的运动物体识别为人头的情况比较少。下一步打算使用别的特征采取特征的融合,或者是结合背景建模去除静止物体等方法进行尝试,此外阴影消除算法也在考虑之列。

运行时间截图:



说明:

1、在training的时候一般把sample大小设置成窗口大小一样,开始可能需要resize sample(为了处理多尺度问题,可以使用multi-scale hog feature,然后用PCA降维)。

2、最后对检测出来的目标矩形框可能有多个,要采用一些方法如矩形合并法来处理,比如说多个目标框嵌套着,如果其中一个矩形框的中心在另一个矩形框中,则将这两个矩形框合并起来,直到最后合并到一个矩形框。这里的校正系数group_threshold(参考groupRectangles()函数)也能起到辅助找最合适的目标矩形框的作用。

3、因为hog检测出的矩形框比实际人体框要稍微大些,所以需要对这些矩形框大小尺寸做一些调整,比如更改参数scale0的值。


评论 119
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值