基于Boost方法的人脸检测（4）：弱分类器生成、强分类器生成（AdaBoost）

最新推荐文章于 2024-04-11 09:52:10 发布

mmc2015

最新推荐文章于 2024-04-11 09:52:10 发布

阅读量1.5k

点赞数 1

分类专栏：基于Boost方法的人脸检测，田永鸿老师《机器学习》　文章标签：人脸检测、基于Boost方法的人脸检测、Boost人脸检测

本文链接：https://blog.csdn.net/mmc2015/article/details/51286166

版权

基于Boost方法的人脸检测，田永鸿老师《机器学习》专栏收录该内容

5 篇文章 1 订阅

订阅专栏

本文介绍了如何使用Boost方法进行人脸检测，重点讲述了基于阈值搜索的弱分类器生成方法，以及如何以随机方式或固定方式选取这些弱分类器，并通过AdaBoost算法将其组合成强分类器的过程。

摘要由CSDN通过智能技术生成

弱分类器生成：

# Build one threshold weak classifier per feature column and persist the list.
#
# Names used here but defined elsewhere in the file: data, posNum, negNum, np, joblib.
# data is indexed with .iloc/.shape, and its last column is read as the label.
# NOTE(review): posNum/negNum are presumably the total number of label==1 rows
# and of the remaining rows in data -- confirm against where they are computed.
#
# Each entry of allWeakCLF is [threshold, errorNum, flag] with
#   flag 0: errorNum is based on ">= threshold is positive"
#   flag 1: errorNum is based on ">= threshold is negative"
allWeakCLF=[]
for feaInd in range(data.shape[1]-1):
	# Keep only this feature and the label, largest feature value first.
	# DataFrame.sort(columns=...) no longer exists in pandas; sort_values is its replacement.
	tempD=data.iloc[:,[feaInd,-1]]
	tempD=tempD.sort_values(by=tempD.columns[0], ascending=False)
	tempD=np.array(tempD)
	rowNum=len(tempD)

	# inf instead of a fixed 100000 so the first real candidate always wins,
	# whatever the size of the data set.
	bestCLF=[0.0, float('inf'), 0] #threshold, errorNum, 0 or 1(flag)
	curPosNum=0 #label==1 rows seen so far (feature value >= current threshold)
	curNegNum=0 #all other rows seen so far
	for rowInd, (threshold, label) in enumerate(tempD):
		if label==1:
			curPosNum+=1
		else:
			curNegNum+=1

		# Rows sharing one feature value all fall on the same side of a
		# ">= threshold" test, so the counts are only valid once the whole
		# run of equal values has been consumed.
		if rowInd+1<rowNum and tempD[rowInd+1][0]==threshold:
			continue

		errorNum0=(posNum-curPosNum)+curNegNum #">= threshold is positive"
		errorNum1=(negNum-curNegNum)+curPosNum #">= threshold is negative"
		if errorNum0>=errorNum1:
			if errorNum1<bestCLF[1]:
				bestCLF=[threshold, errorNum1, 1]
		else: #errorNum0<errorNum1
			if errorNum0<bestCLF[1]:
				bestCLF=[threshold, errorNum0, 0]
	allWeakCLF.append(bestCLF)
joblib.dump(allWeakCLF, 'allWeakCLF.pkl')
# To reuse a previous run instead of recomputing:
#allWeakCLF=joblib.load('allWeakCLF.pkl')

随机方式生成强分类器或固定方式生成强分类器：

def MyAdaBoost(data, WeakClfInfo):
	"""Weight an ordered list of weak threshold classifiers with AdaBoost-style updates.

	Args:
		data: 2-D array indexed as data[:, [ind, -1]]; the last column is the
			label, compared against 0 and 1.
		WeakClfInfo: list of 5-element lists
			[errorNum, ind, threshold, flag, clfWeight]. ind is the feature
			column; flag 0 means ">= threshold is positive", any other flag
			means ">= threshold is negative". The incoming errorNum and
			clfWeight values are not read.

	Returns:
		The same WeakClfInfo list, modified in place. For every entry, slot 0
		becomes delta = sqrt((1 - e) / e) and slot 4 becomes log(delta), where
		e is that classifier's weighted error rate on the current sample weights.

	A weighted error of exactly 0 or 1 is clamped slightly inside (0, 1) so
	that a perfect (or perfectly wrong) weak classifier gets a large finite
	weight instead of raising ZeroDivisionError / ValueError.
	"""
	minErrorRate=1e-10 #clamp bound that keeps delta and log(delta) finite

	#1: init -- every sample starts with the same weight
	sampleNum=len(data)
	dataWeight=[1.0/sampleNum]*sampleNum

	#2: generate strong clf, one weak clf at a time in the given order
	for j, (_, ind, threshold, flag, _) in enumerate(WeakClfInfo):

		#2.1: weighted error rate of this weak clf under the current weights
		misclassified=[False]*sampleNum
		errorWeight=0.0
		totalWeight=0.0
		for i, (value, label) in enumerate(data[:,[ind, -1]]):
			totalWeight+=dataWeight[i]
			isAbove=value>=threshold
			isBelow=value<threshold
			if flag==0: #">= threshold is positive"
				wrong=(isAbove and label==0) or (isBelow and label==1)
			else: #">= threshold is negative"
				wrong=(isAbove and label==1) or (isBelow and label==0)
			if wrong:
				errorWeight+=dataWeight[i]
				misclassified[i]=True
		errorRate=errorWeight/totalWeight
		errorRate=min(max(errorRate, minErrorRate), 1.0-minErrorRate)
		delta=math.sqrt((1-errorRate)/errorRate)

		#2.2: shrink the weight of correctly classified samples, grow the rest
		for i, isWrong in enumerate(misclassified):
			if isWrong:
				dataWeight[i]*=delta
			else:
				dataWeight[i]/=delta

		#2.3: store the result on entry j (index j, not 0)
		WeakClfInfo[j][0]=delta #errorNum ==> delta
		WeakClfInfo[j][4]=math.log(delta) #clfWeight

	#3: return the strong clf (WeakClfInfo now carries the boosted weights)
	return WeakClfInfo


def RandomGenerateStrClf(data, allWeakCLF, strClfNum=10, weakClfNum=60):
	"""Train strClfNum strong classifiers, each from a random sample of weak classifiers.

	Args:
		data: passed through unchanged to MyAdaBoost.
		allWeakCLF: list of [threshold, errorNum, flag]; the list index is
			used as the feature index of the weak classifier.
		strClfNum: how many strong classifiers to train.
		weakClfNum: how many weak classifiers go into each strong classifier.
			random.sample raises ValueError if this exceeds len(allWeakCLF).

	Returns:
		A list with one MyAdaBoost result per strong classifier; each result
		is a list of [delta, ind, threshold, flag, clfWeight].
	"""
	randomStrClf=[]
	weakClfInd=range(len(allWeakCLF)) #weakClfInd~=feaInd
	for i in range(strClfNum):
		# Single-argument print: same output on Python 2 and Python 3.
		print("training the %d strong classifier %s" % (i+1, "^_^"*10))
		sampledWeakClfInd=random.sample(weakClfInd,weakClfNum)
		#errorNum, ind, threshold, 0 or 1(flag), clfWeight(init with 1)
		sampledWeakClfInfo=[
			[allWeakCLF[ind][1], ind, allWeakCLF[ind][0], allWeakCLF[ind][2], 1]
			for ind in sampledWeakClfInd
		]
		# Lowest errorNum first: that is the order MyAdaBoost processes them in.
		sampledWeakClfInfo.sort()
		randomStrClf.append(MyAdaBoost(data, sampledWeakClfInfo))
	return randomStrClf

def StaticGenerateStrClf(data, sortedWeakClfInfo, strClfNum=10, weakClfNum=20):
	"""Train strClfNum strong classifiers from fixed sliding windows of weak classifiers.

	Args:
		data: passed through unchanged to MyAdaBoost.
		sortedWeakClfInfo: list of [errorNum, ind, threshold, flag, clfWeight]
			entries, taken in the order given. It is not modified.
		strClfNum: how many strong classifiers to train.
		weakClfNum: window length, i.e. weak classifiers per strong classifier.

	Returns:
		A list with one MyAdaBoost result per window; each result is a list
		of [delta, ind, threshold, flag, clfWeight].

	Window i covers sortedWeakClfInfo[i*step : i*step+weakClfNum], where step
	is len(sortedWeakClfInfo)//strClfNum capped at weakClfNum, so consecutive
	windows may overlap.
	"""
	staticStrClf=[]
	# Size the stride from the list actually being sliced, and use integer
	# division so it stays a valid slice index on Python 3 as well.
	step=len(sortedWeakClfInfo)//strClfNum
	if step>weakClfNum:
		step=weakClfNum
	for i in range(strClfNum):
		# Single-argument print: same output on Python 2 and Python 3.
		print("training the %d strong classifier %s" % (i+1, "^_^"*10))
		# MyAdaBoost writes into the entries it is given. Copy each one so
		# overlapping windows do not overwrite an earlier strong classifier.
		window=[list(info) for info in sortedWeakClfInfo[i*step:i*step+weakClfNum]]
		staticStrClf.append(MyAdaBoost(data, window))
	return staticStrClf