基于Boost方法的人脸检测(4):弱分类器生成、强分类器生成(AdaBoost)

本文介绍了如何使用Boost方法进行人脸检测,重点讲述了弱分类器的随机生成策略以及如何组合这些弱分类器形成强分类器,即AdaBoost算法的过程。
摘要由CSDN通过智能技术生成


弱分类器生成:

# Build one threshold ("decision stump") weak classifier per feature column.
# Each entry of allWeakCLF is [threshold, errorNum, flag] where
#   flag 0: errorNum is based on ">= threshold is positive"
#   flag 1: errorNum is based on ">= threshold is negative"
# NOTE(review): posNum / negNum are presumably the total numbers of positive
# and negative samples in `data`, defined earlier in the file -- confirm.
allWeakCLF = []
for feaInd in range(data.shape[1] - 1):  # last column holds the label
    tempD = data.iloc[:, [feaInd, -1]]
    # DataFrame.sort(columns=...) was removed from pandas; sort_values is the
    # replacement. Descending order lets the running counters below describe
    # exactly the samples whose value is >= the current candidate threshold.
    tempD = tempD.sort_values(by=tempD.columns[0], ascending=False)
    tempD = np.array(tempD)

    # threshold, errorNum, 0 or 1 (flag). Infinity guarantees that the first
    # evaluated candidate replaces the placeholder whatever the sample count.
    bestCLF = [0.0, float("inf"), 0]
    curPosNum = 0  # positives seen so far (value >= current threshold)
    curNegNum = 0  # negatives seen so far (value >= current threshold)
    lastRow = len(tempD) - 1
    for rowInd, (threshold, label) in enumerate(tempD):
        if label == 1:
            curPosNum += 1
        else:
            curNegNum += 1

        # Samples sharing the same feature value all fall on the same side of
        # a ">= threshold" test, so the counters are only consistent with that
        # test once the whole run of tied values has been consumed.
        if rowInd < lastRow and tempD[rowInd + 1][0] == threshold:
            continue

        # flag 0: missed positives below the threshold + negatives above it
        errorNum0 = (posNum - curPosNum) + curNegNum
        # flag 1: missed negatives below the threshold + positives above it
        errorNum1 = (negNum - curNegNum) + curPosNum
        if errorNum0 >= errorNum1:
            if errorNum1 < bestCLF[1]:
                bestCLF = [threshold, errorNum1, 1]
        else:  # errorNum0 < errorNum1
            if errorNum0 < bestCLF[1]:
                bestCLF = [threshold, errorNum0, 0]
    allWeakCLF.append(bestCLF)

# Persist the result so later runs can skip this step by loading it instead:
#   allWeakCLF = joblib.load('allWeakCLF.pkl')
joblib.dump(allWeakCLF, 'allWeakCLF.pkl')




随机方式生成强分类器或固定方式生成强分类器:

def MyAdaBoost(data, WeakClfInfo):
    """Assign AdaBoost-style weights to a fixed sequence of weak classifiers.

    The weak classifiers are visited in the order given. For each one the
    weighted error rate on ``data`` is measured, the sample weights are
    updated (correct samples divided by ``delta``, misclassified samples
    multiplied by ``delta``) and the classifier entry is updated in place.

    Args:
        data: 2-D array indexed as ``data[:, [ind, -1]]``; the last column is
            the label, compared against 0 and 1.
        WeakClfInfo: list of mutable 5-item entries
            ``[errorNum, ind, threshold, flag, clfWeight]``. ``ind`` is the
            feature column, ``flag == 0`` means ">= threshold is positive",
            any other flag means ">= threshold is negative".

    Returns:
        The same ``WeakClfInfo`` list, modified in place: item 0 of every
        entry becomes ``delta = sqrt((1 - e) / e)`` (``e`` being the
        normalized weighted error rate) and item 4 becomes ``log(delta)``.

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    # A weighted error rate of exactly 0 or 1 would make delta infinite or
    # zero and crash the division / logarithm below, so keep it strictly
    # inside (0, 1).
    minErrorRate = 1e-10

    #1: init -- every sample starts with the same weight
    sampleNum = len(data)
    dataWeight = [1.0 / sampleNum] * sampleNum

    #2: generate strong clf
    for j, (errorNum, ind, threshold, flag, clfWeight) in enumerate(WeakClfInfo):

        # Label that counts as a mistake on each side of the threshold.
        if flag == 0:  # ">= threshold is positive"
            wrongLabelHigh, wrongLabelLow = 0, 1
        else:  # ">= threshold is negative"
            wrongLabelHigh, wrongLabelLow = 1, 0

        #2.1: calculate the normalized weighted error rate
        errorFlag = [0] * sampleNum  # 0: correct, 1: incorrect
        errorWeight = 0.0
        totalWeight = 0.0
        for i, (value, label) in enumerate(data[:, [ind, -1]]):
            totalWeight += dataWeight[i]
            if ((value >= threshold and label == wrongLabelHigh)
                    or (value < threshold and label == wrongLabelLow)):
                errorWeight += dataWeight[i]
                errorFlag[i] = 1

        # Weights are never renormalized, so divide by their current sum.
        errorRate = errorWeight / totalWeight
        errorRate = min(max(errorRate, minErrorRate), 1.0 - minErrorRate)
        delta = math.sqrt((1 - errorRate) / errorRate)

        #2.2: update dataWeight according to delta
        for i, isWrong in enumerate(errorFlag):
            if isWrong == 0:
                dataWeight[i] /= delta
            else:
                dataWeight[i] *= delta

        #2.3: store the results on entry j (not entry 0, which would
        # overwrite the first classifier on every round)
        WeakClfInfo[j][0] = delta  # errorNum ==> delta
        WeakClfInfo[j][4] = math.log(delta)  # clfWeight

    #3: return the strong clf (WeakClfInfo now carries the strong clf info)
    return WeakClfInfo


#how many strClf we want to train, weakClfNum in each strClf
def RandomGenerateStrClf(data, allWeakCLF, strClfNum=10, weakClfNum=60):
    """Train strong classifiers from randomly sampled weak classifiers.

    Args:
        data: sample array handed unchanged to ``MyAdaBoost``.
        allWeakCLF: list of ``[threshold, errorNum, flag]`` entries; the
            position of an entry is used as its feature index.
        strClfNum: how many strong classifiers to train.
        weakClfNum: how many weak classifiers to sample (without
            replacement) for each strong classifier.

    Returns:
        A list with one ``MyAdaBoost`` result per strong classifier.

    Raises:
        ValueError: from ``random.sample`` when ``weakClfNum`` is larger than
            ``len(allWeakCLF)``.
    """
    randomStrClf = []
    weakClfInd = range(len(allWeakCLF))  # weakClfInd ~= feaInd
    for i in range(strClfNum):
        # One pre-formatted string prints the same text on Python 2 and 3.
        print("training the %s strong classifier %s" % (i + 1, "^_^" * 10))
        sampledWeakClfInd = random.sample(weakClfInd, weakClfNum)
        # Re-pack as [errorNum, ind, threshold, flag, clfWeight (init 1)];
        # the entries are fresh lists because MyAdaBoost edits them in place.
        sampledWeakClfInfo = [
            [allWeakCLF[ind][1], ind, allWeakCLF[ind][0], allWeakCLF[ind][2], 1]
            for ind in sampledWeakClfInd
        ]
        # Lowest errorNum first: that is the order MyAdaBoost visits them in.
        sampledWeakClfInfo.sort()
        # Result entries: [delta, ind, threshold, flag, new clfWeight]
        randomStrClf.append(MyAdaBoost(data, sampledWeakClfInfo))
    return randomStrClf

#how many strClf we want to train, weakClfNum in each strClf
def StaticGenerateStrClf(data, sortedWeakClfInfo, strClfNum=10, weakClfNum=20):
    """Train strong classifiers from consecutive windows of weak classifiers.

    Window ``i`` starts at ``i * step`` and holds up to ``weakClfNum``
    entries, where ``step = min(len(sortedWeakClfInfo) // strClfNum,
    weakClfNum)``; windows therefore overlap whenever ``step`` is smaller
    than ``weakClfNum``.

    Args:
        data: sample array handed unchanged to ``MyAdaBoost``.
        sortedWeakClfInfo: list of
            ``[errorNum, ind, threshold, flag, clfWeight]`` entries, in the
            order they should be consumed. It is not modified.
        strClfNum: how many strong classifiers to train.
        weakClfNum: maximum number of weak classifiers per strong classifier.

    Returns:
        A list with one ``MyAdaBoost`` result per strong classifier; empty
        when ``strClfNum`` is not positive.
    """
    staticStrClf = []
    if strClfNum <= 0:  # nothing to train; also avoids dividing by zero
        return staticStrClf

    # Size the stride from the list actually being sliced, with floor
    # division so that it stays an int usable as a slice bound.
    step = min(len(sortedWeakClfInfo) // strClfNum, weakClfNum)
    for i in range(strClfNum):
        # One pre-formatted string prints the same text on Python 2 and 3.
        print("training the %s strong classifier %s" % (i + 1, "^_^" * 10))
        start = i * step
        # MyAdaBoost overwrites entries in place. Copy each entry so that
        # overlapping windows (and the caller's list) do not share state,
        # otherwise a later round would clobber an earlier strong classifier.
        window = [list(info)
                  for info in sortedWeakClfInfo[start:start + weakClfNum]]
        # Result entries: [delta, ind, threshold, flag, new clfWeight]
        staticStrClf.append(MyAdaBoost(data, window))
    return staticStrClf


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值