# AdaBoost with decision stumps, adapted while studying "Machine Learning in Action".
import numpy as np
import math
def loadSimpleData():
    """Return the small 2-D toy training set.

    Returns:
        dataMat: 5x2 np.matrix of feature vectors, one sample per row.
        classLabels: list of +1.0 / -1.0 class labels, one per sample.
    """
    dataMat = np.matrix([[1.0, 2.1],
                         [2.0, 1.1],
                         [1.3, 1.0],
                         [1.0, 1.0],
                         [2.0, 1.0]])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return dataMat, classLabels
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Classify samples with a single threshold test on one feature.

    Args:
        dataMatrix: m x n matrix of samples.
        dimen: index of the feature column to test.
        threshVal: threshold value.
        threshIneq: 'lt' labels values <= threshVal as -1;
            anything else ('gt') labels values > threshVal as -1.

    Returns:
        m x 1 array of +1.0 / -1.0 predictions.
    """
    numSamples = np.shape(dataMatrix)[0]
    predictions = np.ones((numSamples, 1))  # default everything to +1
    column = dataMatrix[:, dimen]
    if threshIneq == 'lt':
        predictions[column <= threshVal] = -1.0
    else:
        predictions[column > threshVal] = -1.0
    return predictions
def buildStump(dataArr, classLabels, D, numSteps=1.0):
    """Find the decision stump with the lowest weighted error.

    Scans every feature and, for each, a grid of candidate thresholds over
    the feature's value range, trying both inequality directions.

    Args:
        dataArr: m x n feature matrix (anything np.mat accepts).
        classLabels: length-m sequence of +1 / -1 labels.
        D: m x 1 matrix of sample weights (should sum to 1).
        numSteps: how many threshold steps to scan per feature.  The
            default 1.0 preserves the original coarse search; larger
            values (e.g. 10.0) give a finer grid.

    Returns:
        (bestStump, minError, bestClasEst):
            bestStump: dict with keys 'dim', 'thresh', 'ineq'.
            minError: lowest D-weighted error rate found (float).
            bestClasEst: m x 1 predictions of the best stump.
    """
    dataMatrix = np.mat(dataArr)
    labelMat = np.mat(classLabels).T
    m, n = np.shape(dataMatrix)
    bestStump = {}
    bestClasEst = np.mat(np.zeros((m, 1)))
    minError = np.inf
    for i in range(n):  # try every feature
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        # j = -1 starts one step below the minimum so thresholds just
        # outside the value range are also tried.
        for j in range(-1, int(numSteps) + 1):
            for inequal in ['lt', 'gt']:
                threshVal = rangeMin + float(j) * stepSize
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                errArr = np.mat(np.ones((m, 1)))
                errArr[predictedVals == labelMat] = 0  # 0 where correct
                # D-weighted error rate; float() so callers get a scalar,
                # not a 1x1 matrix.
                weightedError = float(D.T * errArr)
                if weightedError < minError:
                    minError = weightedError
                    bestClasEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClasEst
def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost ensemble of decision stumps.

    Args:
        dataArr: m x n feature matrix.
        classLabels: length-m sequence of +1 / -1 labels.
        numIt: maximum number of boosting rounds (weak learners).

    Returns:
        List of stump dicts, each with 'dim', 'thresh', 'ineq', 'alpha'.
        Stops early once the aggregate training error reaches zero.
    """
    weakClassArr = []
    m = np.shape(dataArr)[0]
    D = np.mat(np.ones((m, 1)) / m)        # uniform initial sample weights
    aggClassEst = np.mat(np.zeros((m, 1))) # running weighted vote
    for _ in range(numIt):
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        # Classifier weight: lower error -> larger alpha.  Clamp the error
        # away from zero to avoid division by zero / log of infinity.
        alpha = float(0.5 * np.log((1.0 - error) / max(float(error), 1e-6)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        # BUG FIX: the original code used expon = e**(-alpha), a single
        # constant applied to every weight, which the normalization below
        # cancels out -- D never changed and every round picked the same
        # stump.  The correct AdaBoost update scales each sample by
        # exp(-alpha * y_i * h(x_i)): down-weight correct predictions,
        # up-weight mistakes.
        expon = np.multiply(-1 * alpha * np.mat(classLabels).T, classEst)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()  # renormalize to a distribution
        aggClassEst += alpha * classEst
        aggErrors = np.multiply(np.sign(aggClassEst) != np.mat(classLabels).T,
                                np.ones((m, 1)))
        errorRate = aggErrors.sum() / m
        if errorRate == 0.0:  # perfect on training set -> stop early
            break
    return weakClassArr
def adaClassify(datToClass, classifierArr):
    """Classify samples with a trained AdaBoost stump ensemble.

    Args:
        datToClass: m x n matrix (or list) of samples to classify.
        classifierArr: list of stump dicts produced by adaBoostTrainDS,
            each with 'dim', 'thresh', 'ineq', 'alpha'.

    Returns:
        m x 1 matrix of +1.0 / -1.0 predicted labels (sign of the
        alpha-weighted vote of all stumps).
    """
    dataMatrix = np.mat(datToClass)
    m = np.shape(dataMatrix)[0]
    aggClassEst = np.mat(np.zeros((m, 1)))
    # Accumulate each stump's vote, weighted by its alpha.
    for stump in classifierArr:
        classEst = stumpClassify(dataMatrix, stump['dim'],
                                 stump['thresh'], stump['ineq'])
        aggClassEst += stump['alpha'] * classEst
    return np.sign(aggClassEst)
if __name__ == '__main__':
    # Train an AdaBoost ensemble on the toy data set, then re-classify
    # the training points with it.  (The original also created an unused
    # weight vector D for a commented-out buildStump call; removed.)
    x, y = loadSimpleData()
    classifiers = adaBoostTrainDS(x, y)
    print(np.shape(classifiers))
    print(adaClassify(x, classifiers))