一、原理推导
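对于 m=1,2,…,M(共 M 轮迭代),依次执行:(1) 在当前样本权值分布 $D_m$ 下训练弱分类器 $G_m(x)$;(2) 计算加权分类误差率 $e_m=\sum_{i} w_{mi}\,I(G_m(x_i)\neq y_i)$;(3) 计算该弱分类器的系数 $\alpha_m=\frac{1}{2}\ln\frac{1-e_m}{e_m}$;(4) 更新样本权值 $w_{m+1,i}=\frac{w_{mi}\exp(-\alpha_m y_i G_m(x_i))}{Z_m}$,其中 $Z_m$ 为归一化因子。最终的强分类器为 $G(x)=\mathrm{sign}\left(\sum_{m=1}^{M}\alpha_m G_m(x)\right)$。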
二、Adaboost代码实现
AdaBoost 算法的作用是将多个弱分类器组合、提升为一个强分类器。下面的代码使用单层决策树(decision stump)作为弱分类器,这也是最常用的弱分类器;随后再根据这些弱分类器构造出强分类器。
from numpy import *
def stumpclassify(dataMatrix, dimen, threshval, threshIneq):
    """Classify samples with a single-level decision tree (decision stump).

    Args:
        dataMatrix: 2-D sample data, one row per sample.
        dimen: index of the feature (column) used for the split.
        threshval: split threshold applied to that feature.
        threshIneq: split direction. 'lt' labels values <= threshval as -1.0;
            any other value labels values > threshval as -1.0.

    Returns:
        Array of shape (number of samples, 1) holding 1.0 or -1.0 per sample.
    """
    feature = dataMatrix[:, dimen]
    if threshIneq == 'lt':
        negative = feature <= threshval
    else:
        negative = feature > threshval
    # Start with every sample in the +1 class, then flip the selected side.
    predictions = ones((dataMatrix.shape[0], 1))
    predictions[negative] = -1.0
    return predictions
def buildStump(dataMatrix, labels, weight):
    """Find the decision stump with the lowest weighted error.

    Every feature is scanned over evenly spaced thresholds (including one
    step below the minimum and one step above the maximum) and both split
    directions; the combination with the smallest weighted error wins.

    Args:
        dataMatrix: 2-D sample data (matrix or array), one row per sample.
        labels: column of class labels (+1.0 / -1.0), shape (m, 1).
        weight: column of sample weights, shape (m, 1).

    Returns:
        Tuple ``(bestStump, minError, bestClassEst)``:
        bestStump is a dict with keys 'dim', 'threshVal' and 'threshIneq';
        minError is the weighted error of that stump (a 1x1 matrix produced
        by the matrix product, or ``inf`` if no stump was evaluated);
        bestClassEst holds the stump's predictions, shape (m, 1).
    """
    m, n = dataMatrix.shape
    numSteps = 20.0  # number of intervals each feature range is split into
    bestStump = {}
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    bestClassEst = asmatrix(zeros((m, 1)))
    minError = inf
    for i in range(n):  # iterate over every feature
        column = dataMatrix[:, i]
        # Use the array methods so the bounds are plain scalars regardless of
        # whether bare `min`/`max` resolve to the builtins or to NumPy's.
        rangeMin = float(column.min())
        rangeMax = float(column.max())
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps) + 2):  # iterate over split points
            threshVal = rangeMin + j * stepSize
            for threshIneq in ('lt', 'gt'):
                predictedVals = stumpclassify(dataMatrix, i, threshVal, threshIneq)
                # 1 where the prediction is wrong, 0 where it is right.
                errorArray = asmatrix(ones((m, 1)))
                errorArray[predictedVals == labels] = 0
                weightedError = weight.T * errorArray
                if weightedError < minError:
                    minError = weightedError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['threshVal'] = threshVal
                    bestStump['threshIneq'] = threshIneq
    return bestStump, minError, bestClassEst
def train(dataMatrix, labels, numIter=100):
    """Train an AdaBoost ensemble of decision stumps.

    Args:
        dataMatrix: 2-D sample data, one row per sample.
        labels: class labels (+1.0 / -1.0); reshaped to a column internally.
        numIter: maximum number of boosting rounds.

    Returns:
        List of stump dicts as produced by ``buildStump``, each extended with
        an 'alpha' key holding that stump's coefficient. Training stops early
        once the ensemble's training error reaches zero.
    """
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    dataMatrix = asmatrix(dataMatrix)
    labels = asmatrix(labels).reshape(-1, 1)
    weakClassArr = []
    m = dataMatrix.shape[0]
    weight = asmatrix(ones((m, 1)) / m)  # initialise uniform sample weights
    aggClassEst = asmatrix(zeros((m, 1)))
    for i in range(numIter):
        bestStump, error, classEst = buildStump(dataMatrix, labels, weight)
        # buildStump may hand back a 1x1 matrix; extract a plain float rather
        # than relying on the deprecated float(size-1 array) conversion.
        error = float(asarray(error).item())
        # Coefficient of the current weak classifier; the floor on the
        # denominator avoids division by zero for a perfect stump.
        alpha = float(0.5 * log((1 - error) / maximum(error, 1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        expon = multiply(-1 * alpha * labels, classEst)
        weight = multiply(weight, exp(expon))  # update the sample weights
        weight = weight / weight.sum()  # renormalise so the weights sum to 1
        aggClassEst += alpha * classEst
        errorRate = float((sign(aggClassEst) != labels).sum()) / m
        print("当前共有%d颗树,total error为:%f "%(i+1,errorRate))
        if errorRate == 0.0:
            break
    return weakClassArr
def predict(dataMatrix, classifierArr):
    """Classify samples with a trained list of weighted decision stumps.

    Args:
        dataMatrix: sample data; anything ``asmatrix`` accepts (a flat list
            is treated as a single sample).
        classifierArr: list of stump dicts with keys 'dim', 'threshVal',
            'threshIneq' and 'alpha'.

    Returns:
        Matrix of shape (number of samples, 1) holding the sign of the
        alpha-weighted sum of the stump predictions.
    """
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    dataMatrix = asmatrix(dataMatrix)
    m = dataMatrix.shape[0]
    aggClassEst = asmatrix(zeros((m, 1)))
    for stump in classifierArr:
        classEst = stumpclassify(dataMatrix, stump['dim'],
                                 stump['threshVal'], stump['threshIneq'])
        aggClassEst += stump['alpha'] * classEst
        # NOTE(review): the original indentation was lost; printing the
        # running score after every stump is assumed - confirm.
        print(aggClassEst)
    return sign(aggClassEst)
# Demo: train on five 2-D samples, then classify the point (0, 0).
dataMat = matrix([[1., 2.1],
                  [2., 1.1],
                  [1.3, 1.],
                  [1., 1.],
                  [2., 1.]])
# asmatrix instead of the `mat` alias, which NumPy 2.0 removed; the
# transpose turns the row of labels into a column.
classLabels = asmatrix([1.0, 1.0, -1.0, -1.0, 1.0]).T
classifierArr = train(dataMat, classLabels, 30)
t = predict([0, 0], classifierArr)
print(t)
输出结果: