import numpy as np
def loadData():
    """Return a small toy training set.

    Returns:
        tuple: (np.matrix of shape (5, 2) with the feature vectors,
                list of 5 class labels in {+1.0, -1.0})
    """
    samples = [
        [1.2, 2.1],
        [2.0, 1.1],
        [1.3, 1.0],
        [1.2, 1.0],
        [2.0, 1.0],
    ]
    labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return np.matrix(samples), labels
def fenleiqi(data, yuzhi, ls, fuhao):
    """Decision-stump classifier: label samples by thresholding one column.

    Args:
        data: (m, n) matrix of samples.
        yuzhi: threshold value.
        ls: index of the column to compare against the threshold.
        fuhao: 'lt' marks values <= threshold as -1; any other value
            marks values > threshold as -1.

    Returns:
        (m, 1) ndarray of predicted labels in {+1.0, -1.0}.
    """
    predictions = np.ones((data.shape[0], 1))
    column = data[:, ls]
    # Choose which side of the threshold receives the -1 label.
    mask = column <= yuzhi if fuhao == 'lt' else column > yuzhi
    predictions[mask] = -1.0
    return predictions
def jueceshu(data, classLabels, W):
    """Find the best single decision stump under sample weights W.

    Scans every feature column, a fixed grid of thresholds per column,
    and both inequality directions, keeping the stump with the lowest
    weighted classification error.

    Args:
        data: (m, n) training samples.
        classLabels: sequence of m labels in {+1.0, -1.0}.
        W: (m, 1) sample-weight column vector.

    Returns:
        tuple: (predictions of the best stump as an (m, 1) array,
                dict with the best stump's parameters ('阈值', '列', '符号'),
                minimum weighted error).
    """
    feature_mat = np.mat(data)
    label_col = np.mat(classLabels).T   # labels as an (m, 1) column
    weight_col = np.mat(W)
    step_count = 10.0                   # number of threshold steps per column
    m, n = feature_mat.shape
    best_stump = {}                     # parameters of the best stump so far
    best_pred = np.mat(np.zeros((m, 1)))
    lowest_err = np.inf
    for col in range(n):
        lo = feature_mat[:, col].min()
        hi = feature_mat[:, col].max()
        step = (hi - lo) / step_count
        for k in range(-1, int(step_count) + 1):     # sweep the threshold grid
            for direction in ['lt', 'gt']:           # try both inequality directions
                threshold = lo + float(k) * step
                pred = fenleiqi(feature_mat, threshold, col, direction)
                miss = np.mat(np.ones((m, 1)))
                miss[pred == label_col] = 0          # zero out correct predictions
                weighted_err = weight_col.T * miss   # 1x1 matrix: total weighted error
                if weighted_err < lowest_err:
                    lowest_err = weighted_err
                    best_pred = pred.copy()
                    best_stump['阈值'] = threshold
                    best_stump['列'] = col
                    best_stump['符号'] = direction
    return best_pred, best_stump, lowest_err
def Adaboost(data, classLabels, n=40):
    """Train an AdaBoost ensemble of decision stumps.

    Args:
        data: (m, k) training samples (array-like with a .shape attribute).
        classLabels: sequence of m labels in {+1.0, -1.0}.
        n: maximum number of boosting rounds (default 40).

    Returns:
        list of dicts, one per weak classifier, each holding the stump
        parameters ('阈值', '列', '符号') plus its vote weight 'alpha'.
    """
    shuArr = []
    m = data.shape[0]
    W = np.full((m, 1), 1 / m)             # uniform initial sample weights
    label_col = np.mat(classLabels).T
    aggarr = np.mat(np.full((m, 1), 0.0))  # running weighted vote of the ensemble
    for i in range(n):
        bestdata, beststump, min_error = jueceshu(data, classLabels, W)
        # Vote weight of this round's stump. max(err, 1e-16) guards against
        # a ZeroDivisionError / -inf when the stump classifies every
        # weighted sample correctly (min_error == 0).
        err = float(min_error)
        alpha = float(0.5 * np.log((1.0 - err) / max(err, 1e-16)))
        beststump['alpha'] = alpha
        shuArr.append(beststump)
        # Weight update: correctly classified samples get exponent -alpha
        # (weight shrinks), misclassified ones get +alpha (weight grows).
        expon = np.full((m, 1), 1.0)
        expon[label_col == bestdata] = -1.0
        W = np.multiply(np.exp(expon * alpha), W)
        W = W / np.sum(W)                  # renormalize to a distribution
        # Training error of the aggregated (strong) classifier so far.
        aggarr += alpha * bestdata
        miss = np.full((m, 1), 0.0)
        miss[np.sign(aggarr) != label_col] = 1
        error = np.sum(miss) / m
        print(f'错误率为:{error}')
        if error == 0.0:                   # perfect fit on training data: stop early
            break
    return shuArr
def adaboosttest(datatest, testarr):
    """Classify samples with a trained AdaBoost ensemble.

    Args:
        datatest: samples to classify (array-like, m rows).
        testarr: list of weak-classifier dicts produced by Adaboost
            (keys '阈值', '列', '符号', 'alpha').

    Returns:
        (m, 1) matrix of predicted labels: +1 where the accumulated
        weighted vote is positive, -1 where it is negative.
    """
    samples = np.mat(datatest)
    votes = np.mat(np.zeros((samples.shape[0], 1)))  # accumulated weighted votes
    for stump in testarr:
        # Each weak classifier votes, scaled by its alpha weight.
        weak_pred = fenleiqi(samples, stump['阈值'], stump['列'], stump['符号'])
        votes += stump['alpha'] * weak_pred
    return np.sign(votes)
if __name__ == '__main__':
    # Train a 4-round ensemble on the toy data, then classify two new samples.
    x, y = loadData()
    cla = Adaboost(x, y, 4)
    print(adaboosttest([[5, 1], [0, 2]], cla))
# Removed a stray bare identifier ("运行结果") that followed this block:
# it executed at module level and raised NameError when the script ran.