分类问题AdaBoost算法
import math
import numpy as np
class Adaboost_tree:
    """AdaBoost binary classifier whose weak learners are one-feature stumps.

    Every stump tests a single feature against a value that occurs in the
    training data (``==`` for 'discrete' features, ``<=`` for 'continuous'
    ones).  It votes ``positive`` when the test holds and ``-positive``
    otherwise.  Labels are expected to be +1 / -1, because the stump votes
    are compared with, and multiplied by, the labels directly.

    Attributes:
        X, Y: training samples and labels as numpy arrays.
        N: number of training samples.
        feature_num: number of feature columns.
        w: current sample weights (sum to 1).
        g_x: fitted stumps as ``(feature_index, split_value, positive, am)``.
        f_dict: feature index -> sorted list of candidate split values.
    """

    # Floor/ceiling applied to the weighted error before taking the log, so
    # that a perfect (or perfectly wrong) stump gets a large finite weight
    # instead of raising ZeroDivisionError / a math domain error.
    _MIN_ERROR = 1e-10
    _FEATURE_TYPES = ('discrete', 'continuous')

    def __init__(self, X, Y, feature_type='discrete'):
        """Store the training set and initialise uniform sample weights.

        Args:
            X: 2-D array-like of shape (samples, features).
            Y: 1-D array-like of +1 / -1 labels, one per sample.
            feature_type: 'discrete' (equality stumps) or 'continuous'
                (threshold stumps).

        Raises:
            ValueError: on an unknown ``feature_type``, an empty or non-2-D
                ``X``, or a label count that differs from the sample count.
        """
        if feature_type not in self._FEATURE_TYPES:
            raise ValueError(
                "feature_type must be 'discrete' or 'continuous', got %r"
                % (feature_type,))
        self.X = np.array(X)
        self.Y = np.array(Y)
        if self.X.ndim != 2 or self.X.size == 0:
            raise ValueError('X must be a non-empty 2-D array-like')
        if len(self.Y) != len(self.X):
            raise ValueError('X and Y must contain the same number of samples')
        self.N = len(self.X)
        self.feature_num = self.X.shape[1]
        self.w = np.full(self.N, 1 / self.N)
        self.g_x = []
        self.feature_type = feature_type  # feature type
        self.get_feature_dict()

    def compute_error(self, y):
        """Return the weighted error: the total weight of samples where ``y`` differs from the labels."""
        y = np.array(y)
        return np.sum(self.w[y != self.Y])

    def compute_am(self, em):
        """Return the stump weight ``0.5 * ln((1 - em) / em)``.

        ``em`` is clamped to ``[_MIN_ERROR, 1 - _MIN_ERROR]`` first so the
        result is always finite.
        """
        em = min(max(em, self._MIN_ERROR), 1 - self._MIN_ERROR)
        return 0.5 * math.log((1 - em) / em)

    def get_feature_dict(self):
        """Collect, per feature column, the distinct values used as split candidates."""
        # np.unique returns the values sorted, which makes the scan order (and
        # therefore tie-breaking between equally good stumps) deterministic.
        self.f_dict = {
            i: np.unique(self.X[:, i]).tolist()
            for i in range(self.feature_num)
        }

    def _stump_hit(self, values, split_value):
        """Evaluate the stump test on a scalar or a whole column."""
        if self.feature_type == 'discrete':
            return values == split_value
        return values <= split_value

    def _best_stump(self):
        """Return ``(error, feature_index, split_value, positive, predictions)`` of the best stump.

        Both polarities of every (feature, value) pair are scored under the
        current sample weights.  On ties the earliest candidate wins, and
        polarity +1 is preferred over polarity -1.
        """
        best_pos = None
        best_neg = None
        for i in range(self.feature_num):
            column = self.X[:, i]
            for j in self.f_dict[i]:
                votes = np.where(self._stump_hit(column, j), 1, -1)
                error = self.compute_error(votes)
                if best_pos is None or error < best_pos[0]:
                    best_pos = (error, i, j, 1, votes)
                flipped_error = self.compute_error(-votes)
                if best_neg is None or flipped_error < best_neg[0]:
                    best_neg = (flipped_error, i, j, -1, -votes)
        return best_pos if best_pos[0] <= best_neg[0] else best_neg

    def fit(self, max_iter=20):
        """Run up to ``max_iter`` boosting rounds, appending one stump per round to ``g_x``.

        Training stops early when a stump classifies every training sample
        correctly.  That stump is still stored, so the model can predict.
        """
        for _ in range(max_iter):
            em, split_f_index, split_value, positive, pred_y = self._best_stump()
            am = self.compute_am(em)
            self.g_x.append((split_f_index, split_value, positive, am))
            if em == 0:
                # A perfect stump leaves nothing to re-weight; keep it and stop.
                print('em is zero break')
                break
            # Increase the weight of misclassified samples, decrease the rest,
            # then renormalise so the weights remain a distribution.
            w_list = self.w * np.exp(-am * self.Y * pred_y)
            self.w = w_list / np.sum(w_list)

    def predict_single(self, x):
        """Return the sign of the weighted stump vote for one sample (0 if no stump is fitted)."""
        result = 0
        for split_f_index, split_value, positive, am in self.g_x:
            if self._stump_hit(x[split_f_index], split_value):
                result += positive * am
            else:
                result += -positive * am
        return np.sign(result)

    def predict(self, X):
        """Return a list with the predicted sign for every sample in ``X``."""
        return [self.predict_single(x) for x in X]
def main():
    """Train the stump-based AdaBoost classifier on a small toy set and print its predictions for that same set."""
    samples = np.array([
        [0, 1, 3], [0, 3, 1], [1, 2, 2], [1, 1, 3], [1, 2, 3],
        [0, 1, 2], [1, 1, 2], [1, 1, 1], [1, 3, 1], [0, 2, 1],
    ])
    labels = np.array([-1, -1, -1, -1, -1, -1, 1, 1, -1, -1])
    # Features are compared with `<=` thresholds, hence 'continuous'.
    classifier = Adaboost_tree(samples, labels, feature_type='continuous')
    classifier.fit(20)
    training_predictions = classifier.predict(samples)
    print(training_predictions)
# Run the classifier demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
#############result########################
/usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/AdaBoost_cat.py
[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0]
回归问题提升树算法
单个平方误差回归树的代码请参考我之前的博客:
《统计学习方法第五章作业:ID3/C4.5算法分类决策树、平方误差二叉回归树代码实现》
from shixian2 import reg_tree
import numpy as np
class adboost_reg_tree:
    """Boosted regression trees fitted stage-wise on residuals.

    Each round builds a ``reg_tree.Cart_reg`` on the current residual,
    subtracts that tree's predictions from the residual and keeps the tree.
    A prediction is the sum of the outputs of all stored trees.
    """

    def __init__(self):
        """Start with an empty ensemble."""
        self.tree_list = []

    def fit(self, X, Y, max_iter=5, min_leave_data=3):
        """Fit up to ``max_iter`` trees, each on the residual left by the previous ones.

        ``min_leave_data`` is passed straight through to ``reg_tree.Cart_reg``.
        After the call ``self.Y`` holds the remaining residual, not the
        original targets.  Each round prints that round's tree predictions.
        """
        self.X = np.array(X)
        self.Y = np.array(Y)
        rounds_done = 0
        while rounds_done < max_iter:
            stage_tree = reg_tree.Cart_reg(self.X, self.Y, min_leave_data)
            stage_tree.build_tree()
            stage_output = np.array(stage_tree.predict(self.X))
            print(stage_output)
            self.tree_list.append(stage_tree)
            self.Y = self.Y - stage_output
            # Stop as soon as every residual is exactly zero.
            if not (self.Y != 0).any():
                print('total_fit')
                break
            rounds_done += 1

    def predict(self, X):
        """Return the element-wise sum of every stored tree's predictions for ``X``."""
        total = np.zeros(len(X))
        for stage_tree in self.tree_list:
            total += np.array(stage_tree.predict(X))
        return total
def main():
    """Fit the boosted regression trees on a small 7x7 toy set and print the predictions for that same set."""
    features = [
        [1, 5, 7, 4, 8, 1, 2],
        [2, 3, 5, 5, 2, 7, 8],
        [1, 2, 3, 4, 5, 6, 7],
        [1, 2, 1, 2, 2, 3, 9],
        [2, 8, 9, 7, 0, 1, 4],
        [4, 8, 3, 4, 5, 6, 7],
        [4, 1, 3, 1, 5, 8, 0],
    ]
    targets = [2, 6, 2, 5, 8, 3, 2]
    booster = adboost_reg_tree()
    booster.fit(features, targets, max_iter=5, min_leave_data=4)
    fitted_values = booster.predict(features)
    print(fitted_values)
# Run the regression demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
#######result#########################
/usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/adboost_reg_tree.py
[2.25 6.33333333 2.25 6.33333333 6.33333333 2.25
2.25 ]
[-0.27083333 -0.27083333 -0.27083333 -1.33333333 1.20833333 1.20833333
-0.27083333]
[ 0.015625 -0.0625 0.015625 0.015625 0.45833333 -0.45833333
0.015625 ]
[ 0.00390625 0.00390625 0.00390625 -0.015625 0. 0.
0.00390625]
[ 0.00130208 -0.00195312 0.00043403 0.00043403 -0.00195312 0.00043403
0.00130208]
[2. 6.00195312 1.99913194 5.00043403 7.99804688 3.00043403
2. ]
Process finished with exit code 0