## 决策树回归
## Bagging — classification task
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Toy dataset: feature in column 0, label in {+1, -1} in column 1.
df = pd.DataFrame([[0, 1], [1, 1], [2, 1], [3, -1], [4, -1],
                   [5, -1], [6, 1], [7, 1], [8, 1], [9, -1]])

M = []        # fitted base decision-tree models
n_trees = 20  # number of trees in the ensemble

for i in range(n_trees):
    # Bootstrap sample (same size as df, drawn with replacement) so that
    # each tree sees a slightly different training set.
    tmp = df.sample(frac=1, replace=True)
    X = tmp.iloc[:, :-1]  # features of the bootstrap sample
    Y = tmp.iloc[:, -1]   # labels of the bootstrap sample
    model = DecisionTreeClassifier(max_depth=3).fit(X, Y)
    M.append(model)

X = df.iloc[:, :-1]  # features of the full dataset
Y = df.iloc[:, -1]   # labels of the full dataset
res = 0              # accumulator for the summed votes
for m in M:          # sum each tree's {+1, -1} prediction per sample
    res += m.predict(X)
# Majority vote: the sign of the summed votes is the ensemble's label.
np.sign(res)
## Boosting — regression
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Toy dataset: feature x, continuous target y.
df = pd.DataFrame([[1, 5.56], [2, 5.7], [3, 5.91], [4, 6.4], [5, 6.8],
                   [6, 7.05], [7, 8.9], [8, 8.7], [9, 9], [10, 9.05]],
                  columns=["x", "y"])

M = []        # fitted base learners (depth-1 regression stumps)
n_trees = 6   # number of boosting rounds
X = df.iloc[:, :-1]  # features
Y = df.iloc[:, -1]   # current fitting target (starts as y, becomes residuals)

for i in range(n_trees):
    # Fit a stump to the current residual target.
    model = DecisionTreeRegressor(max_depth=1).fit(X, Y)
    M.append(model)
    y_hat = model.predict(X)  # this round's predictions
    # Replace the target with the residual so the next learner
    # fits what the ensemble so far failed to explain.
    Y = Y - y_hat

res = np.zeros(df.shape[0])  # accumulator for the ensemble prediction
for m in M:                  # boosting predicts with the SUM of all learners
    res += m.predict(X)
# Final per-sample prediction of the boosted ensemble.
res
## 随机森林回归
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Toy dataset: feature x in column 0, continuous target y in column 1.
df = pd.DataFrame([[1, 5.56], [2, 5.7], [3, 5.91], [4, 6.4], [5, 6.8],
                   [6, 7.05], [7, 8.9], [8, 8.7], [9, 9], [10, 9.05]])
X = df.iloc[:, [0]]  # features, kept 2-D as sklearn expects
Y = df.iloc[:, -1]   # target

# Fit a 350-tree random forest and predict back on the training features.
model = RandomForestRegressor(n_estimators=350).fit(X, Y)
model.predict(X)
## 随机森林分类
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Toy dataset: feature in column 0, label in {+1, -1} in column 1.
df = pd.DataFrame([[0, 1], [1, 1], [2, 1], [3, -1], [4, -1],
                   [5, -1], [6, 1], [7, 1], [8, 1], [9, -1]])
X = df.iloc[:, [0]]  # features, kept 2-D as sklearn expects
Y = df.iloc[:, -1]   # class labels

# Fit a 50-tree random forest and predict back on the training features.
model = RandomForestClassifier(n_estimators=50).fit(X, Y)
model.predict(X)