import numpy as np
import sklearn.datasets as sd
import pandas as pd
import matplotlib.pyplot as plt
# Load the Boston housing dataset and print its basic metadata.
# NOTE(review): sklearn.datasets.load_boston was deprecated in scikit-learn 1.0
# and removed in 1.2 — this line raises on modern scikit-learn; pin
# scikit-learn<1.2 or switch to another dataset to run this script.
boston = sd.load_boston()
# The bare expressions below were notebook cells; in a .py script they are
# no-ops unless printed.
print(boston.keys())
print(boston.filename)
print(boston.DESCR)
print(boston.feature_names)
print(boston.data.shape)   # feature matrix x
print(boston.target.shape) # target vector y
# Split the data into 90% training / 10% test sets (fixed seed for reproducibility).
import sklearn.model_selection as ms

x = boston.data
y = boston.target
train_x, test_x, train_y, test_y = ms.train_test_split(
    x, y, test_size=0.1, random_state=7)
# train_test_split returns [train_x, test_x, train_y, test_y]; collect the
# same arrays instead of re-running the split a second time (the original
# called it twice with the identical seed, doing the shuffle work twice).
res = [train_x, test_x, train_y, test_y]
print(res[0].shape)  # training inputs
print(res[1].shape)  # test inputs
print(res[2].shape)  # training outputs
print(res[3].shape)  # test outputs
# Baseline: a single decision-tree regressor.
import sklearn.tree as st
import sklearn.metrics as sm

model = st.DecisionTreeRegressor(max_depth=6)
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
# Print the predictions and the R^2 score (bare expressions are no-ops in a script).
print(pred_test_y)
print('decision tree r2:', sm.r2_score(test_y, pred_test_y))
# AdaBoost: an ensemble of shallow decision trees.
import sklearn.tree as st
import sklearn.ensemble as se

# Keep the base estimator in its own name instead of immediately
# shadowing `model` with the ensemble.
base_tree = st.DecisionTreeRegressor(max_depth=4)
model = se.AdaBoostRegressor(base_tree, n_estimators=400, random_state=7)
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
print('adaboost r2:', sm.r2_score(test_y, pred_test_y))

# Feature importances, plotted as a descending bar chart.
fi = model.feature_importances_
print(fi)
importances = pd.Series(fi, index=boston.feature_names)
importances.sort_values(ascending=False).plot.bar()
plt.show()  # required to actually display the figure when run as a script
# GBDT: gradient-boosted decision trees.
model = se.GradientBoostingRegressor(
    max_depth=6,
    n_estimators=400,
    min_samples_split=7)
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
# Print predictions and R^2 (bare expressions are no-ops in a script).
print(pred_test_y)
print('gbdt r2:', sm.r2_score(test_y, pred_test_y))
# Exhaustive grid search over GBDT hyper-parameters, tracking the best
# combination (this completes the homework noted in the original: find the
# optimal parameter combination / index of the maximum score).
params_depth = np.arange(4, 14)
params_n = np.arange(400, 1000, 100)
params_split = np.arange(5, 11)

best_score = -np.inf
best_params = None
for depth in params_depth:
    for n_est in params_n:
        for split in params_split:
            model = se.GradientBoostingRegressor(
                max_depth=depth,
                n_estimators=n_est,
                min_samples_split=split)
            model.fit(train_x, train_y)
            pred_test_y = model.predict(test_x)
            score = sm.r2_score(test_y, pred_test_y)
            # Print the parameters with the score — a bare score per line
            # (as in the original) cannot be traced back to its settings.
            print(depth, n_est, split, score)
            if score > best_score:
                best_score = score
                best_params = (depth, n_est, split)

# Homework answer: the best (max_depth, n_estimators, min_samples_split).
print('best params:', best_params, 'best r2:', best_score)
# 决策树_实战_波士顿房价 (Decision tree in practice: Boston house prices)
# Scraped blog metadata, published 2024-04-26 14:23:07 — not code; kept as a comment so the file parses.