import matplotlib.pyplot as plt
import numpy as np
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor
# Generate a randomly produced dataset.
def create_data(n):
np.random.seed(0)
X = 5*np.random.rand(n,1)
y = np.sin(X).ravel()
noise_num=(int)(n/5)
y[::5] += 3*(0.5 - np.random.rand(noise_num))
return cross_validation.train_test_split(X,y,test_size=0.25,random_state=1)
# Parameter n is the dataset size.
# x is drawn uniformly at random from [0, 5); y is sin(x), with a random
# noise term added to every 5th point.
def test_DecisionTreeRegressor_depth(*data,maxdepth):
X_train,X_test,y_train,y_test = data
depths = np.arange(1,maxdepth)
training_scores= []
testing_scores = []
for depth in depths:
regr = DecisionTreeRegressor(max_depth = depth)
regr.fit(X_train,y_train)
training_scores.append(regr.score(X_train,y_train))
testing_scores.append(regr.score(X_test,y_test))
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(depths,training_scores,label="traing score")
ax.plot(depths,testing_scores,label="testing_score")
ax.set_xlabel("maxdepth")
ax.set_ylabel("score")
ax.set_title("Decision tree Regression")
ax.legend(framealpha=0.5)
plt.show()
X_train,X_test,y_train,y_test = create_data(100)
print(X_train)
print(X_test)
print(y_train)
print(y_test)
test_DecisionTreeRegressor_depth(X_train,X_test,y_train,y_test,maxdepth=20)
# From the output one can see that as the tree gets deeper, the model fits
# both the training and the test set better; with only 100 samples,
# beyond a depth of about 7 the tree can no longer split further.
# The results are shown below: