ID3 uses information gain as the criterion for feature selection (larger is better), C4.5 uses the information gain ratio (larger is better), and the CART classification tree uses the Gini index to choose the best splitting feature (smaller is better).
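As a quick illustration of the difference between these criteria, here is a minimal sketch (not part of the original post; the toy labels and the helper functions entropy and gini are invented for this example) that scores one candidate split by information gain (larger is better) and by the weighted Gini index (smaller is better). In scikit-learn the same choice of impurity measure is exposed through the criterion parameter of DecisionTreeClassifier ("gini" or "entropy").

import numpy as np

def entropy(labels):
    # Shannon entropy of a label array; lower means purer
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))

def gini(labels):
    # Gini index of a label array; lower means purer
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return 1.0 - np.sum(p ** 2)

y = np.array([0, 0, 0, 1, 1, 1, 1, 1])    # class labels at the parent node
left, right = y[:3], y[3:]                 # one candidate split
# ID3-style information gain: parent entropy minus weighted child entropy (larger is better)
gain = entropy(y) - (len(left) / len(y)) * entropy(left) - (len(right) / len(y)) * entropy(right)
print("information gain:", gain)
# CART scores the same split by the weighted Gini index of the children (smaller is better)
weighted_gini = (len(left) / len(y)) * gini(left) + (len(right) / len(y)) * gini(right)
print("weighted Gini index:", weighted_gini)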
1. Regression decision tree:
# -*- coding: utf-8 -*-
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

def creat_data(n):
    '''
    Randomly generate a dataset.
    '''
    np.random.seed(0)
    X = 5 * np.random.rand(n, 1)    # n*1 matrix of features in [0, 5)
    y = np.sin(X).ravel()           # targets: sin(x), flattened to 1-D
    noise_num = n // 5              # number of samples that receive noise
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))    # add noise to every 5th sample
    return train_test_split(X, y, test_size=0.25, random_state=1)
def test_DecisionTreeRegressor(*data):
    '''
    Fit a DecisionTreeRegressor and plot its predictions.
    (In Python, a parameter prefixed with * accepts a variable number of arguments.)
    '''
    X_train, X_test, y_train, y_test = data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("the train score: %f" % regr.score(X_train, y_train))
    print("the test score: %f" % regr.score(X_test, y_test))
    # plot the samples and the fitted regression curve
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]    # add a new axis: now a 2-D column vector
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train sample", c='r')
    ax.scatter(X_test, y_test, label="test sample", c='b')    # (x, y) points in the plane
    ax.plot(X, Y, label="predict value")