# Decision tree reference: https://www.cnblogs.com/molieren/articles/10664954.html
from sklearn import datasets
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier
import random
def main():
    """Compare three tree-based classifiers on the iris dataset.

    A decision tree, a random forest and an extra-trees ensemble (all using
    the entropy criterion) are evaluated in three ways, and each accuracy is
    printed on its own line in this order:

    1. hold-out accuracy on a fixed 80/20 train/test split;
    2. mean 10-fold cross-validation accuracy on the training portion;
    3. mean 10-fold cross-validation accuracy on the whole dataset after
       shuffling it with randomly permuted indices.

    Within each group the order is: decision tree, random forest, extra trees.
    """
    datas = datasets.load_iris()

    # Split the data: 80% for training, 20% held out for testing.
    # random_state is fixed so the split itself is reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        datas.data, datas.target, test_size=0.2, random_state=6
    )

    # --- 1. Hold-out evaluation -------------------------------------------
    # Train on the 80% portion, then score against the held-out 20%.

    # Single decision tree.
    tree = DecisionTreeClassifier(criterion="entropy").fit(x_train, y_train)
    print(tree.score(x_test, y_test))

    # Random forest: an ensemble of decision trees; n_estimators is the
    # number of trees in the forest.
    forest = RandomForestClassifier(
        criterion="entropy", n_estimators=128
    ).fit(x_train, y_train)
    print(forest.score(x_test, y_test))

    # Extra-trees (extremely randomized trees) ensemble.
    extra = ExtraTreesClassifier(
        criterion="entropy", n_estimators=128
    ).fit(x_train, y_train)
    print(extra.score(x_test, y_test))

    # --- 2. Cross-validation on the training portion ----------------------
    # A single hold-out score depends on one particular split. With cv=10
    # the data is divided into 10 folds and each fold is used once as the
    # test set, so the mean is a steadier estimate. Fresh, unfitted
    # estimators are passed in so this section does not depend on the
    # models fitted above.
    tree_cv = DecisionTreeClassifier(criterion="entropy")
    forest_cv = RandomForestClassifier(criterion="entropy", n_estimators=128)
    extra_cv = ExtraTreesClassifier(criterion="entropy", n_estimators=128)

    print(cross_val_score(tree_cv, x_train, y_train, cv=10).mean())
    print(cross_val_score(forest_cv, x_train, y_train, cv=10).mean())
    print(cross_val_score(extra_cv, x_train, y_train, cv=10).mean())

    # --- 3. Cross-validation on the manually shuffled full dataset --------
    # Shuffle by permuting the row indices. The index count is derived from
    # the data instead of hard-coding the iris sample count.
    indices = list(range(len(datas.data)))
    random.shuffle(indices)
    x_shuffled = [datas.data[i] for i in indices]
    y_shuffled = [datas.target[i] for i in indices]

    tree_sh = DecisionTreeClassifier(criterion="entropy")
    forest_sh = RandomForestClassifier(criterion="entropy", n_estimators=128)
    extra_sh = ExtraTreesClassifier(criterion="entropy", n_estimators=128)

    # Print the results for the shuffled data (the original script
    # mistakenly re-printed the section-2 means here).
    print(cross_val_score(tree_sh, x_shuffled, y_shuffled, cv=10).mean())
    print(cross_val_score(forest_sh, x_shuffled, y_shuffled, cv=10).mean())
    print(cross_val_score(extra_sh, x_shuffled, y_shuffled, cv=10).mean())


if __name__ == '__main__':
    main()