[Wine Dataset] Decision Tree vs. Random Forest Comparison

%matplotlib inline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
wine = load_wine()
wine
[{"metadata":{"trusted":false,"scrolled":false},"id":"e7e71df6","cell_type":"code","source":"wine","execution_count":88,"outputs":[{"data":{"text/plain":"{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,\n         1.065e+03],\n        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,\n         1.050e+03],\n        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,\n         1.185e+03],\n        ...,\n        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,\n         8.350e+02],\n        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,\n         8.400e+02],\n        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,\n         5.600e+02]]),\n 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,\n        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,\n        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n        2, 2]),\n 'frame': None,\n 'target_names': array(['class_0', 'class_1', 'class_2'], dtype='
wine.data.shape
(178, 13)
wine.target
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])
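For a quick look at the data, the Bunch can also be laid out as a table; a minimal sketch, assuming pandas is available (it is not used anywhere else in this notebook):

import pandas as pd

# Feature matrix with named columns plus the label column (pandas assumed installed)
df = pd.DataFrame(wine.data, columns=wine.feature_names)
df["label"] = wine.target
df.head()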
# Instantiate the model
# Fit the model on the training set via the fit interface
# Then call other interfaces on the trained model (e.g. score(Xtest, Ytest)) to get the results we want
from sklearn.model_selection import train_test_split

Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)  # split features and labels into train/test sets
clf = DecisionTreeClassifier(random_state=0)  # instantiate; random_state controls the randomness
rlf = RandomForestClassifier(random_state=0)

clf = clf.fit(Xtrain, Ytrain)  # fit on the training set
rlf = rlf.fit(Xtrain, Ytrain)

score_c = clf.score(Xtest, Ytest)  # mean accuracy on the test set
score_r = rlf.score(Xtest, Ytest)
print("Single Tree:{}".format(score_c),
      "Random Forest:{}".format(score_r))
Single Tree:0.9814814814814815 Random Forest:1.0
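Because train_test_split shuffles randomly, the single-split scores above will vary from run to run; a sketch of a reproducible variant (the seed 420 is an arbitrary illustration, not from the original):

# Fixing random_state on the split makes the single-split comparison repeatable
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=420  # arbitrary seed
)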

# Cross-validation: a single split can bias the conclusion; we want to check the model's stability across different train/test splits
# cross_val_score handles the splitting itself: pass in the full feature matrix and full labels; cv is the number of folds

from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

rfc = RandomForestClassifier(n_estimators = 25)
rfc_s = cross_val_score(rfc,wine.data,wine.target,cv=10)
     
clf = DecisionTreeClassifier()
clf_s = cross_val_score(clf,wine.data,wine.target,cv=10)

plt.plot(range(1,11),rfc_s,label="RandomForest")
plt.plot(range(1,11),clf_s,label="DecisionTree")
plt.legend()  # show the legend
plt.show()
# Result: in every fold of the cross-validation, the random forest's accuracy is greater than or equal to the decision tree's
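The per-fold claim can be checked directly on the score arrays computed above; a small sketch:

import numpy as np

# Count the folds where the forest matched or beat the single tree
print((rfc_s >= clf_s).sum(), "of 10 folds: RF >= DT")
print("RF mean: {:.4f}  DT mean: {:.4f}".format(rfc_s.mean(), clf_s.mean()))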

superpa = []  # list to hold the mean CV score for each n_estimators value
for i in range(200):
    rfc = RandomForestClassifier(n_estimators=i+1, n_jobs=-1)  # try n_estimators from 1 to 200; n_jobs=-1 uses all CPU cores
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_s)
print(max(superpa), superpa.index(max(superpa))+1)  # best mean accuracy and the n_estimators that achieves it
plt.figure(figsize=[20,5])
plt.plot(range(1,201),superpa)
plt.show()

# List.index(object) returns the index of object in List; superpa.index(max(superpa))+1 converts the zero-based index to the corresponding n_estimators value
0.9944444444444445 62
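Each 10-fold run is itself noisy, so the exact position of the peak shifts between runs; a sketch that re-scores a few values near the peak with repeated cross-validation to smooth out that noise (the range 55-69 and the 3 repeats are illustrative choices, not from the original):

# Average several CV runs per n_estimators value near the peak
for n in range(55, 70, 2):
    runs = [cross_val_score(RandomForestClassifier(n_estimators=n, n_jobs=-1),
                            wine.data, wine.target, cv=10).mean()
            for _ in range(3)]
    print(n, sum(runs) / len(runs))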

# Plot of 10 modeling rounds, each scored with 10-fold cross-validation (100 folds in total per model)

rfc_l = []
clf_l = []
# two lists to store the results
for i in range(10):  # 10 rounds, each with 10-fold cross-validation: 100 folds per model in total
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_s = cross_val_score(rfc,wine.data,wine.target,cv=10).mean()
    rfc_l.append(rfc_s)  # store the mean score in rfc_l
    
    clf = DecisionTreeClassifier()
    clf_s = cross_val_score(clf,wine.data,wine.target,cv=10).mean()
    clf_l.append(clf_s)  # store the mean score in clf_l

plt.plot(range(1,11),rfc_l,label="RandomForest")
plt.plot(range(1,11),clf_l,label="DecisionTree")
plt.legend()
plt.show()

# Result: as the modeling rounds accumulate, the random forest's advantage over the decision tree shows up consistently
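To put a number on the gap shown in the plot, the two score lists built above can be differenced; a small sketch:

import numpy as np

# Mean advantage of the forest over the tree across the 10 rounds
gap = np.array(rfc_l) - np.array(clf_l)
print("mean RF - DT advantage over 10 rounds: {:.4f}".format(gap.mean()))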
