【sklearn】tree.DecisionTreeClassifier

from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split


wine = load_wine()
wine.data

'''
    array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
            1.065e+03],
           [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
            1.050e+03],
           [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
            1.185e+03],
           ...,
           [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
            8.350e+02],
           [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
            8.400e+02],
           [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
            5.600e+02]])
'''
# View with pandas: concatenate the features and the target into one DataFrame
import pandas as pd
pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1)
'''
         0     1     2     3      4     5     6     7     8      9    10    11      12  0
0    14.23  1.71  2.43  15.6  127.0  2.80  3.06  0.28  2.29   5.64  1.04  3.92  1065.0  0
1    13.20  1.78  2.14  11.2  100.0  2.65  2.76  0.26  1.28   4.38  1.05  3.40  1050.0  0
2    13.16  2.36  2.67  18.6  101.0  2.80  3.24  0.30  2.81   5.68  1.03  3.17  1185.0  0
3    14.37  1.95  2.50  16.8  113.0  3.85  3.49  0.24  2.18   7.80  0.86  3.45  1480.0  0
4    13.24  2.59  2.87  21.0  118.0  2.80  2.69  0.39  1.82   4.32  1.04  2.93   735.0  0
..     ...   ...   ...   ...    ...   ...   ...   ...   ...    ...   ...   ...     ...  ..
173  13.71  5.65  2.45  20.5   95.0  1.68  0.61  0.52  1.06   7.70  0.64  1.74   740.0  2
174  13.40  3.91  2.48  23.0  102.0  1.80  0.75  0.43  1.41   7.30  0.70  1.56   750.0  2
175  13.27  4.28  2.26  20.0  120.0  1.59  0.69  0.43  1.35  10.20  0.59  1.56   835.0  2
176  13.17  2.59  2.37  20.0  120.0  1.65  0.68  0.53  1.46   9.30  0.60  1.62   840.0  2
177  14.13  4.10  2.74  24.5   96.0  2.05  0.76  0.56  1.35   9.20  0.61  1.60   560.0  2

178 rows × 14 columns
'''

# Split into training and test sets
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)
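
Since no random_state is passed to train_test_split, the split (and every score below) changes from run to run. A minimal sanity check of the split sizes, with an optional fixed seed as a sketch (the seed value is an arbitrary assumption, not from the original code):

Xtrain.shape, Xtest.shape     # ((124, 13), (54, 13)) for the 178-sample wine data at test_size=0.3
Ytrain.shape, Ytest.shape     # ((124,), (54,))
# For reproducible scores, a seed could be fixed instead (illustrative value):
# Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3, random_state=420)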


''' Decision tree model
random_state: fixes the randomness used inside the estimator so results are reproducible
splitter='best': split on the most important feature; 'random' picks splits more randomly, which reduces overfitting
Pruning to prevent overfitting:
    max_depth limits the maximum depth of the tree
    min_samples_leaf=0.05: a split is rejected if it would leave a node with fewer than 0.05 * n_samples samples
    min_samples_split=5: a node containing fewer than 5 samples is not split
Restricting the features considered, also against overfitting:
    max_features limits the number of features considered at each split
    min_impurity_decrease requires a minimum information gain (impurity decrease) for a split
'''
clf = tree.DecisionTreeClassifier(criterion='gini'
                                  ,random_state=3
                                  ,max_depth=3
                                  ,min_samples_leaf=0.05
                                  ,min_samples_split=2
                                 )
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)
score

'''
    0.8888888888888888
'''
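
The two feature-restriction parameters described above (max_features and min_impurity_decrease) are not used in the fit here. A minimal sketch of passing them as well, keeping the same Xtrain/Ytrain; the specific values are illustrative assumptions, not tuned:

clf_pruned = tree.DecisionTreeClassifier(criterion='gini'
                                         ,random_state=3
                                         ,max_depth=3
                                         ,max_features=8              # consider at most 8 of the 13 features per split (illustrative)
                                         ,min_impurity_decrease=0.01  # require at least this impurity decrease to split (illustrative)
                                        )
clf_pruned = clf_pruned.fit(Xtrain, Ytrain)
clf_pruned.score(Xtest, Ytest)
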
# clf.feature_importances_ gives each feature's weight; zip() pairs them with the feature names and * unpacks into a list
[*zip(wine.feature_names, clf.feature_importances_)]
'''
    [('alcohol', 0.44232962250700664),
     ('malic_acid', 0.0),
     ('ash', 0.0),
     ('alcalinity_of_ash', 0.0),
     ('magnesium', 0.0),
     ('total_phenols', 0.0),
     ('flavanoids', 0.4126765806025175),
     ('nonflavanoid_phenols', 0.0),
     ('proanthocyanins', 0.0),
     ('color_intensity', 0.0),
     ('hue', 0.0030484226577791127),
     ('od280/od315_of_diluted_wines', 0.1419453742326969),
     ('proline', 0.0)]
'''
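
Only four features get non-zero weight here; drawing the fitted tree shows exactly where those splits occur. A minimal sketch using sklearn's built-in tree.plot_tree (no graphviz needed), assuming the clf fitted above:

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
tree.plot_tree(clf
               ,feature_names=wine.feature_names
               ,class_names=list(wine.target_names)
               ,filled=True          # color nodes by majority class
              )
plt.show()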

# Hyperparameter learning curve (here: max_depth)
import matplotlib.pyplot as plt
test = []
for i in range(10):
    clf = tree.DecisionTreeClassifier(max_depth=i+1
                                      ,criterion='entropy'
                                      ,random_state=30
                                      ,splitter='random'
                                     )
    clf = clf.fit(Xtrain, Ytrain)
    score = clf.score(Xtest, Ytest)   # test-set accuracy at this depth
    test.append(score)
plt.plot(range(1, 11), test, color='red', label='max_depth')
plt.legend()  # add the legend
plt.show()

[Figure: learning curve of test-set accuracy vs. max_depth]
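
The curve above depends on a single train/test split; averaging over cross-validation folds gives a more stable picture. A minimal sketch using cross_val_score on the full dataset (cv=10 and the reuse of the same hyperparameters are assumptions, not from the original):

from sklearn.model_selection import cross_val_score

cv_test = []
for i in range(10):
    clf = tree.DecisionTreeClassifier(max_depth=i+1
                                      ,criterion='entropy'
                                      ,random_state=30
                                      ,splitter='random'
                                     )
    # mean accuracy over 10 folds instead of one fixed split
    cv_test.append(cross_val_score(clf, wine.data, wine.target, cv=10).mean())
plt.plot(range(1, 11), cv_test, color='blue', label='max_depth (10-fold CV)')
plt.legend()
plt.show()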
