【sklearn】tree.DecisionTreeClassifier

rejudge

已于 2022-11-04 17:38:01 修改

阅读量306

点赞数

分类专栏： Python 文章标签： python sklearn

于 2022-07-20 09:38:09 首次发布

本文链接：https://blog.csdn.net/qq_45249685/article/details/125885603

版权

Python 专栏收录该内容

43 篇文章 3 订阅

订阅专栏

from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split


# Load the wine classification dataset that ships with scikit-learn.
wine = load_wine()
# Feature matrix; written as a bare expression so a notebook cell echoes it.
wine.data

'''
    array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
            1.065e+03],
           [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
            1.050e+03],
           [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
            1.185e+03],
           ...,
           [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
            8.350e+02],
           [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
            8.400e+02],
           [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
            5.600e+02]])
'''

# Inspect the data with pandas: feature columns side by side with the target.
import pandas as pd

feature_frame = pd.DataFrame(wine.data)
target_frame = pd.DataFrame(wine.target)
# axis=1 joins column-wise, so the target becomes the last column.
pd.concat([feature_frame, target_frame], axis=1)

	0	1	2	3	4	5	6	7	8	9	10	11	12	0
0	14.23	1.71	2.43	15.6	127.0	2.80	3.06	0.28	2.29	5.64	1.04	3.92	1065.0	0
1	13.20	1.78	2.14	11.2	100.0	2.65	2.76	0.26	1.28	4.38	1.05	3.40	1050.0	0
2	13.16	2.36	2.67	18.6	101.0	2.80	3.24	0.30	2.81	5.68	1.03	3.17	1185.0	0
3	14.37	1.95	2.50	16.8	113.0	3.85	3.49	0.24	2.18	7.80	0.86	3.45	1480.0	0
4	13.24	2.59	2.87	21.0	118.0	2.80	2.69	0.39	1.82	4.32	1.04	2.93	735.0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
173	13.71	5.65	2.45	20.5	95.0	1.68	0.61	0.52	1.06	7.70	0.64	1.74	740.0	2
174	13.40	3.91	2.48	23.0	102.0	1.80	0.75	0.43	1.41	7.30	0.70	1.56	750.0	2
175	13.27	4.28	2.26	20.0	120.0	1.59	0.69	0.43	1.35	10.20	0.59	1.56	835.0	2
176	13.17	2.59	2.37	20.0	120.0	1.65	0.68	0.53	1.46	9.30	0.60	1.62	840.0	2
177	14.13	4.10	2.74	24.5	96.0	2.05	0.76	0.56	1.35	9.20	0.61	1.60	560.0	2

178 rows × 14 columns

# Split into training and test sets, holding out 30% of the samples for testing.
# No random_state is passed here, so the split (and every score computed from
# it) can differ from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data,
    wine.target,
    test_size=0.3,
)


''' 决策树模型
random_state 设置随机数种子，使每次运行生成的树保持一致
splitter='best' 选取最重要的特征进行分枝，若为'random'则随机分枝，降低过拟合
剪枝防过拟合
max_depth 限制最大深度
min_samples_leaf=0.05 若分枝后存在节点的样本数少于0.05*总数个，则不分枝
min_samples_split=5 若节点包含少于5个样本则不分枝
降维防过拟合
max_features 限制考虑特征个数
min_impurity_decrease 限制信息增益的最小值，信息增益小于该值则不分枝
'''
# Build a Gini-impurity decision tree, pruned by depth and minimum leaf size,
# and fit it on the training split. fit() returns the estimator itself, so the
# construction and the fit can be chained.
clf = tree.DecisionTreeClassifier(
    criterion='gini',
    random_state=3,
    max_depth=3,
    min_samples_leaf=0.05,
    min_samples_split=2,
).fit(Xtrain, Ytrain)

# Mean accuracy on the held-out test split.
score = clf.score(Xtest, Ytest)
score

'''
    0.8888888888888888
'''

# clf.feature_importances_ holds one weight per feature; pair each weight
# with its feature name and materialise the pairs as a list.
list(zip(wine.feature_names, clf.feature_importances_))
'''
    [('alcohol', 0.44232962250700664),
     ('malic_acid', 0.0),
     ('ash', 0.0),
     ('alcalinity_of_ash', 0.0),
     ('magnesium', 0.0),
     ('total_phenols', 0.0),
     ('flavanoids', 0.4126765806025175),
     ('nonflavanoid_phenols', 0.0),
     ('proanthocyanins', 0.0),
     ('color_intensity', 0.0),
     ('hue', 0.0030484226577791127),
     ('od280/od315_of_diluted_wines', 0.1419453742326969),
     ('proline', 0.0)]
'''

# Hyperparameter learning curve: test accuracy as a function of max_depth.
import matplotlib.pyplot as plt

depths = range(1, 11)  # candidate max_depth values; reused as the x-axis
test = []  # test-set accuracy for each depth, in the same order as `depths`
for depth in depths:
    clf = tree.DecisionTreeClassifier(
        max_depth=depth,
        criterion='entropy',
        random_state=30,
        splitter='random',
    )
    clf = clf.fit(Xtrain, Ytrain)
    score = clf.score(Xtest, Ytest)
    test.append(score)

plt.plot(depths, test, color='red', label='max_depth')
plt.legend()  # add the legend
# show must actually be called: a bare `plt.show` only references the
# function and never displays the figure when run as a script.
plt.show()