Decision Tree Example: the load_wine (Wine) Dataset

Model with Default Parameters

from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import pandas as pd

wine = load_wine()
# Combine features and target into one DataFrame for a quick look at the data
df1 = pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1)
print(df1)
# Inspect the feature names
print(wine.feature_names)
# Inspect the class labels
print(wine.target_names)
# Split the data into training and test sets (70% / 30%)
x_train, x_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3)
print(x_train.shape)
print(wine.data.shape)
# Instantiate the classifier
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)
print(score)
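
The concatenated frame above has unnamed integer columns; for readability, the same table can be built with the dataset's own feature names. A minimal sketch (the df_named name is just for illustration):

# Hypothetical variant of the DataFrame above, using wine.feature_names as column labels
df_named = pd.DataFrame(wine.data, columns=wine.feature_names)
df_named['target'] = wine.target
print(df_named.head())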

# feature_names: readable names for the dataset's features (matching wine.feature_names)
feature_name = ['alcohol', 'malic acid', 'ash', 'alcalinity of ash', 'magnesium', 'total phenols',
                'flavanoids', 'nonflavanoid phenols', 'proanthocyanins', 'color intensity', 'hue',
                'od280/od315 of diluted wines', 'proline']
import graphviz
dot_data = tree.export_graphviz(clf, feature_names=feature_name,
                                class_names=['Gin', 'Sherry', 'Vermouth'],
                                filled=True,
                                rounded=True)
graph = graphviz.Source(dot_data)
graph.render('tree')
print(clf.feature_importances_)
print([*zip(feature_name, clf.feature_importances_)])
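
If graphviz is not installed, sklearn's own plotting helper can draw a similar figure. A minimal sketch, assuming the clf and feature_name defined above and sklearn >= 0.21:

import matplotlib.pyplot as plt

# Matplotlib-based rendering of the same fitted tree (no graphviz dependency)
plt.figure(figsize=(12, 8))
tree.plot_tree(clf, feature_names=feature_name,
               class_names=['Gin', 'Sherry', 'Vermouth'],
               filled=True, rounded=True)
plt.show()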

Parameter Selection

random_state

random_state fixes the randomness used when building the tree (for example the order in which features are evaluated), so the fitted model and its score are reproducible between runs.

clf = tree.DecisionTreeClassifier(criterion='entropy', random_state=30)
clf = clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)
print(score)
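
As a quick check (a sketch, not part of the original code), training the same model several times with and without random_state on the split above shows the effect: the unseeded scores may differ between runs, while the seeded ones are identical.

# Hypothetical reproducibility check on the same train/test split
scores_unseeded = [tree.DecisionTreeClassifier(criterion='entropy')
                   .fit(x_train, y_train).score(x_test, y_test) for _ in range(5)]
scores_seeded = [tree.DecisionTreeClassifier(criterion='entropy', random_state=30)
                 .fit(x_train, y_train).score(x_test, y_test) for _ in range(5)]
print(scores_unseeded)  # may vary from run to run
print(scores_seeded)    # identical every time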

splitter

# splitter accepts two values: 'best' and 'random'
# 'best': the split at each node prefers the more important features
# 'random': splits are chosen more randomly; the tree tends to be deeper, which can help reduce overfitting
clf = tree.DecisionTreeClassifier(criterion='entropy',
                                  random_state=30,
                                  splitter='random')
clf = clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)
print(score)
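
To compare the two settings side by side, a small loop (a sketch assuming the split above) trains one tree per splitter value and reports its test accuracy and depth:

# Hypothetical comparison of splitter='best' vs splitter='random'
for s in ('best', 'random'):
    clf_s = tree.DecisionTreeClassifier(criterion='entropy', random_state=30, splitter=s)
    clf_s.fit(x_train, y_train)
    print(s, clf_s.score(x_test, y_test), clf_s.get_depth())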

# feature_names: same readable feature names as above
feature_name = ['alcohol', 'malic acid', 'ash', 'alcalinity of ash', 'magnesium', 'total phenols',
                'flavanoids', 'nonflavanoid phenols', 'proanthocyanins', 'color intensity', 'hue',
                'od280/od315 of diluted wines', 'proline']
import graphviz
dot_data = tree.export_graphviz(clf, feature_names=feature_name,
                                class_names=['Gin', 'Sherry', 'Vermouth'],
                                filled=True,
                                rounded=True)
graph = graphviz.Source(dot_data)
graph.render('tree')
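
For a quick textual view of the fitted tree, without rendering a figure at all, sklearn also provides export_text. A minimal sketch using the same feature_name list:

from sklearn.tree import export_text

# Print the tree structure as indented if/else rules
print(export_text(clf, feature_names=feature_name))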

Pruning Strategies

max_depth: limits the maximum depth of the tree
min_samples_leaf: the minimum number of samples each leaf node must contain
min_samples_split: the minimum number of training samples a node must contain before it is allowed to split
clf = tree.DecisionTreeClassifier(criterion='entropy',
                                  random_state=30,
                                  splitter='random',
                                  max_depth=3,
                                  min_samples_leaf=10,
                                  min_samples_split=10)
clf = clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)
print(score)
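
To confirm that the pruning constraints actually took effect, the fitted tree's size can be inspected directly (a small check, not in the original code):

# The pruned tree should respect max_depth=3 and have relatively few leaves
print(clf.get_depth())     # actual depth, at most 3 here
print(clf.get_n_leaves())  # number of leaf nodes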

Hyperparameter Learning Curve

Plotting the test score against max_depth shows the parameter's effect on the result more intuitively and helps pin down a sensible tuning range.

import matplotlib.pyplot as plt

test = []
for i in range(10):
    clf = tree.DecisionTreeClassifier(criterion='entropy',
                                      max_depth=i + 1,
                                      random_state=30,
                                      splitter='random')
    clf = clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    test.append(score)
plt.plot(range(1, 11), test, color='r', label='max_depth')
plt.legend()
plt.show()
# apply returns the index of the leaf node each test sample ends up in
print(clf.apply(x_test))
# predict returns the predicted class (or regression value) for each test sample
print(clf.predict(x_test))
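
Besides the hard class labels, the classifier can also report per-class probabilities. A minimal sketch using the last tree fitted in the loop above:

# Each row holds the predicted probability of the three wine classes for one test sample
proba = clf.predict_proba(x_test)
print(proba[:5])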

Reference

https://www.bilibili.com/video/BV1sb411c7S6
