决策树代码

最新推荐文章于 2024-05-15 13:05:21 发布

辽宁大学

最新推荐文章于 2024-05-15 13:05:21 发布

阅读量1.2k

点赞数

分类专栏： python 文章标签：机器学习

本文链接：https://blog.csdn.net/zhuiyunzhugang/article/details/105544014

版权

python 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

#!/usr/bin/python
#encoding:utf-8

# Encode the raw play-tennis data, split it into training and test sets, and train a decision tree.
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
import pydotplus

def outlook_type(s):
    """Encode an outlook label as an integer (for use as a loadtxt converter).

    Args:
        s: The raw field value, either ``bytes`` (what ``np.loadtxt`` passes
            to converters in its legacy ``encoding='bytes'`` mode) or ``str``.

    Returns:
        1 for ``sunny``, 2 for ``overcast``, 3 for ``rainy``.

    Raises:
        KeyError: If the label is not one of the three known values.
    """
    # Converters may receive bytes or str depending on the loadtxt encoding,
    # so normalise to str before the lookup.
    label = s.decode('latin1') if isinstance(s, bytes) else s
    codes = {'sunny': 1, 'overcast': 2, 'rainy': 3}
    return codes[label]
def temperature(s):
    """Encode a temperature label as an integer (for use as a loadtxt converter).

    Args:
        s: The raw field value, either ``bytes`` (what ``np.loadtxt`` passes
            to converters in its legacy ``encoding='bytes'`` mode) or ``str``.

    Returns:
        1 for ``hot``, 2 for ``mild``, 3 for ``cool``.

    Raises:
        KeyError: If the label is not one of the three known values.
    """
    # Converters may receive bytes or str depending on the loadtxt encoding,
    # so normalise to str before the lookup.
    label = s.decode('latin1') if isinstance(s, bytes) else s
    codes = {'hot': 1, 'mild': 2, 'cool': 3}
    return codes[label]
def humidity(s):
    """Encode a humidity label as an integer (for use as a loadtxt converter).

    Args:
        s: The raw field value, either ``bytes`` (what ``np.loadtxt`` passes
            to converters in its legacy ``encoding='bytes'`` mode) or ``str``.

    Returns:
        1 for ``high``, 0 for ``normal``.

    Raises:
        KeyError: If the label is neither ``high`` nor ``normal``.
    """
    # Converters may receive bytes or str depending on the loadtxt encoding,
    # so normalise to str before the lookup.
    label = s.decode('latin1') if isinstance(s, bytes) else s
    codes = {'high': 1, 'normal': 0}
    return codes[label]
def windy(s):
    """Encode the windy flag as an integer (for use as a loadtxt converter).

    Args:
        s: The raw field value, either ``bytes`` (what ``np.loadtxt`` passes
            to converters in its legacy ``encoding='bytes'`` mode) or ``str``.

    Returns:
        1 for ``TRUE``, 0 for ``FALSE`` (the match is case-sensitive).

    Raises:
        KeyError: If the value is neither ``TRUE`` nor ``FALSE``.
    """
    # Converters may receive bytes or str depending on the loadtxt encoding,
    # so normalise to str before the lookup.
    label = s.decode('latin1') if isinstance(s, bytes) else s
    codes = {'TRUE': 1, 'FALSE': 0}
    return codes[label]

def play_type(s):
    """Encode the play/no-play target as an integer (for use as a loadtxt converter).

    Args:
        s: The raw field value, either ``bytes`` (what ``np.loadtxt`` passes
            to converters in its legacy ``encoding='bytes'`` mode) or ``str``.

    Returns:
        1 for ``yes``, 0 for ``no``.

    Raises:
        KeyError: If the value is neither ``yes`` nor ``no``.
    """
    # Converters may receive bytes or str depending on the loadtxt encoding,
    # so normalise to str before the lookup.
    label = s.decode('latin1') if isinstance(s, bytes) else s
    codes = {'yes': 1, 'no': 0}
    return codes[label]

# Feature names, in the same order as the first four columns of the data file.
play_feature_E = ['outlook', 'temperature', 'humidity', 'windy']
# Class names in ascending order of the encoded label (play_type maps
# no -> 0, yes -> 1); export_graphviz pairs names with classes in that order.
play_class = ['no', 'yes']

# 1. Load the data. `converters` maps a column index to the function that
#    turns that column's text label into an integer code (column 0 goes
#    through outlook_type, and so on). dtype=int keeps the codes numeric
#    instead of turning them back into strings.
data = np.loadtxt(
    "play.tennies.txt",
    delimiter=" ",
    dtype=int,
    converters={0: outlook_type, 1: temperature, 2: humidity, 3: windy, 4: play_type},
)
print(data)

# The first four columns are the features; the fifth is the target label.
# The label is taken as a 1-D array so it compares directly with predict().
x = data[:, :4]
y = data[:, 4]

# 2. Hold out 30% of the rows as a test set. The split is random on every
#    run; pass random_state=2 to train_test_split for a reproducible split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# 3. Train a decision tree that uses information entropy as the split criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
print(clf)
clf.fit(x_train, y_train)

# 4. Export the tree structure as Graphviz DOT and build a graph object from it.
dot_data = tree.export_graphviz(clf, out_file=None, feature_names=play_feature_E, class_names=play_class,
                                filled=True, rounded=True, special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
# graph.write_pdf('play1.pdf')  # disabled: the required package could not be
# installed in the original author's environment, so this call always failed.

# Feature importances: the larger the value, the more that feature
# contributes to the classification.
print(clf.feature_importances_)

# 5. Predict on the training data and print the fraction predicted correctly.
answer = clf.predict(x_train)
print(answer)
print(y_train)
print(np.mean(answer == y_train))

# 6. Predict on the held-out test data. A noticeably lower accuracy here than
#    on the training data indicates overfitting.
answer = clf.predict(x_test)
print(answer)
print(y_test)
print(np.mean(answer == y_test))

# Sample output from the original author's run. It predates the dtype=int
# change above (labels were strings, hence the quotes) and used an older
# scikit-learn; because the split is random, numbers vary between runs.
# DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
#             max_features=None, max_leaf_nodes=None,
#             min_impurity_split=1e-07, min_samples_leaf=1,
#             min_samples_split=2, min_weight_fraction_leaf=0.0,
#             presort=False, random_state=None, splitter='best')
# [ 0.45572593  0.          0.36381472  0.18045935]
# ['0' '1' '0' '1' '0' '0' '1' '1' '1']
# ['0' '1' '0' '1' '0' '0' '1' '1' '1']
# 1.0
# ['0' '1' '1' '1' '1']
# ['0' '1' '1' '1' '1']
# 1.0

数据格式（play.tennies.txt：每行 5 个以空格分隔的字段，依次为 outlook、temperature、humidity、windy、是否打球）

sunny hot high FALSE no
sunny hot high TRUE no
overcast hot high FALSE yes
rainy mild high FALSE yes
rainy cool normal FALSE yes
rainy cool normal TRUE no
overcast cool normal TRUE yes
sunny mild high FALSE no
sunny cool normal FALSE yes
rainy mild normal FALSE yes
sunny mild normal TRUE yes
overcast mild high TRUE yes
overcast hot normal FALSE yes
rainy mild high TRUE no

参考链接：https://blog.csdn.net/kudou1994/article/details/95993347

https://blog.csdn.net/qq_36512295/article/details/98480240

https://www.cnblogs.com/hecc/p/8480532.html

https://blog.csdn.net/qq_34807908/article/details/81539536

辽宁大学

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
决策树代码

#!/usr/bin/python#encoding:utf-8# 对原始数据进行分为训练数据和测试数据import numpy as npfrom sklearn import treefrom sklearn.model_selection import train_test_splitimport pydotplusdef outlook_type(s): pri...
复制链接

扫一扫