决策树代码

#!/usr/bin/python
#encoding:utf-8

# 对原始数据进行分为训练数据和测试数据
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
import pydotplus

def outlook_type(s):
    print(s)
    it = {b'sunny':1, b'overcast':2, b'rainy':3}
    return it[s]
def temperature(s):
    it = {b'hot':1, b'mild':2, b'cool':3}
    return it[s]
def humidity(s):
    it = {b'high':1, b'normal':0}
    return it[s]
def windy(s):
    it = {b'TRUE':1, b'FALSE':0}
    return it[s]

def play_type(s):
    it = {b'yes': 1, b'no': 0}
    return it[s]

play_feature_E = 'outlook', 'temperature', 'humidity', 'windy'
play_class = 'yes', 'no'

# 1、读入数据,并将原始数据中的数据转换为数字形式
data = np.loadtxt("play.tennies.txt", delimiter=" ", dtype=str,  converters={0:outlook_type, 1:temperature, 2:humidity, 3:windy,4:play_type})#converters 这个是一列,
# 第0列用out—look_type操作。
print(data)

x, y = np.split(data,(4,),axis=1)

# 2、拆分训练数据与测试数据,为了进行交叉验证
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,random_state=2)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# 3、使用信息熵作为划分标准,对决策树进行训练
clf = tree.DecisionTreeClassifier(criterion='entropy')
print(clf)
clf.fit(x_train, y_train)

# 4、把决策树结构写入文件
dot_data = tree.export_graphviz(clf, out_file=None, feature_names=play_feature_E, class_names=play_class,
                                filled=True, rounded=True, special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
#graph.write_pdf('play1.pdf')这个包没安装上。所以总是报错

# 系数反映每个特征的影响力。越大表示该特征在分类中起到的作用越大
print(clf.feature_importances_)

# 5、使用训练数据预测,预测结果完全正确
answer = clf.predict(x_train)
y_train = y_train.reshape(-1)
print(answer)
print(y_train)
print(np.mean(answer == y_train))

# 6、对测试数据进行预测,准确度较低,说明过拟合
answer = clf.predict(x_test)
y_test = y_test.reshape(-1)
print(answer)
print(y_test)
print(np.mean(answer == y_test))

# 运行结果:
# DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
#             max_features=None, max_leaf_nodes=None,
#             min_impurity_split=1e-07, min_samples_leaf=1,
#             min_samples_split=2, min_weight_fraction_leaf=0.0,
#             presort=False, random_state=None, splitter='best')
# [ 0.45572593  0.          0.36381472  0.18045935]
# ['0' '1' '0' '1' '0' '0' '1' '1' '1']
# ['0' '1' '0' '1' '0' '0' '1' '1' '1']
# 1.0
# ['0' '1' '1' '1' '1']
# ['0' '1' '1' '1' '1']
# 1.0

数据格式

sunny hot high FALSE no
sunny hot high TRUE no
overcast hot high FALSE yes
rainy mild high FALSE yes
rainy cool normal FALSE yes
rainy cool normal TRUE no
overcast cool normal TRUE yes
sunny mild high FALSE no
sunny cool normal FALSE yes
rainy mild normal FALSE yes
sunny mild normal TRUE yes
overcast mild high TRUE yes
overcast hot normal FALSE yes
rainy mild high TRUE no

参考链接:https://blog.csdn.net/kudou1994/article/details/95993347

https://blog.csdn.net/qq_36512295/article/details/98480240

https://www.cnblogs.com/hecc/p/8480532.html

https://blog.csdn.net/qq_34807908/article/details/81539536

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值