简单的python决策树案例

写文章的目的是为了巩固所学，和方便回顾查找。如有讲错的地方，欢迎指出，谢谢。

RID age income student credit_rating buy
1 youth high no fair no
2 youth high no excellent no
3 middle_aged high no fair yes
4 senior medium no fair yes
5 senior low yes fair yes
6 senior low yes excellent no
7 middle_aged low yes excellent yes
8 youth medium no fair no
9 youth low yes fair yes
10 senior medium yes fair yes
11 youth medium yes excellent yes
12 middle_aged medium no excellent yes
13 middle_aged high yes fair yes
14 senior medium no excellent no
#导入必要的库
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
#加载数据文件
#试着打印结果，也可以作为判断数据是否加载成功的指标

['RID', 'age', 'income', 'student', 'credit_rating', 'class_buys_computer']

DictVectorizer 的使用可以参考相关文章（原文链接在转载/提取过程中丢失，可查阅 scikit-learn 官方文档中 DictVectorizer 一节）

# Collect per-row class labels and feature dicts for DictVectorizer.
# NOTE(review): the article omits the file-loading code; this assumes
# `reader` is a csv.reader over the data file and `headers` is its first
# row (the column-name list printed above) — confirm against the original.
lables = []    # class labels: whether a computer was bought ('yes'/'no')
feature = []   # one {column_name: value} dict per data row

for row in reader:
    # The last column is the class label.
    lables.append(row[len(row) - 1])
    # Columns 1..len-2: skip the RID column (index 0) and the label (last).
    features = {headers[each]: row[each] for each in range(1, len(row) - 1)}
    feature.append(features)

# One-hot encode the categorical feature dicts into a numeric matrix.
vec = DictVectorizer()
x = vec.fit_transform(feature).toarray()
print('特征提取后的X'+'\n'+str(x))
# Binarize the yes/no labels into a 0/1 column vector.
lab = preprocessing.LabelBinarizer()
y = lab.fit_transform(lables)
print('Y'+'\n'+str(y))

特征提取后的X
[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]
[0. 0. 1. 1. 0. 1. 0. 0. 1. 0.]
[1. 0. 0. 0. 1. 1. 0. 0. 1. 0.]
[0. 1. 0. 0. 1. 0. 0. 1. 1. 0.]
[0. 1. 0. 0. 1. 0. 1. 0. 0. 1.]
[0. 1. 0. 1. 0. 0. 1. 0. 0. 1.]
[1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
[0. 0. 1. 0. 1. 0. 0. 1. 1. 0.]
[0. 0. 1. 0. 1. 0. 1. 0. 0. 1.]
[0. 1. 0. 0. 1. 0. 0. 1. 0. 1.]
[0. 0. 1. 1. 0. 0. 0. 1. 0. 1.]
[1. 0. 0. 1. 0. 0. 0. 1. 1. 0.]
[1. 0. 0. 0. 1. 1. 0. 0. 0. 1.]
[0. 1. 0. 1. 0. 0. 0. 1. 1. 0.]]

[[0]
[0]
[1]
[1]
[1]
[0]
[1]
[0]
[1]
[1]
[1]
[1]
[1]
[0]]

# Build an entropy-based (ID3-style) decision tree; scikit-learn's default
# criterion is Gini impurity ("gini").
result = tree.DecisionTreeClassifier(criterion='entropy')
result.fit(x, y)
# print('result' + str(result))

# Save the fitted tree in Graphviz .dot format for visualization
# (render with e.g. `dot -Tpdf tree1.dot -o tree1.pdf`).
# NOTE: vec.get_feature_names() was removed in scikit-learn 1.2;
# get_feature_names_out() is the supported replacement.
with open('tree1.dot', 'w') as f:
    tree.export_graphviz(result, out_file=f,
                         feature_names=vec.get_feature_names_out())