Python数据分析与挖掘实战的决策树纠错

#导入必要的库
import os
import xlrd
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
import pandas as pd
# The triple-quoted string below is an inert string literal, not executed code.
# It preserves an earlier pandas-based encoding: the values '高' / '是' / '好' are
# mapped to "1", every other cell to -1, the frame is cast to int, and the
# first three columns become x while the fourth becomes y.
"""""
data[u'销量'][(data[u'销量']=='高')]= "1"
data[u'是否有促销'][(data[u'是否有促销']=='是')]= "1"
data[u'天气'][(data[u'天气']=='好')]= "1"
data[data != "1"] = -1
data = pd.DataFrame(data, dtype="int")
x = data.iloc[:,:3].values
y = data.iloc[:,3].values
"""
# Load the training samples from the first sheet of the workbook.
lables = []    # class label of every sample (the value at column index 4)
feature = []   # one {column name: cell value} dict per sample
data1 = xlrd.open_workbook(os.path.join('D:/PythonProject/python02/决策树/data', 'sales_data.xls'))
table = data1.sheets()[0]
nrows = table.nrows
# Row 0 is never used as a sample, so it is taken to be the header row.
# The original code referenced `header` without defining it, which raised
# NameError as soon as the first data row was processed.
header = table.row_values(0) if nrows else []
for i in range(1, nrows):
    row = table.row_values(i)   # read the row once instead of three times

    num = row[4]
    print(num)

    lables.append(num)
    # Feature columns are 1 .. len(row) - 2: column 0 and the last column
    # are excluded, exactly as in the original index range.
    features = {header[each]: row[each] for each in range(1, len(row) - 1)}
    feature.append(features)
# One-hot encode the per-sample feature dicts into a dense numeric matrix.
vec = DictVectorizer()
encoded = vec.fit_transform(feature)
x = encoded.toarray()
print('特征提取后的X', str(x), sep='\n')

# Binarize the class labels so the classifier receives numeric targets.
print(lables)
lab = preprocessing.LabelBinarizer()
y = lab.fit_transform(lables)
print('Y', str(y), sep='\n')
# Fit an entropy-based decision tree and dump it in Graphviz .dot format.
result = tree.DecisionTreeClassifier(criterion='entropy')
result.fit(x, y)
# DictVectorizer.get_feature_names() was removed in scikit-learn 1.2; prefer
# get_feature_names_out() and fall back only on releases that predate it.
if hasattr(vec, 'get_feature_names_out'):
    feature_names = list(vec.get_feature_names_out())
else:
    feature_names = vec.get_feature_names()
# The feature names contain non-ASCII text, so write the file as UTF-8
# (the encoding Graphviz reads by default) instead of the platform default.
with open('tree1.dot', 'w', encoding='utf-8') as f:
    # Do not rebind `f`: with out_file given, export_graphviz writes to the
    # handle, and the original assignment replaced the handle inside the `with`.
    tree.export_graphviz(result, out_file=f, feature_names=feature_names)

安装 Graphviz 工具后,在命令行执行以下命令,将决策树导出为 PDF 进行可视化:
dot -Tpdf tree1.dot -o pic.pdf
在这里插入图片描述
参考链接

https://blog.csdn.net/csqazwsxedc/article/details/65697652
https://blog.csdn.net/weixin_43084928/article/details/82455326
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值