课本中给出的是一个预测隐形眼镜类型的例子。
数据集样式如下(每行依次为 age、prescript、astigmatic、tearRate 四个特征和类别 class):
young myope no reduced no lenses
young myope no normal soft
young myope yes reduced no lenses
young myope yes normal hard
young hyper no reduced no lenses
young hyper no normal soft
young hyper yes reduced no lenses
young hyper yes normal hard
pre myope no reduced no lenses
用sklearn实现的代码如下:
import numpy as np
import pandas as pd
# Load the data.
# Column names are supplied here rather than read from the file; the last
# one, 'class', is the prediction target.
labels = ['age', 'prescript', 'astigmatic', 'tearRate', 'class']
# The feature columns are every name except the 'class' target.
feature = [name for name in labels if name != 'class']
# The file is parsed as tab-separated text.
lenses = pd.read_csv(
    'C:/Users/lenovo/Desktop/lenses.txt',
    sep='\t',
    names=labels,
)
print(lenses)
# Convert the categorical (string) feature columns to integer codes.
from sklearn import preprocessing
# Take an explicit copy of the feature columns: writing encoded values into
# a bare selection of `lenses` triggers pandas' SettingWithCopyWarning.
lenses_feature = lenses[feature].copy()
# One encoder instance is refit on each column in turn, so after the loop
# `le` only holds the mapping learned from the last column.
le = preprocessing.LabelEncoder()
for col in lenses_feature.columns:
    lenses_feature[col] = le.fit_transform(lenses_feature[col])
print(lenses_feature)
from sklearn.model_selection import train_test_split
# Split the data into a training set and a test set.
# X holds the encoded feature columns, y the 'class' target column.
y = lenses['class']
X = lenses_feature
# test_size=0.3 holds out 30% of the rows for testing; no random_state is
# given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
)
# Train the decision tree model.
from sklearn import tree
dtc = tree.DecisionTreeClassifier()
dtc.fit(X_train, y_train)
# Predictions for the held-out rows, used by the evaluation step.
y_pred = dtc.predict(X_test)
# Predict a single hand-written, already-encoded sample. It is wrapped in a
# one-row DataFrame with the training columns because the model was fitted on
# a DataFrame; passing a bare nested list makes scikit-learn warn that X has
# no valid feature names.
sample = pd.DataFrame([[1, 1, 0, 1]], columns=list(X_train.columns))
print(dtc.predict(sample))
# Evaluate the model.
from sklearn import metrics
# Distinct class labels in sorted order. Iteration order of a set of strings
# is not stable across interpreter runs, so sorting keeps the rows/columns of
# the confusion matrix in the same order every time.
value = sorted(set(y))
print(metrics.classification_report(y_test, y_pred))
# Rows and columns of the matrix follow the order given in `value`.
print(metrics.confusion_matrix(y_test, y_pred, labels=value))
# Draw the decision tree.
import graphviz
feature_name = ['age', 'prescript', 'astigmatic', 'tearRate']
# export_graphviz expects class names in the same order as the fitted
# classifier's classes. Taking them from dtc.classes_ keeps every node label
# matched to the right class (a hand-written list can be in the wrong order,
# or the wrong length when a class is absent from the training split).
dot_data = tree.export_graphviz(
    dtc,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=feature_name,
    class_names=[str(name) for name in dtc.classes_],
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
# Bare expression: displays the graph when run as the last line of a notebook
# cell; it has no effect when executed as a plain script.
graph
# Explore the decision tree.
# Raw importance scores, one per feature column.
importances = dtc.feature_importances_
print(importances)
# The same scores paired with their feature names.
print(list(zip(feature, dtc.feature_importances_)))