@[TOC](大数据小白的决策树之旅)
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris dataset and hold out 30% of the samples as a test set.
# The fixed random_state makes the split reproducible between runs.
iris = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=666,
    test_size=0.3,
)

# Sizes of the four pieces: train features, test features, train labels,
# test labels.
tuple(map(len, (x_train, x_test, y_train, y_test)))
# out: (105, 45, 105, 45)
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree with default hyper-parameters on the complete iris
# dataset (all 150 samples, no hold-out).
iris = load_iris()
ct = DecisionTreeClassifier()
ct.fit(iris.data, iris.target)

# NOTE: the report is computed on the very samples the tree was fitted on,
# so it shows how well the tree reproduces its training data rather than
# how it performs on unseen data.
fitted_predictions = ct.predict(iris.data)
print(classification_report(iris.target, fitted_predictions))
# out:
#               precision    recall  f1-score   support
#
#            0       1.00      1.00      1.00        50
#            1       1.00      1.00      1.00        50
#            2       1.00      1.00      1.00        50
#
#     accuracy                           1.00       150
#    macro avg       1.00      1.00      1.00       150
# weighted avg       1.00      1.00      1.00       150
from sklearn.tree import export_graphviz

# Write the fitted tree to 'tree.dot' in Graphviz DOT format, labelling the
# nodes with the iris feature names and class names. Each internal node of
# the tree is a binary split of the iris data.
export_graphviz(
    ct,
    out_file='tree.dot',
    class_names=iris.target_names,
    feature_names=iris.feature_names,
)
# Render the decision tree image from the DOT file with the Graphviz tools,
# e.g. `dot -Tpng tree.dot -o tree.png`.
![在这里插入图片描述](https://img-blog.csdnimg.cn/20201011170345313.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L1lqeTc5Mzc4NDU0Ng==,size_16,color_FFFFFF,t_70#pic_center)
![在这里插入图片描述](https://img-blog.csdnimg.cn/202010111704003.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L1lqeTc5Mzc4NDU0Ng==,size_16,color_FFFFFF,t_70#pic_center)
# Evaluate a linear SVM on the iris data with 5-fold cross-validation and
# show the score obtained on each fold.
import numpy as np
from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score

clf = svm.SVC(kernel='linear', C=1)
# return_X_y=True yields the (features, labels) pair directly.
X, y = datasets.load_iris(return_X_y=True)
# cv=5 -> the data is split into five folds and one score is returned per fold.
scores = cross_val_score(clf, X, y, cv=5)
scores
# out: array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])