%matplotlib inline
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
加载iris数据集
# Load the iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

# Report the train / test / total sample counts on a single line.
print(*(len(part) for part in (X_train, X_test, X)))
120 30 150
决策树模型训练
# Build a decision tree capped at depth 4 (random_state fixed so the fitted
# tree is reproducible) and fit it on the training split.
clf1 = DecisionTreeClassifier(max_depth=4, random_state=0)
clf1.fit(X_train, y_train)
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=0,
splitter='best')
决策树模型预测
# Predict a class label for every sample in the held-out test split.
clf1.predict(X_test)
array([1, 2, 2, 1, 0, 1, 1, 0, 0, 1, 2, 0, 1, 2, 2, 2, 0, 0, 1, 0, 0, 1,
0, 2, 0, 0, 0, 2, 2, 0])
模型评估
# Mean accuracy of the fitted tree on the test split (predictions vs. y_test).
clf1.score(X_test,y_test)
0.9333333333333333
# Per-class probability estimates for each test sample: one row per sample,
# one column per class.
clf1.predict_proba(X_test)
array([[0. , 0.97674419, 0.02325581],
[0. , 0. , 1. ],
[0. , 0. , 1. ],
[0. , 0.97674419, 0.02325581],
[1. , 0. , 0. ],
[0. , 1. , 0. ],
[0. , 0.97674419, 0.02325581],
[1. , 0. , 0. ],
[1. , 0. , 0. ],
[0. , 0.97674419, 0.02325581],
[0. , 0. , 1. ],
[1. , 0. , 0. ],
[0. , 0.97674419, 0.02325581],
[0. , 0. , 1. ],
[0. , 0. , 1. ],
[0. , 0. , 1. ],
[1. , 0. , 0. ],
[1. , 0. , 0. ],
[0. , 0.97674419, 0.02325581],
[1. , 0. , 0. ],
[1. , 0. , 0. ],
[0. , 0.97674419, 0.02325581],
[1. , 0. , 0. ],
[0. , 0. , 1. ],
[1. , 0. , 0. ],
[1. , 0. , 0. ],
[1. , 0. , 0. ],
[0. , 0. , 1. ],
[0. , 0. , 1. ],
[1. , 0. , 0. ]])