导出网络数据
% matplotlib inline
import numpy as np
from sklearn. model_selection import train_test_split
from sklearn. datasets import load_iris
from sklearn. tree import DecisionTreeClassifier
加载iris数据集
# Load the iris dataset, then bind its `data` attribute to X and its
# `target` attribute to Y for the cells that follow.
iris = load_iris()
X, Y = iris.data, iris.target
拆分训练集和测试集
# Hold out 20% of the samples as a test set; random_state=0 keeps the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)
# Show the number of training samples, test samples, and total samples.
print(len(X_train), len(X_test), len(X))
120 30 150
决策树模型训练
# Build a decision tree capped at depth 4 with a fixed random seed.
clf1 = DecisionTreeClassifier(
    max_depth=4,
    random_state=0,
)
# Fit on the training split; kept as a separate trailing expression so the
# notebook still displays the value returned by fit().
clf1.fit(X_train, Y_train)
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=0,
splitter='best')
决策树模型预测
# Predict class labels for the held-out test samples (displayed as the cell output).
clf1.predict(X_test)
array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
0, 0, 2, 0, 0, 1, 1, 0])
评估模型
# Score the fitted tree on the test split; for classifiers, score() reports
# mean accuracy (displayed as the cell output).
clf1.score(X_test, Y_test)
1.0