决策树初学
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
# Load the wine classification dataset shipped with scikit-learn.
wine = load_wine()

# Hold out 30% of the samples for evaluation. No random_state is given,
# so the split (and therefore the score below) varies from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    test_size=0.3,
)

# Baseline tree using entropy as the split criterion.
wine_model = DTC(criterion='entropy')
wine_model.fit(x_train, y_train)

# Accuracy on the held-out test set.
score = wine_model.score(x_test, y_test)
score
0.8333333333333334
# Chinese display names for the 13 features. The list is used positionally
# (as feature_names for the tree export and zipped with the importances),
# so its order must match the column order of the training data.
chn_name = [
    '酒精',
    '苹果酸',
    '灰',
    '灰的碱性',
    '镁',
    '总酚',
    '类黄酮',
    '非黄烷类酚类',
    '花青素',
    '颜色强度',
    '色调',
    'od280/od315稀释葡萄酒',
    '脯氨酸',
]
import graphviz
from sklearn import tree
# Export the fitted tree as DOT source, labelling nodes with the Chinese
# feature names and the three class labels.
dot_data = tree.export_graphviz(
    wine_model,
    feature_names=chn_name,
    class_names=["香槟","冰酒","雪莉酒"],
    filled=True,
    rounded=True,
)

# Wrap the DOT text so graphviz can render it; the bare expression below
# displays the graph when evaluated as the last line of a notebook cell.
graph = graphviz.Source(dot_data)
graph

# Pair every feature name with the importance the fitted tree assigned to it.
list(zip(chn_name, wine_model.feature_importances_))
[('酒精', 0.0),
('苹果酸', 0.021261242188572074),
('灰', 0.0),
('灰的碱性', 0.0),
('镁', 0.014295818601915524),
('总酚', 0.0),
('类黄酮', 0.021492393774203793),
('非黄烷类酚类', 0.0),
('花青素', 0.0),
('颜色强度', 0.1297047943978347),
('色调', 0.03135267973405255),
('od280/od315稀释葡萄酒', 0.38363224082606673),
('脯氨酸', 0.39826083047735467)]
# Tune the model: fix the estimator's random seed and switch to the
# 'random' splitter, then refit on the same training split.
wine_model = DTC(
    criterion='entropy',
    random_state=30,
    splitter='random',
)
wine_model.fit(x_train, y_train)

# Accuracy of the tuned tree on the held-out test set.
score = wine_model.score(x_test, y_test)
score
0.9074074074074074
# Re-export the tuned tree as DOT source with the same labels as before.
dot_data = tree.export_graphviz(
    wine_model,
    feature_names=chn_name,
    class_names=["香槟","冰酒","雪莉酒"],
    filled=True,
    rounded=True,
)

# Build the renderable graph; the bare name displays it in a notebook cell.
graph = graphviz.Source(dot_data)
graph
# Pruning
# Training-set accuracy of the model currently bound to wine_model, taken
# before the depth-limited refit below replaces it.
score_train = wine_model.score(x_train, y_train)
score_train

# Refit with the tree depth capped at 3.
# Other pruning options left disabled in this step:
#   min_samples_leaf = 10
#   min_samples_split = 10
wine_model = DTC(
    criterion='entropy',
    random_state=30,
    splitter='random',
    max_depth=3,
)
wine_model.fit(x_train, y_train)

# Export and display the pruned tree.
dot_data = tree.export_graphviz(
    wine_model,
    feature_names=chn_name,
    class_names=["香槟","冰酒","雪莉酒"],
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
graph

# Test-set accuracy of the depth-limited tree.
score = wine_model.score(x_test, y_test)
score
0.9629629629629629
# Refit with both a depth cap and a minimum of 10 samples required to
# split a node. (min_samples_leaf = 10 remains disabled in this step.)
wine_model = DTC(
    criterion='entropy',
    random_state=30,
    splitter='random',
    max_depth=3,
    min_samples_split=10,
)
wine_model.fit(x_train, y_train)

# Export and display the resulting tree.
dot_data = tree.export_graphviz(
    wine_model,
    feature_names=chn_name,
    class_names=["香槟","冰酒","雪莉酒"],
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
graph

# Test-set accuracy with the additional split constraint.
score = wine_model.score(x_test, y_test)
score
0.9629629629629629