既是0也是1的寻道书生

诸天炁荡荡,我道日兴隆。

python决策树DecisionTreeClassifier模型

运行环境:win10 64位 py 3.6 pycharm 2018.1.1
import numpy as np
from sklearn.tree import    DecisionTreeClassifier
from sklearn import cross_validation
import  matplotlib.pyplot as plt
from sklearn import datasets
#加载数据
def load_data():
    """Load the iris dataset and split it into train/test parts.

    Returns:
        (X_train, X_test, y_train, y_test): a 75%/25% stratified split
        (stratify keeps the class proportions equal in both parts).
    """
    # sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
    # train_test_split now lives in sklearn.model_selection. Imported locally
    # so this fix does not depend on editing the module import block.
    from sklearn.model_selection import train_test_split
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    return train_test_split(X, y, test_size=0.25, random_state=0, stratify=y)
#利用决策树进行分类
def test_DecisionTreeClassifier(*data):
    """Fit a default DecisionTreeClassifier and print train/test accuracy.

    Args:
        data: (X_train, X_test, y_train, y_test) tuple as produced
            by load_data().
    """
    X_train, X_test, y_train, y_test = data
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    # Fixed typo in the output label: "Traing" -> "Training".
    print("Training score:%f" % (clf.score(X_train, y_train)))
    print("Testing score:%f" % (clf.score(X_test, y_test)))

# Driver: split the data and report the baseline (default-parameter) accuracy.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier(X_train, X_test, y_train, y_test)
#考察评价切分质量的评价标准criterion对于分类性能的影响
def test_DecisionTreeClassifier_criterion(*data):
    """Compare the 'gini' and 'entropy' split-quality criteria.

    Fits one tree per criterion and prints train/test accuracy for each.

    Args:
        data: (X_train, X_test, y_train, y_test) tuple.
    """
    X_train, X_test, y_train, y_test = data
    criterions = ['gini', 'entropy']
    for criterion in criterions:
        clf = DecisionTreeClassifier(criterion=criterion)
        clf.fit(X_train, y_train)
        print('criterion:%s' % criterion)
        # Fixed typo in the output label: "Traing" -> "Training".
        print("Training score:%f" % (clf.score(X_train, y_train)))
        print("Testing score:%f" % (clf.score(X_test, y_test)))
# Driver: re-split the data and compare the two split-quality criteria.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier_criterion(X_train, X_test, y_train, y_test)
# 检测随机划分与最优划分的影响
def test_DecisionTreeClassifier_splitter(*data):
    """Compare the 'best' and 'random' node-splitting strategies.

    Fits one tree per splitter and prints train/test accuracy for each.

    Args:
        data: (X_train, X_test, y_train, y_test) tuple.
    """
    X_train, X_test, y_train, y_test = data
    splitters = ['best', 'random']
    for splitter in splitters:
        clf = DecisionTreeClassifier(splitter=splitter)
        clf.fit(X_train, y_train)
        print("splitter:%s" % splitter)
        # Fixed typo in the output label: "Traing" -> "Training".
        print("Training score:%f" % (clf.score(X_train, y_train)))
        print("Testing score:%f" % (clf.score(X_test, y_test)))
# Driver: re-split the data and compare best vs. random node splitting.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier_splitter(X_train, X_test, y_train, y_test)
#考察深度对分类决策树的影响
def test_DecisionTreeClassifiter_depth(*data, maxdepth):
    """Plot train/test accuracy of a decision tree as max_depth grows.

    Fits one tree per depth in [1, maxdepth) and shows both accuracy
    curves, which visualizes overfitting as depth increases.

    Args:
        data: (X_train, X_test, y_train, y_test) tuple.
        maxdepth: exclusive upper bound on the depths tried (keyword-only).

    NOTE(review): the name keeps the original "Classifiter" misspelling
    because external callers use it; renaming would break them.
    """
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(X_train, y_train)
        training_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))
    # Plot both accuracy curves against tree depth.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # Fixed legend-label typo: 'traing score' -> 'training score'.
    ax.plot(depths, training_scores, label='training score', marker='o')
    ax.plot(depths, testing_scores, label='testing score', marker='*')
    ax.set_xlabel('maxdepth')
    ax.set_ylabel('score')
    ax.set_title('Decision Tree Classification')
    ax.legend(framealpha=0.5, loc='best')
    plt.show()
# Driver: re-split the data and plot accuracy for depths 1..19.
X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifiter_depth(X_train, X_test, y_train, y_test,maxdepth=20)

(Figure: training and testing accuracy curves plotted against the tree's max depth.)

阅读更多
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/dingming001/article/details/80686473
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

加入CSDN,享受更精准的内容推荐,与500万程序员共同成长!
关闭
关闭