"""Study notes:
Classify three datasets with different structures using a decision tree.
"""
#模块导入
import os

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
#Y为标签
X, Y = make_classification(n_samples=100 #100个样本
,n_features=2 #2个特征,二维数据
,n_redundant=0
,n_informative=2
,random_state=1
,n_clusters_per_class=1
)
plt.scatter(X[:,0], X[:,1]) #X为二维数据,X[:,0]表示X中轴为0的数据
plt.show()
#使二分数据稍微疏散
rng = np.random.RandomState(2)
X += rng.uniform(size=X.shape)
linearly_separable = (X, Y)
plt.scatter(X[:,0], X[:,1])
plt.show()
#三组数据放入datasets
datasets = [make_moons(noise=0.3, random_state=0)
,make_circles(noise=0.2, factor=0.5, random_state=1)
,linearly_separable]
#创建画布,宽高比为6:9
figure = plt.figure(figsize=(6, 9))
#开始迭代datasets中的数据
i = 1
for ds_index, ds in enumerate(datasets):
X, Y = ds
X = StandardScaler().fit_transform(X)#数据标准化处理
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=42)
#创造一个比数据集更大的区间
x1_min, x1_max = X[:,0].min()-0.5, X[:,0].max()+0.5
x2_min, x2_max = X[:,1].min()-0.5, X[:,1].max()+0.5
#生成网格数据,生成两个二维数组
array1, array2 = np.meshgrid(np.arange(x1_min, x1_max, 0.1)
,np.arange(x2_min, x2_max, 0.1))
#生成彩色画布
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000','#0000FF'])
ax = plt.subplot(len(datasets), 2, i)
if ds_index == 0:
ax.set_title("Input data")
#将数据集放入坐标系
ax.scatter(X_train[:,0], X_train[:,1], c=Y_train, cmap=cm_bright, edgecolors='k')
ax.scatter(X_test[:,0], X_test[:,1], c=Y_test, cmap=cm_bright, alpha=0.6, edgecolors='k')#颜色RGBA
ax.set_xlim(array1.min(), array1.max())
ax.set_ylim(array2.min(), array2.max())
ax.set_xticks(())
ax.set_yticks(())
i += 1
ax = plt.subplot(len(datasets), 2, i)
#决策树建模
clf = DecisionTreeClassifier(max_depth = 5)
clf.fit(X_train, Y_train)
score = clf.score(X_test, Y_test)
#绘制决策边界,Z为类概率
Z = clf.predict_proba(np.c_[array1.ravel(), array2.ravel()])[:,1]
Z = Z.reshape(array1.shape)
ax.contourf(array1, array2, Z, cmap=cm, alpha=0.8)
ax.scatter(X_train[:,0], X_train[:,1], c=Y_train, cmap=cm_bright, edgecolors='k')
ax.scatter(X_test[:,0], X_test[:,1], c=Y_test, cmap=cm_bright, edgecolors='k', alpha=0.6)
ax.set_xlim(array1.min(), array1.max())
ax.set_ylim(array2.min(), array2.max())
ax.set_xticks(())
ax.set_yticks(())
if ds_index == 0:
ax.set_title("Decision Tree")
#右下角添加分类效果
ax.text(array1.max() - 0.3, array2.min() + 0.3, ("{:.1f}%".format(score*100)), size=15, horizontalalignment = "right")
i += 1
plt.tight_layout()#避免相邻图的坐标轴重叠
plt.savefig('D:\\pylearn\\py_pic\\DT.jpg')
plt.show()