《从机器学习到深度学习》第五章数据降维代码实现

最新推荐文章于 2024-08-02 17:39:22 发布

Luzichang

最新推荐文章于 2024-08-02 17:39:22 发布

阅读量997

点赞数 2

分类专栏：学习代码　文章标签：机器学习、数据降维

学习代码专栏收录该内容

6 篇文章 0 订阅

订阅专栏

第五章讲述了多种功能相近的降维模型，包括：PCA、LDA、Isomap、t-SNE、MDS、LLE及其两种衍生。

本案例以将三维空间中的一个S状流形降到两维为目标。

执行代码为书本附带，由此下载 http://www.broadview.com.cn/book/5337


from time import time

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

from sklearn import manifold, datasets 
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

Axes3D  # bare reference to the mplot3d import, kept to enable matplotlib's 3D projection

n_points = 1000      # number of samples
# make_s_curve is exported directly from sklearn.datasets; the private
# datasets.samples_generator module path was removed in scikit-learn 0.24.
X, color = datasets.make_s_curve(n_points, random_state=0)
n_neighbors = 10    # number of neighbours used by the manifold-learning models
n_components = 2    # target number of dimensions

fig = plt.figure(figsize=(15, 8))
# Report the actual sample count rather than a hard-coded 1000 so the title
# stays correct if n_points is changed.
plt.suptitle("Dimension Reduction with %i points, %i neighbors"
             % (n_points, n_neighbors), fontsize=14)


# First slot of the 2x5 grid: the original data drawn in 3D space.
ax = fig.add_subplot(251, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)

# LLE and its variants. Four are listed, but only the first three are trained
# and drawn (see the skip below).
methods = ['standard', 'ltsa', 'hessian', 'modified']
labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE']

for i, method in enumerate(methods):  # train and display LLE and its variants
    if i > 2:
        # Slots 2-4 of the 2x5 grid are used here (252 + i); a fourth variant
        # would land on slot 5, which the later estimator loop also draws on.
        continue
    t0 = time()
    # Keyword arguments are required: scikit-learn >= 1.0 rejects positional
    # n_neighbors / n_components for LocallyLinearEmbedding.
    Y = manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                        n_components=n_components,
                                        eigen_solver='auto',
                                        method=method).fit_transform(X)
    t1 = time()
    print("%s: %.2g sec" % (method, t1 - t0))  # t1 - t0 is the training time
    # Show the embedding in its own subplot.
    ax = fig.add_subplot(252 + i)
    ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    ax.set_title("%s (%.2g sec)" % (labels[i], t1 - t0))
    # Hide tick labels on both axes.
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.grid(True)      # draw grid lines
    ax.axis('tight')   # fit the axis limits to the data

# Build the six remaining dimensionality-reduction models as (estimator, name)
# pairs. Keyword arguments are used throughout: scikit-learn >= 1.0 rejects
# positional n_neighbors / n_components for Isomap.
estimators = [
    # Author's note: the larger the neighbour count, the closer Isomap gets
    # to the plain PCA result.
    (manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components),
     "Isomap"),
    (manifold.MDS(n_components=n_components, max_iter=100, n_init=1), "MDS"),
    # Author's note: changes the least as the neighbour count varies.
    (manifold.SpectralEmbedding(n_components=n_components,
                                n_neighbors=n_neighbors), "Laplace Eigenmaps"),
    # Author's note: long training time.
    (manifold.TSNE(n_components=n_components, init='pca', random_state=0),
     "t-SNE"),
    (PCA(n_components=n_components), "PCA"),
    (LDA(n_components=n_components), "LDA"),
]

# Train and display the six models in grid slots 5..10.
for idx, (estimator_obj, estimator_name) in enumerate(estimators):
    t0 = time()
    if estimator_name == "LDA":
        # LDA is supervised: the continuous colour value is truncated to
        # integers and passed as the class labels.
        Y = estimator_obj.fit_transform(X, color.astype(int))
    else:
        Y = estimator_obj.fit_transform(X)
    t1 = time()
    print("%s: %.2g sec" % (estimator_name, t1 - t0))
    ax = fig.add_subplot(2, 5, 5 + idx)
    ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    ax.set_title("%s (%.2g sec)" % (estimator_name, t1 - t0))
    # Hide tick labels on both axes.
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.grid(True)
    ax.axis('tight')

plt.show()

n_neighbors=10 时的结果如下：

#standard: 0.15 sec
#ltsa: 0.25 sec
#hessian: 0.35 sec
#Isomap: 0.55 sec
#MDS: 5.5 sec
#Laplace Eigenmaps: 0.13 sec
#t-SNE: 45 sec
#PCA: 0.002 sec
#LDA: 0.005 sec
#PCA和LDA找到了一个最能体现样本之间差异的切面进行三维到二维的映射；MDS和流形学习模型都达到了展开彩带的效果；t-SNE训练时间长；只有LDA模型使用了标签数据。