《深度学习》推导与代码实现（三）：线性代数

最新推荐文章于 2022-08-15 17:03:51 发布

xuesuoziluoshu

最新推荐文章于 2022-08-15 17:03:51 发布

阅读量221

点赞数

分类专栏：深度学习文章标签： python 线性代数 pca降维算法深度学习

本文链接：https://blog.csdn.net/xuesuoziluoshu/article/details/108611818

版权

深度学习专栏收录该内容

7 篇文章 1 订阅

订阅专栏

1.10 PCA (主成分分析)

在这里插入图片描述

以鸢尾花 iris 数据为例，展示 PCA 的使用。

手动实现 PCA

import pandas as pd 
import numpy as np 
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler
%matplotlib inline

# Load the iris data set into a DataFrame with short feature names and a
# trailing 'label' column holding the class index.
iris = load_iris()
feature_columns = ['sepal length', 'sepal width', 'petal length', 'petal width']
df = pd.DataFrame(iris.data, columns=feature_columns)
df['label'] = iris.target
# Notebook output: number of samples per class.
df['label'].value_counts()

'''
2    50
1    50
0    50
Name: label, dtype: int64
'''

# Inspect the data: show the last rows of the DataFrame.
df.tail()

在这里插入图片描述

# Split the frame into the four feature columns (x) and the label column (y),
# then print the first sample and its label as a sanity check.
x = df.iloc[:, :4]
y = df.iloc[:, 4]
first_sample = x.iloc[0, :4]
first_label = y.iloc[0]
print("查看第一个数据：\n", first_sample)
print("查看第一个标签：\n", first_label)

'''
查看第一个数据：
 sepal length    5.1
sepal width     3.5
petal length    1.4
petal width     0.2
Name: 0, dtype: float64
查看第一个标签：
 0
'''

class PCA:
    """Minimal PCA based on an eigendecomposition of the sample covariance."""

    def __init__(self):
        pass

    def fit(self, X, n_components):
        """Project ``X`` onto its leading principal components.

        Args:
            X: 2-D data with one sample per row. Must support ``.mean(axis=0)``,
                ``.T`` and ``.dot`` (e.g. a NumPy array or a pandas DataFrame).
            n_components: Number of leading components to keep.

        Returns:
            The mean-centered data multiplied by the ``n_components``
            eigenvectors with the largest eigenvalues (one column per
            component, ordered by decreasing variance).
        """
        n_sample = np.shape(X)[0]
        # Center once and reuse: the covariance and the projection must both
        # be computed on the same mean-centered data.
        centered = X - X.mean(axis=0)
        covariance_matrix = (1 / (n_sample - 1)) * np.asarray(centered.T.dot(centered))
        # The covariance matrix is symmetric, so use eigh: it guarantees real
        # eigenvalues/eigenvectors, whereas eig may return complex values
        # caused purely by floating-point round-off.
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
        # Indices of the eigenvalues sorted from largest to smallest.
        idx = np.argsort(eigenvalues)[::-1][:n_components]
        components = eigenvectors[:, idx]
        # Low-dimensional representation of the centered data.
        X_transformed = centered.dot(components)
        return X_transformed

# Reduce the four iris features to two components with the hand-written PCA.
model = PCA()
Y = model.fit(x, 2)

# Put the projected coordinates next to the class labels for plotting.
component_names = ['principal component 1', 'principal component 2']
principalDf = pd.DataFrame(np.array(Y), columns=component_names)
Df = pd.concat([principalDf, y], axis=1)

# One square figure with a single set of axes.
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
ax.set_title('2 component PCA', fontsize=20)

# Class indices correspond to
# ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'].
targets = [0, 1, 2]
colors = ['r', 'g', 'b']
# Draw one scatter series per class so each gets its own color and legend entry.
for target, color in zip(targets, colors):
    indicesToKeep = Df['label'] == target
    class_points = Df.loc[indicesToKeep, component_names]
    ax.scatter(class_points['principal component 1'],
               class_points['principal component 2'],
               c=color,
               s=50)
ax.legend(targets)
ax.grid()

在这里插入图片描述

使用 sklearn 包实现 PCA

from sklearn.decomposition import PCA as sklearnPCA

# Reduce the four iris features to two components with scikit-learn's PCA.
sklearn_pca = sklearnPCA(n_components=2)
Y = sklearn_pca.fit_transform(x)

# Put the projected coordinates next to the class labels for plotting.
component_names = ['principal component 1', 'principal component 2']
principalDf = pd.DataFrame(data=np.array(Y), columns=component_names)
Df = pd.concat([principalDf, y], axis=1)

# One square figure with a single set of axes.
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
ax.set_title('2 component PCA', fontsize=20)

# Class indices correspond to
# ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'].
targets = [0, 1, 2]
colors = ['r', 'g', 'b']
# Draw one scatter series per class so each gets its own color and legend entry.
for target, color in zip(targets, colors):
    indicesToKeep = Df['label'] == target
    class_points = Df.loc[indicesToKeep, component_names]
    ax.scatter(class_points['principal component 1'],
               class_points['principal component 2'],
               c=color,
               s=50)
ax.legend(targets)
ax.grid()