Deep Learning: Derivations and Code Implementations (3): Linear Algebra

1.10 PCA (Principal Component Analysis)

Using the iris dataset as an example, we demonstrate how PCA is applied.
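Briefly, given a data matrix $X \in \mathbb{R}^{n \times d}$ (one sample per row), PCA centers the columns, forms the sample covariance matrix, eigendecomposes it, and projects the centered data onto the eigenvectors with the largest eigenvalues. The notation below is a standard summary chosen to match the code that follows:

$$
\bar{X} = X - \mathbf{1}\mu^{\top}, \qquad
C = \frac{1}{n-1}\,\bar{X}^{\top}\bar{X}, \qquad
C = V \Lambda V^{\top}, \qquad
Z = \bar{X}\,V_{:,1:k},
$$

where $\mu \in \mathbb{R}^{d}$ holds the column means, the eigenvalues in $\Lambda$ are sorted in descending order, and $Z \in \mathbb{R}^{n \times k}$ is the $k$-dimensional representation.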

Implementing PCA by hand
import pandas as pd 
import numpy as np 
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler
%matplotlib inline
# Load the data
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target
df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
df.label.value_counts()

'''
2    50
1    50
0    50
Name: label, dtype: int64
'''
# Inspect the last few rows
df.tail()

(Output of df.tail(): the last five rows, showing the four feature columns and the label.)

# Split into features and labels
x = df.iloc[:, 0:4]
y = df.iloc[:, 4]
print("First sample:\n", x.iloc[0, 0:4])
print("First label:\n", y.iloc[0])

'''
First sample:
 sepal length    5.1
sepal width     3.5
petal length    1.4
petal width     0.2
Name: 0, dtype: float64
First label:
 0
'''
class PCA:
    def __init__(self):
        pass

    def fit(self, X, n_components):
        # Center the data and compute the sample covariance matrix
        n_sample = np.shape(X)[0]
        X_centered = X - X.mean(axis=0)
        covariance_matrix = (1 / (n_sample - 1)) * X_centered.T.dot(X_centered)
        # Eigendecompose the covariance matrix
        eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
        # Sort the eigenvalues (and their eigenvectors) in descending order
        idx = eigenvalues.argsort()[::-1]
        eigenvalues = eigenvalues[idx][:n_components]
        eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components]
        # Project the centered data onto the top n_components eigenvectors
        X_transformed = X_centered.dot(eigenvectors)
        return X_transformed
model = PCA()
Y = model.fit(x, 2)
principalDf = pd.DataFrame(np.array(Y),
                           columns=['principal component 1', 'principal component 2'])
Df = pd.concat([principalDf, y], axis = 1)
fig = plt.figure(figsize = (5,5))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('Principal Component 1', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)

targets = [0, 1, 2]
# ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
colors = ['r', 'g', 'b']
for target, color in zip(targets,colors):
    indicesToKeep = Df['label'] == target
    ax.scatter(Df.loc[indicesToKeep, 'principal component 1'], 
               Df.loc[indicesToKeep, 'principal component 2'], 
               c = color, 
               s = 50)
ax.legend(targets)
ax.grid()

(Figure: 2-component PCA scatter plot of the iris data from the manual implementation, points colored by class.)
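The same low-dimensional representation can also be obtained from a singular value decomposition of the centered data, since the right singular vectors of the centered matrix are the eigenvectors of its covariance matrix. A minimal sketch, assuming the x DataFrame and the Y result computed above:

# Sanity check: PCA via SVD of the centered data
X_centered = x - x.mean(axis=0)
U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)
# Project onto the top 2 right singular vectors
Y_svd = X_centered.dot(Vt.T[:, :2])
# The columns should match the eigen-based result up to sign flips
print(np.allclose(np.abs(np.array(Y_svd)), np.abs(np.array(Y)), atol=1e-8))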

Implementing PCA with the sklearn package
from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=2)
Y = sklearn_pca.fit_transform(x)
principalDf = pd.DataFrame(data = np.array(Y), columns = ['principal component 1', 'principal component 2'])
Df = pd.concat([principalDf, y], axis = 1)
fig = plt.figure(figsize = (5,5))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('Principal Component 1', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)

targets = [0, 1, 2]
# ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
colors = ['r', 'g', 'b']
for target, color in zip(targets,colors):
    indicesToKeep = Df['label'] == target
    ax.scatter(Df.loc[indicesToKeep, 'principal component 1'], 
               Df.loc[indicesToKeep, 'principal component 2'], 
               c = color, 
               s = 50)
ax.legend(targets)
ax.grid()

(Figure: 2-component PCA scatter plot of the iris data from sklearn's PCA, points colored by class.)
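sklearn's PCA also reports how much of the total variance each component captures via its explained_variance_ratio_ attribute. A minimal sketch, assuming the sklearn_pca object fitted above; it also shows how the StandardScaler imported earlier (but not used so far) could be applied first when features are on very different scales:

# Fraction of total variance captured by each of the two components
print(sklearn_pca.explained_variance_ratio_)
print(sklearn_pca.explained_variance_ratio_.sum())

# Optional: standardize the features before PCA when their scales differ
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)
Y_scaled = sklearnPCA(n_components=2).fit_transform(x_scaled)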
