PCA降维算法

PCA降维算法_python实现

简单实现代码-python

import numpy as np


# 数据归一化
def featureNormalize(X):
    """Normalize each column of X to zero mean and unit variance.

    Args:
        X: (m, n) array of m samples (rows) with n features (columns).

    Returns:
        Y: (m, n) float array, (X - mu) / sigma applied column-wise.
        mu: (n,) per-column means.
        sigma: (n,) per-column standard deviations (population std, ddof=0).
            NOTE(review): a constant column (sigma == 0) will produce
            division warnings / NaN, same as the original.
    """
    mu = np.mean(X, axis=0)    # column means
    sigma = np.std(X, axis=0)  # column standard deviations
    # One vectorized broadcast replaces the original per-column loop and
    # the dead zero-initializations of mu/sigma/Y that were overwritten.
    Y = (X - mu) / sigma
    return Y, mu, sigma


# 映射数据
def reduceDimensionData(X_norm, U, K):
    """Project normalized data onto the first K principal directions.

    Args:
        X_norm: (m, n) normalized data matrix.
        U: (n, n) matrix whose columns are the principal directions.
        K: number of components to keep.

    Returns:
        (m, K) projected data, Z = X_norm @ U[:, :K].
    """
    principal_axes = U[:, :K]  # leading K columns of U
    return X_norm.dot(principal_axes)


def recoverData(Z, U, K):
    """Map K-dimensional data back to the original n-dimensional space.

    The result is only an approximation: variance discarded by the
    projection cannot be restored.

    Args:
        Z: (m, K) reduced data.
        U: (n, n) principal-direction matrix.
        K: number of components that were kept.

    Returns:
        (m, n) approximate reconstruction, Z @ U[:, :K].T.
    """
    basis = U[:, :K]
    return Z.dot(basis.T)


def PCA(X, K):
    """Reduce X to K dimensions via SVD of its covariance matrix.

    Prints every intermediate result (kept byte-identical to the
    original script) and additionally returns the computed values,
    which the original discarded.

    Args:
        X: (m, n) data matrix, one sample per row.
        K: target dimensionality, 1 <= K <= n.

    Returns:
        Z: (m, K) reduced data.
        X_rec: (m, n) approximate reconstruction in the original scale.
        contriRatio: fraction of total variance retained by K components.
    """
    print("原始数据:")
    print(X)

    x_norm, mu, sigma = featureNormalize(X.copy())  # normalize columns

    print("归一化数据")
    print(x_norm)

    m = X.shape[0]  # number of samples
    cov = np.dot(np.transpose(x_norm), x_norm) / m  # covariance matrix
    # For a symmetric covariance matrix, SVD gives the eigendecomposition;
    # S holds the (non-negative) eigenvalues in descending order.
    U, S, V = np.linalg.svd(cov)
    print("S")
    print(S)

    Z = reduceDimensionData(x_norm, U, K)  # project onto K components
    print("降维数据")
    print(Z)

    X_rec = recoverData(Z, U, K)  # approximate reconstruction
    # Undo the normalization in one vectorized step (was a per-column loop).
    X_rec = X_rec * sigma + mu
    print("降维后还原数据:")
    print(X_rec)

    # BUG FIX: the original accumulated into a variable named `sum`,
    # shadowing the builtin; slicing replaces both manual loops.
    contriRatio = S[:K].sum() / S.sum()
    print("贡献率:")
    print(contriRatio)
    return Z, X_rec, contriRatio


def test():
    """Run PCA on a small 4-dimensional sample set, reducing to 2 dims."""
    data = np.array([
        [-1, 2, 66, -1],
        [-2, 6, 58, -1],
        [-3, 8, 45, -2],
        [1, 9, 36, 1],
        [2, 10, 62, 1],
        [3, 5, 83, 2],
    ])  # 6 samples, 4 features each
    PCA(data, 2)


if __name__ == '__main__':
    # Suppress scientific notation so the printed arrays are easier to read.
    np.set_printoptions(suppress=True)
    test()

结果:

原始数据:
[[-1  2 66 -1]
 [-2  6 58 -1]
 [-3  8 45 -2]
 [ 1  9 36  1]
 [ 2 10 62  1]
 [ 3  5 83  2]]
归一化数据
[[-0.46291005 -1.73648628  0.50972854 -0.70710678]
 [-0.9258201  -0.24806947 -0.02216211 -0.70710678]
 [-1.38873015  0.49613894 -0.88648441 -1.41421356]
 [ 0.46291005  0.86824314 -1.48486139  0.70710678]
 [ 0.9258201   1.24034735  0.24378321  0.70710678]
 [ 1.38873015 -0.62017367  1.63999617  1.41421356]]
S
[2.27496357 1.49208403 0.21930923 0.01364317]
降维数据
[[ 0.54316071  1.86902683]
 [ 1.05637399  0.47581872]
 [ 2.16552407 -0.37225405]
 [-0.11661105 -1.73853588]
 [-1.16084093 -1.12805905]
 [-2.48760679  0.89400342]]
降维后还原数据:
[[-1.38796114  2.71908396 70.6695927  -1.04408654]
 [-1.63490905  5.63833517 55.54993895 -1.08525997]
 [-2.89740254  7.39507453 41.20533621 -1.82146769]
 [ 0.74857579 10.32875385 44.35076001  0.6243176 ]
 [ 2.00050576  9.07043478 56.26271813  1.37367778]
 [ 3.17119118  4.84831772 81.961654    1.95281882]]
贡献率:
0.9417618999761952

利用sklearn.decomposition 里的 PCA实现

#coding=utf-8
import numpy as np
from sklearn.decomposition import PCA

# Same 6x4 data set as the hand-rolled example; reduce 4 dims -> 2 with sklearn.
X = np.array([[-1,2,66,-1], [-2,6,58,-1], [-3,8,45,-2], [1,9,36,1], [2,10,62,1], [3,5,83,2]])
pca = PCA(n_components=2)  # keep 2 principal components
# fit_transform fits the model AND returns the projected data in one call;
# the original's separate pca.fit(X) beforehand was a redundant second fit.
newX = pca.fit_transform(X)
print("各个主成分的贡献率:")
print(pca.explained_variance_ratio_)  # variance ratio of each kept component
print("原始数据:")
print(X)
print("降维后的数据:")
print(newX)
print("降维后还原的数据:")
oldX = pca.inverse_transform(newX)  # approximate reconstruction
print(oldX)

结果:

各个主成分的贡献率:
[0.95713353 0.03398198]
原始数据:
[[-1  2 66 -1]
 [-2  6 58 -1]
 [-3  8 45 -2]
 [ 1  9 36  1]
 [ 2 10 62  1]
 [ 3  5 83  2]]
降维后的数据:
[[  7.96504337   4.12166867]
 [ -0.43650137   2.07052079]
 [-13.63653266   1.86686164]
 [-22.28361821  -2.32219188]
 [  3.47849303  -3.95193502]
 [ 24.91311585  -1.78492421]]
降维后还原的数据:
[[-1.94037472  3.03463549 66.1814671  -1.37118938]
 [-1.27479012  5.28296796 57.87329006 -0.852196  ]
 [-2.04477761  6.73464855 44.78058024 -1.25592271]
 [-0.10998941 10.48087847 36.25669113  0.11736839]
 [ 2.61200674  9.04489278 61.8357845   1.72394933]
 [ 2.75792512  5.42197676 83.07218697  1.63799036]]

实现原理参考下面的博客,他们都写得很详细

参考:

例子比较齐全,还有GitHub代码

主成分分析PCA降维_Python

解释比较详细:

PCA 降维算法详解 以及代码示例

这个比较简略

PCA降维及python实现

  • 6
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值