PCA实战

PCA实战

主成分分析(Principal Component Analysis, PCA)是一种常用的数据降维技术,它通过线性变换将高维数据投影到低维空间,同时尽可能保留原始数据的主要特征。PCA 的目标是找到数据中方差最大的方向,这些方向称为主成分。

PCA 的主要步骤

1.标准化数据:PCA 对数据的尺度敏感,因此通常需要对数据进行标准化处理,使得每个特征的均值为0,标准差为1。

2.计算协方差矩阵:协方差矩阵反映了数据中各特征之间的关系。

3.计算协方差矩阵的特征值和特征向量:特征向量表示主成分的方向,特征值表示主成分的方差。

4.选择主成分:选择前 k 个最大特征值对应的特征向量作为主成分。

5.转换数据:将原始数据投影到选定的主成分上,得到降维后的数据。

from sklearn import datasets
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

导入数据集

# Load the iris dataset: 150 samples, 4 numeric features, 3 classes.
iris = datasets.load_iris()
x = iris.data    # feature matrix, shape (150, 4)
y = iris.target  # integer class labels, shape (150,)
# A bare `x.shape` expression has no effect in a script (only in a REPL);
# print it so the script actually reports the shape.
print(x.shape)
(150, 4)

计算协方差矩阵

# Per-feature mean, used to center the data before computing covariance.
x_mean = np.mean(x, axis=0)
# Bare `x_mean` is a no-op outside a REPL; print it explicitly.
print(x_mean)
array([5.84333333, 3.05733333, 3.758     , 1.19933333])
# Sample covariance matrix of the centered data (divides by n - 1).
# Written out by hand for pedagogy; equivalent to np.cov(x, rowvar=False).
cov_x = (x - x_mean).T.dot(x - x_mean) / (x.shape[0] - 1)
# Bare `cov_x` is a no-op outside a REPL; print it explicitly.
print(cov_x)
array([[ 0.68569351, -0.042434  ,  1.27431544,  0.51627069],
       [-0.042434  ,  0.18997942, -0.32965638, -0.12163937],
       [ 1.27431544, -0.32965638,  3.11627785,  1.2956094 ],
       [ 0.51627069, -0.12163937,  1.2956094 ,  0.58100626]])

求协方差矩阵的特征值和特征向量

# The covariance matrix is symmetric, so use eigh: it is numerically more
# stable than eig for symmetric matrices and always returns real eigenvalues.
eig_vals, eig_vecs = np.linalg.eigh(cov_x)
# eigh returns eigenvalues in ascending order, and np.linalg.eig guarantees
# no ordering at all. Sort explicitly in descending order so that column k
# of eig_vecs is always the k-th principal component — downstream code that
# slices eig_vecs[:, :2] depends on this.
order = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[order]
eig_vecs = eig_vecs[:, order]
print('特征值:', eig_vals)
特征值: [4.22824171 0.24267075 0.0782095  0.02383509]
# Columns of eig_vecs are the eigenvectors, i.e. the principal-component
# directions (sign of each column is arbitrary).
print('特征向量:',eig_vecs)
特征向量: [[ 0.36138659 -0.65658877 -0.58202985  0.31548719]
 [-0.08452251 -0.73016143  0.59791083 -0.3197231 ]
 [ 0.85667061  0.17337266  0.07623608 -0.47983899]
 [ 0.3582892   0.07548102  0.54583143  0.75365743]]

降维

# Project the centered data onto the top-2 principal components.
# np.linalg.eig does not guarantee any eigenvalue ordering, so taking
# eig_vecs[:, :2] directly is only correct by accident — pick the two
# columns with the largest eigenvalues explicitly.
top2 = np.argsort(eig_vals)[::-1][:2]
print(np.dot(x - x_mean, eig_vecs[:, top2]))
[[-2.68412563 -0.31939725]
 [-2.71414169  0.17700123]
 [-2.88899057  0.14494943]
 [-2.74534286  0.31829898]
 [-2.72871654 -0.32675451]
 [-2.28085963 -0.74133045]
 [-2.82053775  0.08946138]
 [-2.62614497 -0.16338496]
 [-2.88638273  0.57831175]
 [-2.6727558   0.11377425]
 [-2.50694709 -0.6450689 ]
 [-2.61275523 -0.01472994]
 [-2.78610927  0.235112  ]
 [-3.22380374  0.51139459]
 [-2.64475039 -1.17876464]
 [-2.38603903 -1.33806233]
 [-2.62352788 -0.81067951]
 [-2.64829671 -0.31184914]
 [-2.19982032 -0.87283904]
 [-2.5879864  -0.51356031]
 [-2.31025622 -0.39134594]
 [-2.54370523 -0.43299606]
 [-3.21593942 -0.13346807]
 [-2.30273318 -0.09870885]
 [-2.35575405  0.03728186]
 [-2.50666891  0.14601688]
 [-2.46882007 -0.13095149]
 [-2.56231991 -0.36771886]
 [-2.63953472 -0.31203998]
 [-2.63198939  0.19696122]
 [-2.58739848  0.20431849]
 [-2.4099325  -0.41092426]
 [-2.64886233 -0.81336382]
 [-2.59873675 -1.09314576]
 [-2.63692688  0.12132235]
 [-2.86624165 -0.06936447]
 [-2.62523805 -0.59937002]
 [-2.80068412 -0.26864374]
 [-2.98050204  0.48795834]
 [-2.59000631 -0.22904384]
 [-2.77010243 -0.26352753]
 [-2.84936871  0.94096057]
 [-2.99740655  0.34192606]
 [-2.40561449 -0.18887143]
 [-2.20948924 -0.43666314]
 [-2.71445143  0.2502082 ]
 [-2.53814826 -0.50377114]
 [-2.83946217  0.22794557]
 [-2.54308575 -0.57941002]
 [-2.70335978 -0.10770608]
 [ 1.28482569 -0.68516047]
 [ 0.93248853 -0.31833364]
 [ 1.46430232 -0.50426282]
 [ 0.18331772  0.82795901]
 [ 1.08810326 -0.07459068]
 [ 0.64166908  0.41824687]
 [ 1.09506066 -0.28346827]
 [-0.74912267  1.00489096]
 [ 1.04413183 -0.2283619 ]
 [-0.0087454   0.72308191]
 [-0.50784088  1.26597119]
 [ 0.51169856  0.10398124]
 [ 0.26497651  0.55003646]
 [ 0.98493451  0.12481785]
 [-0.17392537  0.25485421]
 [ 0.92786078 -0.46717949]
 [ 0.66028376  0.35296967]
 [ 0.23610499  0.33361077]
 [ 0.94473373  0.54314555]
 [ 0.04522698  0.58383438]
 [ 1.11628318  0.08461685]
 [ 0.35788842  0.06892503]
 [ 1.29818388  0.32778731]
 [ 0.92172892  0.18273779]
 [ 0.71485333 -0.14905594]
 [ 0.90017437 -0.32850447]
 [ 1.33202444 -0.24444088]
 [ 1.55780216 -0.26749545]
 [ 0.81329065  0.1633503 ]
 [-0.30558378  0.36826219]
 [-0.06812649  0.70517213]
 [-0.18962247  0.68028676]
 [ 0.13642871  0.31403244]
 [ 1.38002644  0.42095429]
 [ 0.58800644  0.48428742]
 [ 0.80685831 -0.19418231]
 [ 1.22069088 -0.40761959]
 [ 0.81509524  0.37203706]
 [ 0.24595768  0.2685244 ]
 [ 0.16641322  0.68192672]
 [ 0.46480029  0.67071154]
 [ 0.8908152   0.03446444]
 [ 0.23054802  0.40438585]
 [-0.70453176  1.01224823]
 [ 0.35698149  0.50491009]
 [ 0.33193448  0.21265468]
 [ 0.37621565  0.29321893]
 [ 0.64257601 -0.01773819]
 [-0.90646986  0.75609337]
 [ 0.29900084  0.34889781]
 [ 2.53119273  0.00984911]
 [ 1.41523588  0.57491635]
 [ 2.61667602 -0.34390315]
 [ 1.97153105  0.1797279 ]
 [ 2.35000592  0.04026095]
 [ 3.39703874 -0.55083667]
 [ 0.52123224  1.19275873]
 [ 2.93258707 -0.3555    ]
 [ 2.32122882  0.2438315 ]
 [ 2.91675097 -0.78279195]
 [ 1.66177415 -0.24222841]
 [ 1.80340195  0.21563762]
 [ 2.1655918  -0.21627559]
 [ 1.34616358  0.77681835]
 [ 1.58592822  0.53964071]
 [ 1.90445637 -0.11925069]
 [ 1.94968906 -0.04194326]
 [ 3.48705536 -1.17573933]
 [ 3.79564542 -0.25732297]
 [ 1.30079171  0.76114964]
 [ 2.42781791 -0.37819601]
 [ 1.19900111  0.60609153]
 [ 3.49992004 -0.4606741 ]
 [ 1.38876613  0.20439933]
 [ 2.2754305  -0.33499061]
 [ 2.61409047 -0.56090136]
 [ 1.25850816  0.17970479]
 [ 1.29113206  0.11666865]
 [ 2.12360872  0.20972948]
 [ 2.38800302 -0.4646398 ]
 [ 2.84167278 -0.37526917]
 [ 3.23067366 -1.37416509]
 [ 2.15943764  0.21727758]
 [ 1.44416124  0.14341341]
 [ 1.78129481  0.49990168]
 [ 3.07649993 -0.68808568]
 [ 2.14424331 -0.1400642 ]
 [ 1.90509815 -0.04930053]
 [ 1.16932634  0.16499026]
 [ 2.10761114 -0.37228787]
 [ 2.31415471 -0.18365128]
 [ 1.9222678  -0.40920347]
 [ 1.41523588  0.57491635]
 [ 2.56301338 -0.2778626 ]
 [ 2.41874618 -0.3047982 ]
 [ 1.94410979 -0.1875323 ]
 [ 1.52716661  0.37531698]
 [ 1.76434572 -0.07885885]
 [ 1.90094161 -0.11662796]
 [ 1.39018886  0.28266094]]
# Same dimensionality reduction with scikit-learn.
pca = PCA(n_components=2)
com_data = pca.fit_transform(x)
# Print the stored result instead of calling fit_transform(x) a second
# time — the original refit the model just to print the same projection.
print(com_data)
[[-2.68412563  0.31939725]
 [-2.71414169 -0.17700123]
 [-2.88899057 -0.14494943]
 [-2.74534286 -0.31829898]
 [-2.72871654  0.32675451]
 [-2.28085963  0.74133045]
 [-2.82053775 -0.08946138]
 [-2.62614497  0.16338496]
 [-2.88638273 -0.57831175]
 [-2.6727558  -0.11377425]
 [-2.50694709  0.6450689 ]
 [-2.61275523  0.01472994]
 [-2.78610927 -0.235112  ]
 [-3.22380374 -0.51139459]
 [-2.64475039  1.17876464]
 [-2.38603903  1.33806233]
 [-2.62352788  0.81067951]
 [-2.64829671  0.31184914]
 [-2.19982032  0.87283904]
 [-2.5879864   0.51356031]
 [-2.31025622  0.39134594]
 [-2.54370523  0.43299606]
 [-3.21593942  0.13346807]
 [-2.30273318  0.09870885]
 [-2.35575405 -0.03728186]
 [-2.50666891 -0.14601688]
 [-2.46882007  0.13095149]
 [-2.56231991  0.36771886]
 [-2.63953472  0.31203998]
 [-2.63198939 -0.19696122]
 [-2.58739848 -0.20431849]
 [-2.4099325   0.41092426]
 [-2.64886233  0.81336382]
 [-2.59873675  1.09314576]
 [-2.63692688 -0.12132235]
 [-2.86624165  0.06936447]
 [-2.62523805  0.59937002]
 [-2.80068412  0.26864374]
 [-2.98050204 -0.48795834]
 [-2.59000631  0.22904384]
 [-2.77010243  0.26352753]
 [-2.84936871 -0.94096057]
 [-2.99740655 -0.34192606]
 [-2.40561449  0.18887143]
 [-2.20948924  0.43666314]
 [-2.71445143 -0.2502082 ]
 [-2.53814826  0.50377114]
 [-2.83946217 -0.22794557]
 [-2.54308575  0.57941002]
 [-2.70335978  0.10770608]
 [ 1.28482569  0.68516047]
 [ 0.93248853  0.31833364]
 [ 1.46430232  0.50426282]
 [ 0.18331772 -0.82795901]
 [ 1.08810326  0.07459068]
 [ 0.64166908 -0.41824687]
 [ 1.09506066  0.28346827]
 [-0.74912267 -1.00489096]
 [ 1.04413183  0.2283619 ]
 [-0.0087454  -0.72308191]
 [-0.50784088 -1.26597119]
 [ 0.51169856 -0.10398124]
 [ 0.26497651 -0.55003646]
 [ 0.98493451 -0.12481785]
 [-0.17392537 -0.25485421]
 [ 0.92786078  0.46717949]
 [ 0.66028376 -0.35296967]
 [ 0.23610499 -0.33361077]
 [ 0.94473373 -0.54314555]
 [ 0.04522698 -0.58383438]
 [ 1.11628318 -0.08461685]
 [ 0.35788842 -0.06892503]
 [ 1.29818388 -0.32778731]
 [ 0.92172892 -0.18273779]
 [ 0.71485333  0.14905594]
 [ 0.90017437  0.32850447]
 [ 1.33202444  0.24444088]
 [ 1.55780216  0.26749545]
 [ 0.81329065 -0.1633503 ]
 [-0.30558378 -0.36826219]
 [-0.06812649 -0.70517213]
 [-0.18962247 -0.68028676]
 [ 0.13642871 -0.31403244]
 [ 1.38002644 -0.42095429]
 [ 0.58800644 -0.48428742]
 [ 0.80685831  0.19418231]
 [ 1.22069088  0.40761959]
 [ 0.81509524 -0.37203706]
 [ 0.24595768 -0.2685244 ]
 [ 0.16641322 -0.68192672]
 [ 0.46480029 -0.67071154]
 [ 0.8908152  -0.03446444]
 [ 0.23054802 -0.40438585]
 [-0.70453176 -1.01224823]
 [ 0.35698149 -0.50491009]
 [ 0.33193448 -0.21265468]
 [ 0.37621565 -0.29321893]
 [ 0.64257601  0.01773819]
 [-0.90646986 -0.75609337]
 [ 0.29900084 -0.34889781]
 [ 2.53119273 -0.00984911]
 [ 1.41523588 -0.57491635]
 [ 2.61667602  0.34390315]
 [ 1.97153105 -0.1797279 ]
 [ 2.35000592 -0.04026095]
 [ 3.39703874  0.55083667]
 [ 0.52123224 -1.19275873]
 [ 2.93258707  0.3555    ]
 [ 2.32122882 -0.2438315 ]
 [ 2.91675097  0.78279195]
 [ 1.66177415  0.24222841]
 [ 1.80340195 -0.21563762]
 [ 2.1655918   0.21627559]
 [ 1.34616358 -0.77681835]
 [ 1.58592822 -0.53964071]
 [ 1.90445637  0.11925069]
 [ 1.94968906  0.04194326]
 [ 3.48705536  1.17573933]
 [ 3.79564542  0.25732297]
 [ 1.30079171 -0.76114964]
 [ 2.42781791  0.37819601]
 [ 1.19900111 -0.60609153]
 [ 3.49992004  0.4606741 ]
 [ 1.38876613 -0.20439933]
 [ 2.2754305   0.33499061]
 [ 2.61409047  0.56090136]
 [ 1.25850816 -0.17970479]
 [ 1.29113206 -0.11666865]
 [ 2.12360872 -0.20972948]
 [ 2.38800302  0.4646398 ]
 [ 2.84167278  0.37526917]
 [ 3.23067366  1.37416509]
 [ 2.15943764 -0.21727758]
 [ 1.44416124 -0.14341341]
 [ 1.78129481 -0.49990168]
 [ 3.07649993  0.68808568]
 [ 2.14424331  0.1400642 ]
 [ 1.90509815  0.04930053]
 [ 1.16932634 -0.16499026]
 [ 2.10761114  0.37228787]
 [ 2.31415471  0.18365128]
 [ 1.9222678   0.40920347]
 [ 1.41523588 -0.57491635]
 [ 2.56301338  0.2778626 ]
 [ 2.41874618  0.3047982 ]
 [ 1.94410979  0.1875323 ]
 [ 1.52716661 -0.37531698]
 [ 1.76434572  0.07885885]
 [ 1.90094161  0.11662796]
 [ 1.39018886 -0.28266094]]
# Fraction of the total variance captured by each retained component
# (here ~92.5% + ~5.3% ≈ 97.8% for the first two components).
# Bare attribute access is a no-op outside a REPL; print it explicitly.
print(pca.explained_variance_ratio_)
array([0.92461872, 0.05306648])
# Scatter plot of the two retained principal components,
# with each point colored by its class label.
plt.scatter(
    com_data[:, 0],
    com_data[:, 1],
    marker="o",
    c=y,
    cmap="coolwarm",
)
plt.xlabel("com_1")
plt.ylabel("com_2")
plt.show()

(图:鸢尾花数据集降维到二维后的散点图,横轴为第一主成分 com_1,纵轴为第二主成分 com_2,颜色对应三个类别)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值