PCA算法流程
1、数据预处理
2、求协方差矩阵
3、对协方差矩阵做特征值分解
4、选出最大的K个特征值对应的K个特征向量
5、将原始数据投影到选取的特征向量上
6、输出投影后的数据集
样本点
PyCharm实现
PyCharm代码实现
# A highlighted block
import numpy as np
import matplotlib.pyplot as plt
# Load the comma-separated samples; columns 0 and 1 are used as the x and y
# coordinates of each point.
data = np.genfromtxt('data.csv', delimiter=',')
x_data, y_data = data[:, 0], data[:, 1]
# Uncomment to preview the raw samples before running PCA:
# plt.scatter(x_data, y_data)
# plt.show()
# Preprocessing: center the data.
def zeroMean(dataMat):
    """Center each column of ``dataMat`` on zero.

    Args:
        dataMat: Array-like whose mean is taken along axis 0 (one mean per
            column) and subtracted from every row.

    Returns:
        A ``(newData, meanVal)`` tuple: the centered data and the per-column
        mean that was subtracted. Adding ``meanVal`` back to ``newData``
        restores the original values.
    """
    meanVal = np.mean(dataMat, axis=0)
    # Broadcasting subtracts the per-column mean from every row; the input
    # itself is not modified.
    newData = dataMat - meanVal
    return newData, meanVal
# Center the samples so the covariance and projection are taken about the mean.
newData, meanVal = zeroMean(data)

# rowvar=0: each column is a variable (feature) and each row an observation.
covMat = np.cov(newData, rowvar=0)

# The covariance matrix is symmetric, so use eigh: it returns real eigenvalues
# and orthonormal eigenvectors (as columns). Plain ndarrays are used instead
# of np.mat, which was removed in NumPy 2.0.
eigVals, eigVects = np.linalg.eigh(covMat)

# Number of principal components to keep.
top = 1
eigValIndice = np.argsort(eigVals)
# Walk the ascending order backwards to pick the `top` largest eigenvalues.
n_eigValIndice = eigValIndice[-1:-(top + 1):-1]
n_eigVect = eigVects[:, n_eigValIndice]

# Project the centered data onto the selected eigenvectors, then map the
# projection back to the original coordinates and restore the mean.
lowDDataMat = newData @ n_eigVect
reconMat = lowDDataMat @ n_eigVect.T + meanVal

# Original data
x_data = data[:, 0]
y_data = data[:, 1]
plt.scatter(x_data, y_data)

# Reconstructed data, drawn in red on the same axes as the original points
x1_data = np.asarray(reconMat)[:, 0]
y1_data = np.asarray(reconMat)[:, 1]
plt.scatter(x1_data, y1_data, c='r')
plt.show()