import numpy as np
from sklearn.decomposition import PCA
from sklearn import datasets
import matplotlib.pyplot as plt
class PrincipalComponentAnalysis(object):
    """Principal component analysis via the scatter-matrix eigendecomposition.

    The data is centered per feature, the scatter matrix ``Xc.T @ Xc`` is
    decomposed, and the centered data is projected onto the ``k``
    eigenvectors with the largest eigenvalues.

    Attributes set by :meth:`fit`:
        X, k: the inputs used for the last fit.
        nSample, nDim: number of rows / columns of ``X``.
        Xmean: per-feature mean, shape ``(nDim,)``.
        Xnorm: centered data, shape ``(nSample, nDim)``.
        feature: projection matrix, shape ``(nDim, k)``; column ``i`` is the
            eigenvector of the ``i``-th largest eigenvalue.
    """

    def __init__(self, X=None, k=None):
        """Optionally remember ``X`` and ``k`` as defaults for :meth:`fit`."""
        self.X = X
        self.k = k

    def fit(self, X=None, k=None):
        """Fit the projection on ``X`` and return ``X`` reduced to ``k`` dims.

        Args:
            X: 2-D array-like of shape ``(nSample, nDim)``. Falls back to the
                value given to the constructor when omitted.
            k: number of principal components to keep, ``0 <= k <= nDim``.
                Falls back to the value given to the constructor when omitted.

        Returns:
            ``numpy.ndarray`` of shape ``(nSample, k)``: the centered data
            projected onto the top-``k`` principal axes. The sign of each
            column is arbitrary (an eigenvector and its negation are equally
            valid).

        Raises:
            ValueError: if ``X`` or ``k`` is missing, ``X`` is not 2-D, or
                ``k`` is negative.
            IndexError: if ``k`` exceeds the number of features.
        """
        if X is None:
            X = self.X
        if k is None:
            k = self.k
        if X is None or k is None:
            raise ValueError(
                "X and k must be passed to fit() or to the constructor")

        self.X = X
        self.k = k

        samples = np.asarray(X, dtype=np.float64)
        # Unpacking raises ValueError when X is not 2-D.
        self.nSample, self.nDim = samples.shape

        if k < 0:
            raise ValueError("k must be non-negative, got %r" % (k,))
        if k > self.nDim:
            # IndexError kept for compatibility with the previous behaviour.
            raise IndexError(
                "k=%r exceeds the number of features (%d)" % (k, self.nDim))

        self.Xmean = samples.mean(axis=0)
        self.Xnorm = samples - self.Xmean
        scatter_matrix = self.Xnorm.T @ self.Xnorm

        # The scatter matrix is symmetric, so use eigh: unlike eig it always
        # returns real eigenpairs and orthonormal eigenvectors, with the
        # eigenvalues sorted in ascending order.
        _, eig_vec = np.linalg.eigh(scatter_matrix)

        # Reverse to descending eigenvalue order and keep the leading k axes.
        self.feature = eig_vec[:, ::-1][:, :k]
        return self.Xnorm @ self.feature
if __name__ == '__main__':
    # Demo: compare the hand-written PCA with scikit-learn's PCA on iris.
    X, y = datasets.load_iris(return_X_y=True)

    # 2-D projection from the implementation in this file.
    pca = PrincipalComponentAnalysis()
    data = pca.fit(X=X, k=2)
    print(data.shape)

    # 2-D projection from scikit-learn, used as the reference.
    pca1 = PCA(n_components=2)
    data1 = pca1.fit_transform(X=X)

    # The sign of every principal axis is arbitrary and depends on the solver
    # and library version, so a hard-coded negation is fragile. Instead, flip
    # each reference column so that it points the same way as the
    # corresponding column of our own result.
    signs = np.sign(np.sum(data * data1, axis=0))
    signs[signs == 0] = 1.0
    data1 = data1 * signs

    # Step 1: replace the sans-serif font (the plot labels are Chinese).
    plt.rcParams['font.sans-serif'] = ['SimHei']
    # Step 2: fix the rendering of the minus sign on the axes.
    plt.rcParams['axes.unicode_minus'] = False

    fig = plt.figure(figsize=(10, 5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    # Left panel: our implementation.
    ax1.scatter(data[:, 0], data[:, 1], c=y)
    ax1.set_title("python实现")
    ax1.set_xlabel("x轴")
    ax1.set_ylabel("y轴")

    # Right panel: scikit-learn, sign-aligned with the left panel.
    ax2.scatter(data1[:, 0], data1[:, 1], c=y)
    ax2.set_title("调包实现")

    plt.show()
# References:
# https://zhuanlan.zhihu.com/p/47858230
# https://zhuanlan.zhihu.com/p/77151308