# Source: "Machine Learning in Action" (机器学习实战)
from numpy import *
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
def loadDataSet(fileName, delim='\t'):
    """Load a delimited text file of numbers into a NumPy matrix.

    Args:
        fileName: Path of the text file to read.
        delim: Field separator used on each line (tab by default).

    Returns:
        A ``numpy.matrix`` with one row per non-blank line and one column
        per field, every field converted with ``float``.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    # The context manager closes the file even when parsing fails.
    with open(fileName) as fr:
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no data; skip them instead of failing on float('').
        stringArr = [line.strip().split(delim) for line in fr if line.strip()]
    datArr = [[float(field) for field in row] for row in stringArr]
    # np.asmatrix is the function the removed ``mat`` alias used to point to.
    return np.asmatrix(datArr)
def pca(dataMat, topNfeat=9999999):
    """Project data onto its leading principal components.

    Args:
        dataMat: Samples in rows, features in columns. Expected to be a
            ``numpy.matrix`` (as returned by ``loadDataSet``), because ``*``
            is used below as the matrix product.
        topNfeat: Number of components to keep, largest eigenvalue first.
            The huge default keeps every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``: the mean-centred data expressed
        in the kept components, and that projection mapped back into the
        original feature space with the mean added back.
    """
    # Remove the per-feature mean.
    meanVals = np.mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals
    # rowvar=0: each column is a variable, each row an observation.
    covMat = np.cov(meanRemoved, rowvar=0)
    # A covariance matrix is symmetric, so use eigh: unlike eig it always
    # returns real eigenvalues/eigenvectors, never complex round-off noise.
    # np.asmatrix replaces the ``mat`` alias that NumPy 2.0 removed and also
    # lifts the 0-d result of a single-feature covariance to 1x1.
    eigVals, eigVects = np.linalg.eigh(np.asmatrix(covMat))
    # argsort is ascending; reverse it and keep the topNfeat largest.
    eigValInd = np.argsort(eigVals)[::-1][:topNfeat]
    redEigVects = eigVects[:, eigValInd]
    # Transform the data into the new space.
    lowDDataMat = meanRemoved * redEigVects
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat
# Demo: reduce the data in chr1.txt to a single principal component, then
# plot the original samples together with their reconstruction.
dataMat = loadDataSet('chr1.txt')
lowDMat, reconMat = pca(dataMat, 1)
print(np.shape(lowDMat))
print(reconMat)
fig = plt.figure()
ax = fig.add_subplot(111)
# x is feature 0 and y is feature 1. The earlier version used column 0 for
# both axes, which draws a diagonal line instead of the data cloud.
# NOTE(review): this assumes chr1.txt has at least two columns; confirm.
ax.scatter(dataMat[:, 0].flatten().A[0], dataMat[:, 1].flatten().A[0],
           marker='s', s=90)
ax.scatter(reconMat[:, 0].flatten().A[0], reconMat[:, 1].flatten().A[0],
           marker='o', s=50, c='red')
plt.show()
# <matplotlib.collections.PathCollection object at 0X029B5C50>
# Run result: