#!/usr/bin/python2
#-*-coding:UTF-8-*-
from numpy import *
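#Throughout this PCA implementation the data records are assumed to contain no labels, so labels must be stored separately before calling these functions.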
#The centralize function change the dataSet in the original position
def centralize(dataSet): #The dataSet must be of array type
meanRow=mean(dataSet,axis=0,keepdims=False)
dataSet-meanRow
return dataSet,meanRow
def getCov(dataSet): #Get the cov matrix
return cov(dataSet,rowvar=0)
def getEigValsVects(covMat): #The covMat object must be of matrix type !
eigVals,eigVects=linalg.eig(covMat)
return eigVals,eigVects
def selectEigValsVects(eigVals,eigVects,num):
valIndices=argsort(eigVals)
valIndices=valIndices[-1::-1]
selectIndices=valIndices[:num]
selectVals=eigVals[selectIndices]
selectVects=eigVects[:,selectIndices]
return selectVals,selectVects
def percent2Num(eigVals,percent):
sortedEigVals=sort(eigVals)
sortedEigVals=sortedEigVals[-1::-1]
valSum=sum(sortedEigVals)
tempSum=0.0
for i in range(len(sortedEigVals)):
tempSum+=sortedEigVals[i]
if tempSum>percent*valSum:
return i+1
def pca(dataSet,percent):
dataSet,meanRow=centralize(dataSet)
covSet=getCov(dataSet)
eigVals,eigVects=getEigValsVects(mat(covSet))
valNum=percent2Num(eigVals,0.95)
selectVals,selectVects=selectEigValsVects(eigVals,eigVects,valNum)
newDataSet=dataSet*selectVects
return newDataSet
# PCA implementation
# (Footer of the source web page: latest recommended article published 2023-10-04 11:36:06)