原理:待补充
流程:待补充
代码转自:
def percentage2n(eigVals, percentage):
    """Return how many of the largest eigenvalues reach a share of their total.

    The eigenvalues are sorted in descending order and accumulated until the
    running sum is at least ``percentage`` times the sum of all of them.

    Args:
        eigVals: 1-D sequence or array of eigenvalues.
        percentage: Fraction of the total to cover, e.g. ``0.9``.

    Returns:
        int: The number of leading (largest) eigenvalues needed. Returns 0
        for empty input, and the total number of eigenvalues when the
        threshold is never reached (e.g. ``percentage`` greater than 1)
        instead of silently returning ``None``.
    """
    descending = np.sort(eigVals)[::-1]
    if descending.size == 0:
        return 0
    # The running sum accumulates left to right, so its last entry is the
    # total and every prefix matches what an explicit loop would produce.
    running = np.cumsum(descending)
    target = running[-1] * percentage
    reached = np.flatnonzero(running >= target)
    if reached.size == 0:
        # Threshold unreachable: fall back to keeping every component.
        return int(descending.size)
    return int(reached[0]) + 1
import numpy as np
def zeroMean(dataMat):
    """Centre every column of ``dataMat`` on zero.

    The mean is taken along axis 0, i.e. one mean per column (feature), and
    subtracted from each row.

    Returns:
        tuple: ``(centred_data, column_means)``.
    """
    column_means = np.mean(dataMat, axis=0)
    return dataMat - column_means, column_means
def pca(dataMat, n):
    """Project ``dataMat`` onto its ``n`` leading principal components.

    Args:
        dataMat: 2-D data with one sample per row and one feature per column.
        n: Number of principal components to keep.

    Returns:
        tuple: ``(lowDDataMat, reconMat)`` where ``lowDDataMat`` is the data
        expressed in the ``n``-dimensional component space and ``reconMat`` is
        the data reconstructed from those components with the column means
        added back.

    Side effects:
        Prints the shape of the covariance matrix.
    """
    newData, meanVal = zeroMean(dataMat)
    # rowvar=0: each column is a variable (feature), each row an observation.
    covMat = np.cov(newData, rowvar=0)
    # np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
    # Eigenvectors are returned column-wise: column i pairs with eigVals[i].
    eigVals, eigVects = np.linalg.eig(np.asmatrix(covMat))
    print('covMat.shape', covMat.shape)
    # argsort is ascending, so walk backwards from the end to pick the
    # indices of the n largest eigenvalues, largest first.
    eigValIndice = np.argsort(eigVals)
    n_eigValIndice = eigValIndice[-1:-(n+1):-1]
    n_eigVect = eigVects[:, n_eigValIndice]
    # n_eigVect is an np.matrix, so ``*`` here is a matrix product.
    lowDDataMat = newData * n_eigVect
    reconMat = (lowDDataMat * n_eigVect.T) + meanVal
    return lowDDataMat, reconMat
# Example input: 8 samples (rows) with 4 features (columns) each.
dataMat = [[1,2,3,4],[2,3,4,10],[1,5,6,28],[1,2,4,1],[1,2,3,4],[1,2,3,4],[1,2,3,4],[1,2,3,4]]
# Keep the 2 leading principal components; in an interactive session the bare
# call displays the returned (lowDDataMat, reconMat) tuple.
pca(dataMat,2)
covMat.shape (4, 4)
(matrix([[ -3.45898736, -0.28022317],
[ 2.69546487, 0.27883528],
[ 20.91013322, 0.05452816],
[ -6.31066127, 1.06775239],
[ -3.45898736, -0.28022317],
[ -3.45898736, -0.28022317],
[ -3.45898736, -0.28022317],
[ -3.45898736, -0.28022317]]),
matrix([[ 1.0594375 , 2.00726655, 2.98693413, 4.0002547 ],
[ 1.18654049, 2.90055019, 4.17881897, 9.99651423],
[ 1.23800323, 5.02909718, 5.94768087, 28.00101987],
[ 1.27826878, 2.03401986, 3.93882949, 1.00119242],
[ 1.0594375 , 2.00726655, 2.98693413, 4.0002547 ],
[ 1.0594375 , 2.00726655, 2.98693413, 4.0002547 ],
[ 1.0594375 , 2.00726655, 2.98693413, 4.0002547 ],
[ 1.0594375 , 2.00726655, 2.98693413, 4.0002547 ]]))
# Sum of squares of 0.85 and 0.53 (recorded result: 1.0034).
# NOTE(review): presumably a check that a 2-component vector is roughly unit length - confirm.
0.85**2+0.53**2
1.0034
# zip() over a single iterable: each item it yields is a 1-tuple wrapping one pair.
# NOTE(review): this result is never used in the visible code; `a` is rebound on the next line.
a = zip([('a','b'),('c','d')])
a = [1,2]
b = [3,4]
# Pair the two lists element-wise: [(1, 3), (2, 4)].
c = list(zip(a,b))
# Small 3x3 example; columns are treated as variables (see rowvar=0 below).
a = np.array([[11,22,33],[1,2,3],[1,2,3]])
# Per-column means (axis 0).
mean = np.mean(a,0)
# Hand-computed covariance between columns 0 and 1: dot product of the two
# mean-centred columns divided by (3-1), i.e. N-1 for the 3 rows. This should
# equal entry [0, 1] of the covariance matrix printed below.
print('0行1列的cov:\n',np.dot(a[:,0]-mean[0],a[:,1]-mean[1])/(3-1))
# Full covariance matrix with columns as variables; covMat is reused by the
# eigen-decomposition further down.
covMat = np.cov(a,rowvar = 0)
print(np.cov(a,rowvar = 0))
0行1列的cov:
66.6666666667
[[ 33.33333333 66.66666667 100. ]
[ 66.66666667 133.33333333 200. ]
[ 100. 200. 300. ]]
# Eigen-decomposition of the covariance matrix; eigenvectors are the columns
# of eigVects, with column i belonging to eigVals[i].
eigVals,eigVects = np.linalg.eig(covMat)
print('eigVals',eigVals,'\neigVects,',eigVects)
eigVals [ 0.00000000e+00 4.66666667e+02 -3.02071477e-14]
eigVects, [[-0.96362411 0.26726124 -0.35856858]
[ 0.14824986 0.53452248 -0.71713717]
[ 0.22237479 0.80178373 0.5976143 ]]
# Indices of the two largest eigenvalues, largest first: argsort is ascending,
# and the slice walks backwards from the last index over two entries.
eigK = eigVals.argsort()[-1:-3:-1]
# Pick the eigenvector columns that belong to those eigenvalues.
eigVect = eigVects[:,eigK]
print(eigVect)
[[ 0.26726124 -0.96362411]
[ 0.53452248 0.14824986]
[ 0.80178373 0.22237479]]
# Project the mean-centred data onto the two selected eigenvectors.
lowData = np.dot((a-mean),eigVect)
print(lowData)
[[ 2.49443826e+01 1.99840144e-15]
[ -1.24721913e+01 -9.99200722e-16]
[ -1.24721913e+01 -9.99200722e-16]]
# Run the same data through pca() to compare with the manual steps; the first
# element of the recorded result matches the lowData values printed earlier.
pca(a,2)
covMat.shape (3, 3)
(matrix([[ 2.49443826e+01, 1.99840144e-15],
[ -1.24721913e+01, -9.99200722e-16],
[ -1.24721913e+01, -9.99200722e-16]]), matrix([[ 11., 22., 33.],
[ 1., 2., 3.],
[ 1., 2., 3.]]))