PCA实现

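原理:PCA(主成分分析)先对数据按特征去均值,再对协方差矩阵做特征分解;特征值越大,对应特征向量方向上的方差越大。取最大的 n 个特征值对应的特征向量作为新的坐标轴,把数据投影到这 n 维子空间即可实现降维。
流程:1) 按列(特征)去均值;2) 计算协方差矩阵;3) 求协方差矩阵的特征值和特征向量;4) 按特征值从大到小取前 n 个特征向量;5) 将去均值后的数据投影到这些特征向量上得到低维数据;6) 用低维数据乘以特征向量的转置并加回均值得到重构数据。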

代码转自:

def percentage2n(eigVals,percentage):
    """Return how many of the largest eigenvalues are needed to reach a share of the total.

    The eigenvalues are sorted in descending order and accumulated one by
    one; the count is returned as soon as the running sum is at least
    ``sum(eigVals) * percentage``.

    Args:
        eigVals: 1-D sequence of eigenvalues.
        percentage: target fraction of the total eigenvalue sum.

    Returns:
        The number of leading (largest) eigenvalues required, or ``None``
        (implicitly) when the running sum never reaches the target.
    """
    descending = np.sort(eigVals)[::-1]  # np.sort is ascending; reverse it
    target = sum(descending) * percentage
    running = 0
    for count, value in enumerate(descending, start=1):
        running += value
        if running >= target:
            return count
import numpy as np
def zeroMean(dataMat):
    """Center the data by subtracting each column's mean.

    Args:
        dataMat: 2-D array-like; the mean is taken along axis 0, i.e. one
            mean per column (feature).

    Returns:
        A tuple ``(centered, columnMeans)``: the input with the column
        means subtracted, and the column means themselves.
    """
    columnMeans = np.mean(dataMat, axis=0)
    return dataMat - columnMeans, columnMeans

def pca(dataMat,n):
    """Reduce dataMat to its top-n principal components and reconstruct it.

    Args:
        dataMat: 2-D array-like with one sample per row and one feature per
            column (the covariance is computed with ``rowvar=0``).
        n: number of principal components (largest eigenvalues) to keep.

    Returns:
        A tuple ``(lowDDataMat, reconMat)`` of ``np.matrix`` objects:
        the centered data projected onto the n selected eigenvectors, and
        the data reconstructed from that projection with the column means
        added back.

    Side effects:
        Prints the shape of the covariance matrix.
    """
    newData, meanVal = zeroMean(dataMat)
    # rowvar=0: each row is a sample, each column a variable; returns ndarray.
    covMat = np.cov(newData, rowvar=0)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0; both wrap
    # the array as np.matrix so `*` below is matrix multiplication.
    # Eigenvectors are stored column-wise: one column per eigenvector.
    eigVals, eigVects = np.linalg.eig(np.asmatrix(covMat))
    print('covMat.shape',covMat.shape)
    eigValIndice = np.argsort(eigVals)              # ascending order of eigenvalues
    n_eigValIndice = eigValIndice[-1:-(n+1):-1]     # indices of the n largest, largest first
    n_eigVect = eigVects[:, n_eigValIndice]         # matching eigenvectors as columns
    lowDDataMat = newData * n_eigVect               # data in the low-dimensional space
    reconMat = (lowDDataMat * n_eigVect.T) + meanVal  # map back and restore the means
    return lowDDataMat, reconMat
# Demo input: eight samples (rows) with four features (columns) each.
dataMat = [
    [1,2,3,4],
    [2,3,4,10],
    [1,5,6,28],
    [1,2,4,1],
    [1,2,3,4],
    [1,2,3,4],
    [1,2,3,4],
    [1,2,3,4],
]
# Keep the two leading principal components.
pca(dataMat,2)
covMat.shape (4, 4)





(matrix([[ -3.45898736,  -0.28022317],
         [  2.69546487,   0.27883528],
         [ 20.91013322,   0.05452816],
         [ -6.31066127,   1.06775239],
         [ -3.45898736,  -0.28022317],
         [ -3.45898736,  -0.28022317],
         [ -3.45898736,  -0.28022317],
         [ -3.45898736,  -0.28022317]]),
 matrix([[  1.0594375 ,   2.00726655,   2.98693413,   4.0002547 ],
         [  1.18654049,   2.90055019,   4.17881897,   9.99651423],
         [  1.23800323,   5.02909718,   5.94768087,  28.00101987],
         [  1.27826878,   2.03401986,   3.93882949,   1.00119242],
         [  1.0594375 ,   2.00726655,   2.98693413,   4.0002547 ],
         [  1.0594375 ,   2.00726655,   2.98693413,   4.0002547 ],
         [  1.0594375 ,   2.00726655,   2.98693413,   4.0002547 ],
         [  1.0594375 ,   2.00726655,   2.98693413,   4.0002547 ]]))
# NOTE(review): presumably a quick check that a vector with rounded
# components (0.85, 0.53) has roughly unit length — where these two numbers
# come from is not shown here; confirm.
0.85**2+0.53**2
1.0034
# Scratch experiments with zip(); `a` is rebound right afterwards.
a = zip([('a','b'),('c','d')])
a, b = [1,2], [3,4]
c = list(zip(a,b))
# Small 3x3 example: rows are samples, columns are features.
a = np.array([[11,22,33],
              [1,2,3],
              [1,2,3]])
mean = a.mean(axis=0)
# Hand-computed covariance between column 0 and column 1 (divisor N-1 = 2),
# for comparison with the matching entry of np.cov below.
print('0行1列的cov:\n',(a[:,0]-mean[0]) @ (a[:,1]-mean[1])/(3-1))
covMat = np.cov(a,rowvar = 0)
print(covMat)
0行1列的cov:
 66.6666666667
[[  33.33333333   66.66666667  100.        ]
 [  66.66666667  133.33333333  200.        ]
 [ 100.          200.          300.        ]]
# Eigen-decomposition of the covariance matrix computed above; the
# eigenvectors are returned column-wise (column i pairs with eigVals[i]).
eigVals,eigVects = np.linalg.eig(covMat)
print('eigVals',eigVals,'\neigVects,',eigVects)
eigVals [  0.00000000e+00   4.66666667e+02  -3.02071477e-14] 
eigVects, [[-0.96362411  0.26726124 -0.35856858]
 [ 0.14824986  0.53452248 -0.71713717]
 [ 0.22237479  0.80178373  0.5976143 ]]
# Indices of the two largest eigenvalues, largest first.
eigK = np.argsort(eigVals)[::-1][:2]
# Pick the matching eigenvector columns.
eigVect = eigVects[:,eigK]
print(eigVect)
[[ 0.26726124 -0.96362411]
 [ 0.53452248  0.14824986]
 [ 0.80178373  0.22237479]]
# Project the mean-centered samples onto the selected eigenvectors.
lowData = (a - mean) @ eigVect
print(lowData)
[[  2.49443826e+01   1.99840144e-15]
 [ -1.24721913e+01  -9.99200722e-16]
 [ -1.24721913e+01  -9.99200722e-16]]
# Run pca() on the same 3x3 array, keeping two components, to compare its
# projection with the step-by-step lowData computed above.
pca(a,2)
covMat.shape (3, 3)





(matrix([[  2.49443826e+01,   1.99840144e-15],
         [ -1.24721913e+01,  -9.99200722e-16],
         [ -1.24721913e+01,  -9.99200722e-16]]), matrix([[ 11.,  22.,  33.],
         [  1.,   2.,   3.],
         [  1.,   2.,   3.]]))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值