def PCA(dataSet):
    """Run a correlation-matrix PCA on ``dataSet`` and print collinearity checks.

    The columns of ``dataSet`` are centred and scaled by their sample standard
    deviation (``ddof=1``), the correlation matrix of the columns is
    eigen-decomposed, and the standardised data are projected onto the
    eigenvectors ordered by decreasing eigenvalue.

    Two diagnostics are printed, together with the ordered eigenvalues, the
    eigenvectors and the covariance matrix of the projected data:

    * ``k = sqrt(largest eigenvalue / smallest eigenvalue)``; ``k >= 15`` is
      reported as multicollinearity.
    * ``m = sum(1 / eigenvalue)``; ``m >= 5 * (number of eigenvalues)`` is
      reported as multicollinearity.

    Args:
        dataSet: 2-D array-like with observations in rows and variables in
            columns.

    Returns:
        A ``pandas.DataFrame`` with the projected data rounded to 4 decimals,
        one column per component, ordered by decreasing eigenvalue. The sign
        of each component is arbitrary.

    Raises:
        numpy.linalg.LinAlgError: if the correlation matrix contains NaN or
            inf (for example a constant column or fewer than two rows).
    """
    # Column means (axis=0 reduces over the rows).
    meanVal = np.mean(dataSet, axis=0)

    # Centring.
    CenteringMat = dataSet - meanVal

    # Correlation matrix of the columns. atleast_2d keeps the single-column
    # case (where corrcoef returns a scalar) usable by the eigen-solver.
    CorrMat = np.atleast_2d(np.corrcoef(CenteringMat, rowvar=False))
    if not np.all(np.isfinite(CorrMat)):
        # eigh does not reject NaN/inf by itself, so fail explicitly instead
        # of returning meaningless components.
        raise np.linalg.LinAlgError(
            "correlation matrix contains NaN or inf; dataSet needs at least "
            "two rows and no constant columns"
        )

    # Scaling by the sample standard deviation (n - 1 in the denominator).
    StdVal = np.std(dataSet, axis=0, ddof=1)
    StandardMat = CenteringMat / StdVal

    # Eigenvalues and eigenvectors. The correlation matrix is symmetric, so
    # eigh is the appropriate solver: it always returns real values, already
    # sorted in ascending order.
    eigVals, eigVectors = np.linalg.eigh(CorrMat)

    # Reorder from largest to smallest eigenvalue.
    eigVals_ordered = eigVals[::-1]
    eigVectors_ordered = eigVectors[:, ::-1]

    # The eigenvectors form the matrix that maps the standardised data into
    # the new (principal component) space.
    newdata = np.round(StandardMat.dot(eigVectors_ordered), 4)
    newframe = pd.DataFrame(newdata)
    newCovMat = np.cov(newdata, rowvar=0)

    # Round-off can make a zero eigenvalue slightly negative, which would turn
    # k into NaN and hide perfect collinearity. Clamp at zero for the
    # diagnostics and let a division by zero yield inf.
    diagVals = np.clip(eigVals_ordered, 0.0, None)
    with np.errstate(divide='ignore', invalid='ignore'):
        k = np.sqrt(diagVals[0] / np.min(diagVals))
        m = np.sum(1 / diagVals)

    # k diagnostic.
    print('k:', k)
    if k >= 15:
        print('k:Multicollinearity is present!')
    else:
        print('k:Non-Multicollinearity')

    # m diagnostic.
    print('m:', m)
    if m >= 5 * len(eigVals_ordered):
        print('m:Multicollinearity is present!')
    else:
        print('m:Non-Multicollinearity')

    print('\nOrdered eignvalues:\n', np.round(eigVals_ordered, 3))
    print('\nEignvectors:\n', np.round(eigVectors_ordered, 3))
    print('\nNew variance-covariance matrix:\n ', np.round(newCovMat, 3))
    return newframe
定义pca
最新推荐文章于 2024-09-22 17:12:26 发布