主成分分析简介:
主成分分析(PCA)是常用的降维方法之一,用于对指标变量矩阵进行降维。在 Python 中可通过 scikit-learn 实现:from sklearn.decomposition import PCA
数据举例
num | gps_w | gps_j | price | st |
A0001 | 22.56614225 | 113.9808368 | 66 | 0 |
A0002 | 22.68620526 | 113.9405252 | 65.5 | 0 |
A0003 | 22.57651183 | 113.957198 | 65.5 | 1 |
A0004 | 22.56484081 | 114.2445711 | 75 | 0 |
A0005 | 22.55888775 | 113.9507227 | 65.5 | 0 |
A0006 | 22.55899906 | 114.2413174 | 75 | 0 |
A0007 | 22.54900371 | 113.9722597 | 65.5 | 1 |
A0008 | 22.56277351 | 113.9565735 | 65.5 | 0 |
A0009 | 22.50001192 | 113.8956606 | 66 | 0 |
# -*- coding: utf-8 -*-
# Principal component analysis (PCA) for dimensionality reduction.
#
# Loads the indicator table from an Excel file, fits a PCA model on it, and
# prints the projected (reduced) data followed by the explained-variance
# ratio of each retained component.
import pandas as pd
from sklearn.decomposition import PCA

# Parameter initialisation
inputfile = 'pdata.xls'
# Intended destination for the reduced data; this script does not write it.
outputfile = 'dimention_reducted.xls'

# Load the data, using the 'num' column as the row index.
data = pd.read_excel(inputfile, index_col=u'num')

# n_components='mle' lets scikit-learn choose the number of components.
# The default copy=True is used deliberately: with copy=False the data passed
# to fit() may be overwritten, and scikit-learn documents that
# fit(X).transform(X) then does not yield the expected result.
pca = PCA(n_components='mle', whiten=False)
pca.fit(data)

components = pca.components_  # eigenvectors (principal axes) of the model
a = pca.explained_variance_ratio_  # share of variance explained per component
low_d = pca.transform(data)  # project the data onto the retained components

# Single-argument print() calls behave the same on Python 2 and Python 3.
print(low_d)
print(a)
# To map the reduced data back to the original space, call
# pca.inverse_transform(low_d).
运行结果(数据过长,仅显示部分;最后一行为各主成分的方差贡献率):
[[ -3.1286575 0.70258512 0.37438004]
[ -3.62752518 0.6594219 0.28951319]
[ -3.60629098 -0.27213001 0.63513077]
...,
[ 15.89052204 0.11115613 0.43928345]
[ -3.60214682 -0.40482886 0.27289147]
[ 15.89614163 -0.10911559 -0.30845716]]
[ 0.97968047 0.01112823 0.0077572 ]