import pandas as pd
from sklearn.decomposition import PCA
# Spreadsheet to read and spreadsheet that receives the reduced data.
inputfile = './data.xls'
outputfile = './reduced_data.xls'

# Load the sheet without treating its first row as column names.
data = pd.read_excel(inputfile, header=None)
print(data)

# First pass: fit with default settings and print the fitted attributes.
pca = PCA()
pca.fit(data)
print(pca.components_)
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)

# Second pass: refit with three components and project the data onto them.
pca = PCA(n_components=3)
low_d = pca.fit(data).transform(data)

# NOTE(review): writing '.xls' depends on an Excel writer engine being
# available to pandas -- confirm this still works in the target environment.
pd.DataFrame(low_d).to_excel(outputfile)

# Map the reduced data back to the original feature space; this rebinds
# `data` to the reconstruction rather than the loaded sheet.
data = pca.inverse_transform(low_d)
print(data)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (kept: older matplotlib needs it to register the '3d' projection)
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA

# 10000 three-dimensional samples drawn around four fixed centres.
X, y = make_blobs(
    n_samples=10000,
    n_features=3,
    centers=[[3, 3, 3], [0, 0, 0], [1, 1, 1], [2, 2, 2]],
    cluster_std=[0.2, 0.1, 0.2, 0.2],
    random_state=9,
)

# Show the raw data in 3D. The axes are created through the figure so they
# are actually attached to it, and the points are drawn with the axes' own
# scatter(): pyplot.scatter() treats its third positional argument as the
# marker size, so the z coordinate would otherwise never be plotted.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection='3d')
ax.view_init(elev=30, azim=10)
ax.scatter(X[:, 0], X[:, 1], X[:, 2], marker='o')
plt.show()

# Keep all three components to see how the variance is distributed.
pca = PCA(n_components=3)
pca.fit(X)
print(pca.explained_variance_ratio_)
print(pca.explained_variance_)

# Reduce to two components and plot the projected samples.
pca = PCA(n_components=2)
pca.fit(X)
X_new = pca.transform(X)
plt.scatter(X_new[:, 0], X_new[:, 1], marker='o')
plt.show()
from numpy import *
def eigValPct(eigVals, percentage):
    """Return how many of the largest eigenvalues reach a share of the total.

    The eigenvalues are sorted in descending order and accumulated until the
    running sum is at least ``percentage`` times the sum of all of them.

    Args:
        eigVals: One-dimensional array-like of eigenvalues.
        percentage: Fraction of the total sum to reach (for example ``0.9``).

    Returns:
        The number of leading eigenvalues needed, as an ``int``. When the
        threshold is never reached (``percentage`` above 1, floating-point
        round-off near ``percentage == 1``, or empty input) every eigenvalue
        is counted instead of returning ``None``.
    """
    sortedDesc = np.sort(eigVals)[::-1]
    threshold = np.sum(sortedDesc) * percentage
    runningSum = 0
    for count, value in enumerate(sortedDesc, start=1):
        runningSum += value
        if runningSum >= threshold:
            return count
    # The loop ended without reaching the threshold: keep everything so the
    # caller always receives a usable integer.
    return len(sortedDesc)
def pca(dataMat, percentage=0.9):
    """Project data onto its leading principal components and reconstruct it.

    Rows of ``dataMat`` are treated as samples and columns as features: the
    mean is taken along axis 0 and the covariance is computed with
    ``rowvar=0``.

    Args:
        dataMat: Two-dimensional numeric data, samples by features.
        percentage: Share of the total eigenvalue sum the kept components
            must reach; passed to ``eigValPct`` to pick their number.

    Returns:
        A tuple ``(lowDDataMat, reconMat)`` of ``numpy.matrix`` objects:
        the centred data projected onto the kept eigenvectors, and that
        projection mapped back to the original space with the mean restored.
    """
    meanVals = np.mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals

    # np.cov returns a 0-d array for a single feature; eigh needs 2-D input.
    covMat = np.atleast_2d(np.cov(meanRemoved, rowvar=0))

    # The covariance matrix is symmetric, so use eigh: it yields real
    # eigenvalues and orthonormal eigenvectors, which the reconstruction
    # below (multiplying by the transpose) depends on.
    eigVals, eigVects = np.linalg.eigh(covMat)

    k = eigValPct(eigVals, percentage)

    # Indices of the k largest eigenvalues, largest first.
    topIdx = np.argsort(eigVals)[::-1][:k]

    # Kept as np.matrix so `*` is a matrix product and the returned types
    # match what existing callers receive.
    redEigVects = np.asmatrix(eigVects[:, topIdx])
    lowDDataMat = meanRemoved * redEigVects
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat