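# Main content: build the covariance matrix, compute how many features are
# needed to retain 95% of the original data's feature information, and
# construct a new dataset via PCA dimensionality reduction.
# Demonstrate the PCA procedure on the iris dataset.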
import pandas as pd
from sklearn.datasets import load_iris
import numpy as np
# Load the iris dataset and split it into the feature matrix and the labels.
iris = load_iris()
X = iris.data
y = iris.target

# Build a single table: the four measurement columns followed by the label.
# The label vector is appended as one extra column; the combined array has a
# single dtype, so the labels share the dtype of the stacked result.
df = pd.DataFrame(
    data=np.column_stack((X, y)),
    index=range(len(X)),
    columns=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class'],
)
# print(df['class'].value_counts())
#
# X = df.ix[:,0:4].values
# y = df.ix[:,4].values
from matplotlib import pyplot as plt
# import math
#
# Map each class code to its human-readable species name.
label_dict = {
    0: 'Iris-Setosa',
    1: 'Iris-Versicolor',
    2: 'Iris-Virginica',  # spelling corrected from 'Iris-Virgnica'
}
# feature_dict = {0:'sepal length [cm]',
# 1:'sepal width [cm]',
# 2:'petal length [cm]',
# 3:'petal width [cm]'}
# plt.figure(figsize