PCA对手写数字数据集的降维案例
数据集获取地址:(原文此处为"这里下载"超链接,导出为纯文本时链接地址已丢失)
导入需要的模块和库
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
导入数据,探索数据
# Read the digit-recognizer CSV, then split it by column position:
# the first column becomes y and every remaining column becomes x.
data = pd.read_csv("../数据/digit recognizor.csv")

y = data.iloc[:, 0]
x = data.iloc[:, 1:]
查看数据维度
# Inspect the dimensions of the loaded table and of x.
# NOTE(review): these are bare expressions with no assignment or print, so
# they only show a value in an interactive/notebook session (where a cell
# echoes just its last expression); run as a plain script they display nothing.
data.shape  # (n_rows, n_columns) of the whole DataFrame
x.shape  # (n_rows, n_columns) of x, i.e. every column except the first
绘制累计可解释方差贡献率曲线,确定降维后最佳维度的大致范围
# Build a PCA with default arguments and fit it on x; the fitted model's
# explained_variance_ratio_ feeds the cumulative-variance curve.
pca_line = PCA()
pca_line.fit(x)

# Open a wide (20 x 5) figure to draw the curve on.
plt.figure(figsize=(20, 5))
plt.plot(np.cumsum(pca_line.explained_variance_ratio_