In [50]: import numpy as np
In [51]: import matplotlib.pyplot as plt
# Build a synthetic 2-D data set: 100 points scattered around the line y = 0.75 * x + 3.
In [52]: X = np.empty((100,2))
# Feature 0: 100 samples drawn uniformly between 0 and 100.
...: X[:,0] = np.random.uniform(0.,100.,size=100)
# Feature 1: a linear function of feature 0 plus Gaussian noise (mean 0, std 5).
...: X[:,1] = 0.75 * X[:,0] + 3. + np.random.normal(0,5,size=100)
# Scatter plot of the noisy points (feature 0 on x, feature 1 on y).
In [53]: plt.scatter(X[:,0],X[:,1])
In [54]: from sklearn.decomposition import PCA
...:
# Keep a single principal component, i.e. reduce the 2-D data to 1-D.
...: pca = PCA(n_components=1)
...: pca.fit(X)
...: X_reduction = pca.transform(X)
# Map the 1-D projection back into the original 2-D feature space.
...: X_restore = pca.inverse_transform(X_reduction)
# Scatter plot of the reconstructed points.
In [55]: plt.scatter(X_restore[:,0],X_restore[:,1])
PCA 降维降低了维度,也丢失了一部分信息:还原后的数据虽然回到了二维空间,但所有点都落在第一主成分所在的直线上。不过,丢失的信息中可能有一部分是噪音,因此 PCA 降维同时也起到了一定的降噪作用。
人脸识别
载入人脸数据集
In [59]: import numpy as np
...: import matplotlib.pyplot as plt
In [60]: from sklearn.datasets import fetch_lfw_people
# Load the "Labeled Faces in the Wild" people data set through scikit-learn.
# NOTE(review): presumably this downloads and caches the data on first use; confirm against the scikit-learn docs.
In [61]: faces = fetch_lfw_people()
# 13233 face images, each flattened to 2914 features
In [63]: faces.data.shape
Out[63]: (13233, 2914)
# The same samples exposed as 2-D images (62 x 47 each; 62 * 47 = 2914)
In [65]: faces.images.shape
Out[65]: (13233, 62, 47)
# Shuffle all samples, then take the first 36 as a random selection of faces.
# Note: X is rebound here and from now on holds the shuffled face vectors.
In [66]: random_indexes = np.random.permutation(len(faces.data))
...: X = faces.data[random_indexes]
In [67]: example_faces = X[:36,:]
...: example_faces.shape
Out[67]: (36, 2914)
绘制人脸
def plot_faces(faces, n_rows=6, n_cols=6, image_shape=(62, 47)):
    """Display flattened face vectors as a grid of images.

    Parameters
    ----------
    faces : array-like
        One face per entry along the first axis. Each entry must hold
        exactly ``height * width`` values (flattened or already 2-D).
    n_rows, n_cols : int, default 6
        Grid dimensions. At most ``n_rows * n_cols`` faces are drawn; any
        additional faces are ignored.
    image_shape : tuple of (int, int), default (62, 47)
        ``(height, width)`` each face is reshaped to before drawing.

    Raises
    ------
    ValueError
        If the entries of ``faces`` do not each contain ``height * width``
        values.

    Notes
    -----
    When fewer faces than grid cells are supplied, the unused panels are
    hidden instead of failing with an ``IndexError`` part-way through.
    """
    faces = np.asarray(faces)
    height, width = image_shape

    # Validate before creating a figure so a bad input leaves no empty window.
    if faces.size != len(faces) * height * width:
        raise ValueError(
            f"expected each face to have {height * width} values "
            f"({height}x{width}), got array of shape {faces.shape}"
        )
    images = faces.reshape(len(faces), height, width)

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 10),
                             subplot_kw={'xticks': [], 'yticks': []},
                             gridspec_kw=dict(hspace=0.1, wspace=0.1),
                             squeeze=False)
    panels = axes.ravel()

    # zip stops at whichever runs out first: grid cells or faces.
    for ax, image in zip(panels, images):
        ax.imshow(image, cmap='bone')

    # Hide panels that received no face.
    for ax in panels[len(images):]:
        ax.set_visible(False)

    plt.show()
...:
# Draw the 36 randomly selected faces in a 6 x 6 grid.
...: plot_faces(example_faces)
特征脸
In [75]: %%time
# Instantiate PCA with the randomized SVD solver; n_components is left at its default.
...: pca = PCA(svd_solver='randomized')
...: pca.fit(X)
# 2914 principal components, each a vector of length 2914
In [77]: pca.components_.shape
Out[77]: (2914, 2914)
# Plot the first 36 principal components as images (the eigenfaces)
In [78]: plot_faces(pca.components_[:36])
特征脸按重要程度(即对应主成分所解释方差的大小)从高到低依次排列。