目录
一、MNIST训练
代码示例:
import numpy as np
from sklearn.datasets import fetch_openml
# Load the MNIST data set from OpenML and split it into train/test sets.
mnist = fetch_openml('mnist_784')
X, y = mnist['data'], mnist['target']
# The first 60000 rows are the training set (60000 samples, 784 features);
# all remaining rows are held out for testing.  Labels are taken from `y`
# (not `X`), and the test rows must not overlap the training rows, otherwise
# the reported score would be measured on the data the model was fitted on.
X_train = np.array(X[:60000], dtype=float)
y_train = np.array(y[:60000], dtype=float)
X_test = np.array(X[60000:], dtype=float)
y_test = np.array(y[60000:], dtype=float)
# KNN recognition on the raw pixel features.
from sklearn.neighbors import KNeighborsClassifier
# fit() returns the estimator itself, so construction and training are chained.
knn_clf = KNeighborsClassifier().fit(X_train, y_train)
# Timed scoring call, left disabled as in the original (IPython `%time` magic):
# %time knn_clf.score(X_test, y_test)
# PCA降维
from sklearn.decomposition import PCA
pca = PCA(0.90)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)
X_test_reduction = pca.transform(X_test)
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_reduction, y_train)
%time knn_clf.score(X_test_reduction, y_test)
二、手写识别数据集PCA降噪
代码示例:
from sklearn import datasets
# Load the 8x8 handwritten-digits data set.  (The original bound the result
# to the misspelled name `digites`, so the following lines raised NameError.)
digits = datasets.load_digits()
X = digits.data
y = digits.target
# Corrupt every pixel with Gaussian noise (mean 0, standard deviation 4).
noisy_digits = X + np.random.normal(0, 4, size=X.shape)
# Take the first 10 noisy samples of each label 0..9 and stack them in label
# order, so each block of 10 consecutive rows holds one digit class.
example_digits = np.vstack(
    [noisy_digits[y == num, :][:10] for num in range(10)]
)
# Visualisation
def plot_digits(data):
    """Draw the first 100 rows of ``data`` as a 10x10 grid of 8x8 images.

    Args:
        data: indexable collection whose items ``data[0]`` .. ``data[99]``
            can each be reshaped to ``(8, 8)``.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # NOTE(review): `plt` is not imported anywhere in the visible snippet;
    # presumably `import matplotlib.pyplot as plt` ran earlier - confirm.
    fig, axes = plt.subplots(
        10, 10, figsize=(10, 10),
        # Hide the tick marks so only the images are visible.
        subplot_kw={'xticks': [], 'yticks': []},
        gridspec_kw=dict(hspace=0.1, wspace=0.1),
    )
    for i, ax in enumerate(axes.flat):
        # clim fixes the colour scale to 0..16 for every cell, so noisy and
        # filtered images are rendered on the same scale.
        ax.imshow(data[i].reshape(8, 8),
                  cmap='binary', interpolation='nearest', clim=(0, 16))
    plt.show()


plot_digits(example_digits)
运行结果:
代码示例:
# PCA denoising: keep only enough components to explain 50% of the variance.
pca = PCA(0.5).fit(noisy_digits)
# Project the example digits into the low-dimensional component space ...
components = pca.transform(example_digits)
# ... and map them back to the original space; the noise carried by the
# discarded components is not restored.
filtered_digits = pca.inverse_transform(components)
plot_digits(filtered_digits)
运行结果: