Load the data
from sklearn.datasets import load_digits
digits = load_digits(n_class=6)
X, y = digits.data, digits.target
n_samples, n_features = X.shape
n_neighbors = 30
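With n_class=6, load_digits keeps only the digits 0 through 5; each sample is an 8x8 grayscale image flattened into a 64-dimensional feature vector, and n_neighbors is the neighborhood size used by the neighbor-based methods further down. The quick check below is an optional sketch, not part of the original pipeline:
print(f"{n_samples} samples, {n_features} features (8x8 pixel images)")
print(f"classes present: {sorted({int(label) for label in y})}")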
Visualize the digits
import matplotlib.pyplot as plt
fig, axs = plt.subplots(nrows=10, ncols=10, figsize=(6, 6))
for idx, ax in enumerate(axs.ravel()):
    ax.imshow(X[idx].reshape((8, 8)), cmap=plt.cm.binary)
    ax.axis("off")
fig.suptitle("A selection from the 64-dimensional digits dataset", fontsize=16)
Helper function for visualization
import numpy as np
from matplotlib import offsetbox
from sklearn.preprocessing import MinMaxScaler
def plot_embedding(X, title, ax):
    # rescale the embedding to [0, 1] so distances between plotted points are comparable
    X = MinMaxScaler().fit_transform(X)
    shown_images = np.array([[1.0, 1.0]])  # just something big
    for i in range(X.shape[0]):
        # plot each sample as its digit label, colored by class
        ax.text(
            X[i, 0],
            X[i, 1],
            str(y[i]),
            color=plt.cm.Dark2(y[i]),
            fontdict={"weight": "bold", "size": 9},
        )
        # show an annotation box with the digit image, but skip points
        # that are too close to an image already shown
        dist = np.sum((X[i] - shown_images) ** 2, 1)
        if np.min(dist) < 4e-3:
            continue
        shown_images = np.concatenate([shown_images, [X[i]]], axis=0)
        imagebox = offsetbox.AnnotationBbox(
            offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r), X[i]
        )
        ax.add_artist(imagebox)
    ax.set_title(title)
    ax.axis("off")
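Before running the full comparison, the helper can be tried on a single 2D projection. The snippet below is a minimal sketch: TruncatedSVD is used only as a convenient example, and the names svd_projection and single_ax are illustrative, not part of the original script.
from sklearn.decomposition import TruncatedSVD
_, single_ax = plt.subplots(figsize=(8, 8))
svd_projection = TruncatedSVD(n_components=2).fit_transform(X)
plot_embedding(svd_projection, "Truncated SVD (sanity check)", single_ax)
plt.show()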
Manifold learning algorithms
from sklearn.decomposition import TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomTreesEmbedding
from sklearn.manifold import (
    Isomap,
    LocallyLinearEmbedding,
    MDS,
    SpectralEmbedding,
    TSNE,
)
from sklearn.neighbors import NeighborhoodComponentsAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.random_projection import SparseRandomProjection
embeddings = {
    "Random projection embedding": SparseRandomProjection(
        n_components=2, random_state=42
    ),
    "Truncated SVD embedding": TruncatedSVD(n_components=2),
    "Linear Discriminant Analysis embedding": LinearDiscriminantAnalysis(
        n_components=2
    ),
    "Isomap embedding": Isomap(n_neighbors=n_neighbors, n_components=2),
    "Standard LLE embedding": LocallyLinearEmbedding(
        n_neighbors=n_neighbors, n_components=2, method="standard"
    ),
    "Modified LLE embedding": LocallyLinearEmbedding(
        n_neighbors=n_neighbors, n_components=2, method="modified"
    ),
    "Hessian LLE embedding": LocallyLinearEmbedding(
        n_neighbors=n_neighbors, n_components=2, method="hessian"
    ),
    "LTSA LLE embedding": LocallyLinearEmbedding(
        n_neighbors=n_neighbors, n_components=2, method="ltsa"
    ),
    "MDS embedding": MDS(n_components=2, n_init=1, max_iter=100),
    "Random Trees embedding": make_pipeline(
        RandomTreesEmbedding(n_estimators=200, max_depth=5, random_state=0),
        TruncatedSVD(n_components=2),
    ),
    "Spectral embedding": SpectralEmbedding(
        n_components=2, random_state=0, eigen_solver="arpack"
    ),
    "t-SNE embedding": TSNE(
        n_components=2, init="pca", learning_rate="auto", random_state=0
    ),
    "NCA embedding": NeighborhoodComponentsAnalysis(
        n_components=2, init="random", random_state=0
    ),
}
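Every object in this dictionary exposes fit_transform, and passing y is always safe: unsupervised transformers ignore the labels by the scikit-learn API contract, while Linear Discriminant Analysis and NCA actually use them. As a minimal sketch (the variable name rp_2d is illustrative), any single method can be fitted outside the loop below:
rp_2d = embeddings["Random projection embedding"].fit_transform(X, y)
print(rp_2d.shape)  # expected: (n_samples, 2)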
Nonlinear dimensionality reduction of the image data (down to 2 dimensions)
from time import time
projections, timing = {}, {}
for name, transformer in embeddings.items():
    if name.startswith("Linear Discriminant Analysis"):
        data = X.copy()
        data.flat[:: X.shape[1] + 1] += 0.01  # make X invertible for LDA
    else:
        data = X
    try:
        print(f"Computing {name}...")
        start_time = time()
        projections[name] = transformer.fit_transform(data, y)
        timing[name] = time() - start_time
    except Exception as exc:
        # skip any method that fails so the rest of the comparison still runs
        print(f"{name} failed: {exc}")
        continue
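After the loop, timing contains only the methods that completed successfully. An optional sketch to summarize runtimes and output shapes before plotting:
for name in sorted(timing, key=timing.get):
    print(f"{name}: {timing[name]:.3f}s, output shape {projections[name].shape}")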
Visualize the projection results
from itertools import zip_longest
fig, axs = plt.subplots(nrows=7, ncols=2, figsize=(17, 24))
for name, ax in zip_longest(timing, axs.ravel()):
    if name is None:
        ax.axis("off")
        continue
    title = f"{name} (time {timing[name]:.3f}s)"
    plot_embedding(projections[name], title, ax)
plt.show()
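When the script is run non-interactively, the comparison grid can also be written to disk with a standard Matplotlib call; the filename below is arbitrary:
fig.savefig("manifold_embeddings_comparison.png", dpi=150, bbox_inches="tight")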