t-SNE(降维)可视化 (sklearn.manifold.TSNE)(sklearn digits手写数据集)

      t-SNE和PCA一样,都是一种降维方法,是最好的降维方法之一

      t-SNE是一种集降维与可视化于一体的技术,它是基于SNE可视化的改进,解决了SNE在可视化后样本分布拥挤、边界不明显的特点,是目前最好的降维可视化手段。 

      sklearn.manifold.TSNE

code is from GitHub - seominseok0429/label-smoothing-visualization-pytorch: When Does Label Smoothing Help?_pytorch_implementationimp

113 stars

from time import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import manifold, datasets
# ----------------------------------------------------------------------
# Scale and visualize the embedding vectors
def plot_embedding(X, title=None):
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)
 
    plt.figure()
    ax = plt.subplot(111)
    for i in range(X.shape[0]):  #遍历所有1083个图
        plt.text(X[i, 0], X[i, 1], str(y[i]),
                 color=plt.cm.Set1(y[i] / 10.),  #cm代表color map,即颜色映射地图,Set1, Set2, Set3是它的三个颜色集合,可返回颜色
                 fontdict={'weight': 'bold', 'size': 9})

    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)

 
# ----------------------------------------------------------------------
digits = datasets.load_digits(n_class=6)
X = digits.data  #X是(1083,64)

y = digits.target #y是 (1083)
#即共1083张图, X的每张图用一个64维的矩阵表示
# t-SNE embedding of the digits dataset
print("Computing t-SNE embedding")
tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
#把64维降到2维

X_tsne = tsne.fit_transform(X)
#X_tsne是(1083,2)
 
plot_embedding(X_tsne,
               "t-SNE embedding"
               )
plt.show()

from time import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import manifold, datasets

digits = datasets.load_digits(n_class=6)
X = digits.data  #X是(1083,64)
y = digits.target #y是 (1083)
#即共1083张图, X的每张图用一个64维的矩阵表示
n_samples, n_features = X.shape
n_neighbors = 30


# ----------------------------------------------------------------------
# Scale and visualize the embedding vectors
def plot_embedding(X, title=None):
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)

    plt.figure()
    ax = plt.subplot(111)
    for i in range(X.shape[0]):  #遍历所有1083个图
        plt.text(X[i, 0], X[i, 1], str(y[i]),
                 color=plt.cm.Set1(y[i] / 10.),  #cm代表color map,即颜色映射地图,Set1, Set2, Set3是它的三个颜色集合,可返回颜色
                 fontdict={'weight': 'bold', 'size': 9})

    if hasattr(offsetbox, 'AnnotationBbox'):
        # only print thumbnails with matplotlib > 1.0
        shown_images = np.array([[1., 1.]])  # just something big
        for i in range(X.shape[0]):
            dist = np.sum((X[i] - shown_images) ** 2, 1)
            if np.min(dist) < 4e-3:
                # don't show points that are too close
                continue
            shown_images = np.r_[shown_images, [X[i]]]
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(digits.images[i], cmap=plt.cm.gray_r),
                X[i]
            )
            ax.add_artist(imagebox)

    plt.xticks([]), plt.yticks([])
    if title is not None:
        plt.title(title)


# ----------------------------------------------------------------------
# Plot images of the digits
#只取了前20*20=400个
n_img_per_row = 20
img = np.zeros((10 * n_img_per_row, 10 * n_img_per_row))
for i in range(n_img_per_row):
    ix = 10 * i + 1
    for j in range(n_img_per_row):
        iy = 10 * j + 1
        img[ix:ix + 8, iy:iy + 8] = X[i * n_img_per_row + j].reshape((8, 8))

plt.imshow(img, cmap=plt.cm.binary)
plt.xticks([])
plt.yticks([])
plt.title('A selection from the 64-dimensional digits dataset')
plt.show()

# ----------------------------------------------------------------------
# t-SNE embedding of the digits dataset
print("Computing t-SNE embedding")

tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
#把64维降到2维

t0 = time()
X_tsne = tsne.fit_transform(X)
#X_tsne是(1083,2)

plot_embedding(X_tsne,
               "t-SNE embedding of the digits (time %.2fs)" %(time() - t0)
               )

plt.show()

如果没有hasattr(offsetbox, 'AnnotationBbox') 这部分那么结果会是这样

  • 1
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是使用 PyTorch 和 scikit-learn 实现的 DANN 特征提取器 t-SNE 降维可视化代码: ```python import torch import torch.nn.functional as F import matplotlib.pyplot as plt from sklearn.manifold import TSNE # 加载数据集 train_loader = torch.utils.data.DataLoader( torchvision.datasets.MNIST('/files/', train=True, download=True, transform=torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), torchvision.transforms.Normalize( (0.1307,), (0.3081,)) ])), batch_size=64, shuffle=True) # 定义 DANN 特征提取器 class FeatureExtractor(torch.nn.Module): def __init__(self): super(FeatureExtractor, self).__init__() self.conv1 = torch.nn.Conv2d(1, 64, kernel_size=5) self.conv2 = torch.nn.Conv2d(64, 50, kernel_size=5) self.conv2_drop = torch.nn.Dropout2d() self.fc1 = torch.nn.Linear(50*4*4, 100) self.fc2 = torch.nn.Linear(100, 10) def forward(self, x): x = F.relu(F.max_pool2d(self.conv1(x), 2)) x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) x = x.view(-1, 50*4*4) x = F.relu(self.fc1(x)) x = F.dropout(x, training=self.training) x = self.fc2(x) return x # 加载模型参数 model = FeatureExtractor() model.load_state_dict(torch.load('dann_feature_extractor.pth')) # 提取特征向量 features = [] labels = [] for data, target in train_loader: features.append(model(data).detach().numpy()) labels.append(target.detach().numpy()) features = np.concatenate(features, axis=0) labels = np.concatenate(labels, axis=0) # 使用 t-SNE 进行特征降维 tsne = TSNE(n_components=2, random_state=0) features_tsne = tsne.fit_transform(features) # 可视化降维后的特征向量 for i in range(10): plt.scatter(features_tsne[labels==i, 0], features_tsne[labels==i, 1], label=str(i)) plt.legend() plt.show() ``` 这里使用了 MNIST 数据集进行测试,可以替换成其他数据集。运行代码后将会显示出降维后的特征向量的散点图。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值