import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt_sne
from sklearn import datasets
from sklearn.manifold import TSNE
import os
# Use the SimHei font for sans-serif text so the Chinese characters in the
# plot title are rendered instead of missing-glyph boxes.
# NOTE(review): SimHei must be installed on the machine -- confirm for the
# target environment.
plt_sne.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative tick labels with an ASCII hyphen rather than the Unicode
# minus sign (presumably because the CJK font lacks that glyph).
plt_sne.rcParams['axes.unicode_minus'] = False
def plot_tsne(features, labels, epoch, fileNameDir=None):
    """Embed features into 2-D with t-SNE and save a labelled scatter plot.

    Args:
        features: Array of shape (N, m): N samples, each with m dimensions.
        labels: Array of shape (N,): one class label per sample.
        epoch: Identifier used (via ``str``) as the output file stem; the
            figure is written to ``<fileNameDir>/<epoch>.jpg``.
        fileNameDir: Directory to write the figure into. It is created when
            missing. ``None`` (the default) means the current directory.

    Returns:
        None. The only results are the printed diagnostics and the JPEG file.
    """
    # pandas/seaborn are imported locally because the module header does not
    # import them.
    import pandas as pd
    import seaborn as sns

    # Diagnostics: shapes, container types, and whether any NaN/inf is present.
    print(features.shape, labels.shape)
    print(type(features), type(labels))
    print(np.any(np.isnan(features)), np.any(np.isinf(features)))

    # Replace NaN with 0 and +/-inf with large finite numbers so the
    # embedding step receives finite input only.
    features = np.nan_to_num(features)

    # The default of None used to crash in os.path.exists(None); fall back to
    # the current directory instead.
    if fileNameDir is None:
        fileNameDir = os.curdir
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(fileNameDir, exist_ok=True)

    # Number of distinct classes, e.g. labels drawn from [0, 1, 2, 3] -> 4.
    class_num = len(np.unique(labels))

    # t-SNE down to 2 components; PCA is only used to initialise the
    # embedding. A failure here propagates to the caller (the previous bare
    # ``except`` merely retried the identical call).
    tsne = TSNE(n_components=2, init='pca', random_state=0)
    tsne_features = tsne.fit_transform(features)

    # Tabular view of the embedding: label plus the two t-SNE components.
    df = pd.DataFrame()
    df["y"] = labels
    df["comp1"] = tsne_features[:, 0]
    df["comp2"] = tsne_features[:, 1]

    fig = plt_sne.figure(figsize=(15, 15))  # enlarge the canvas
    try:
        # hue: one colour per distinct label; style: one marker per label.
        sns.scatterplot(
            x=df.comp1.tolist(),
            y=df.comp2.tolist(),
            hue=df.y.tolist(),
            style=df.y.tolist(),
            palette=sns.color_palette("Set2", class_num),
            data=df,
        ).set(title="T-SNE 语义空间分布图")

        # Build the file name first and join afterwards, so a '%' inside
        # fileNameDir is never interpreted as a format specifier.
        out_path = os.path.join(fileNameDir, str(epoch) + ".jpg")
        plt_sne.savefig(out_path, format="jpg")
        # Has no visible effect under the non-interactive Agg backend chosen
        # at import time; kept for callers that switch to an interactive one.
        plt_sne.show()
    finally:
        # Release the figure; otherwise every call (e.g. once per epoch)
        # leaves another 15x15 figure alive inside pyplot.
        plt_sne.close(fig)
if __name__ == '__main__':
    # Demo run: take the two-class subset of the scikit-learn digits data,
    # print the array shapes, and write the t-SNE figure to ./test/Set3.jpg.
    dataset = datasets.load_digits(n_class=2)
    features = dataset.data      # numpy array of samples
    labels = dataset.target      # numpy array of class labels
    for array in (features, labels):
        print(array.shape)
    plot_tsne(features, labels, "Set3", fileNameDir="test")
# Source: https://blog.csdn.net/power_kaikaige/article/details/128324084