import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import hiddenlayer as hl
from sklearn.manifold import TSNE
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data as Data
import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import make_grid
# Load the MNIST training split from ./data (it must already be on disk,
# because download=False).
train_data = MNIST(
    root="./data",  # dataset directory
    train=True,  # training split
    # Pass a transform *instance*; the original passed the ToTensor class itself.
    transform=transforms.ToTensor(),
    download=False,
)
# Flatten every 28x28 image into a 784-element float vector scaled to [0, 1].
# The pixels are read from `.data` directly, so the scaling is done by hand here.
train_data_x = train_data.data.float() / 255.0
train_data_x = train_data_x.reshape(train_data_x.shape[0], -1)
train_data_y = train_data.targets
# Data loader over the image vectors only: an autoencoder needs no labels.
train_loader = Data.DataLoader(
    dataset=train_data_x,
    batch_size=64,
    shuffle=True,
    num_workers=0,
)
# Load the MNIST test split.
test_data = MNIST(
    root="./data",
    train=False,  # test split
    transform=transforms.ToTensor(),
    download=False,
)
# Flatten the test images the same way and keep their labels for later plots.
test_data_x = test_data.data.float() / 255.0
test_data_x = test_data_x.reshape(test_data_x.shape[0], -1)
test_data_y = test_data.targets
print("训练数据集", train_data_x.shape)
print("测试数据集", test_data_x.shape)
# Disabled snippet: preview a single batch of training images.
# for step, b_x in enumerate(train_loader):
#     if step > 0:
#         break
# # Arrange the batch into an image grid and display it.
# im = make_grid(b_x.reshape((-1, 1, 28, 28)))
# im = im.data.numpy().transpose((1, 2, 0))
# plt.figure()
# plt.imshow(im)
# plt.axis("off")
# plt.show()
# Fully connected autoencoder.
class EnDecodder(nn.Module):
    """Symmetric fully connected autoencoder.

    The encoder maps ``input_dim -> 512 -> 256 -> 128 -> latent_dim`` with a
    Tanh after every linear layer. The decoder mirrors it back to
    ``input_dim`` and ends with a Sigmoid, so reconstructions lie in [0, 1].

    Args:
        input_dim: Length of each flattened input vector (default 784).
        latent_dim: Size of the bottleneck code (default 3).
    """

    def __init__(self, input_dim=784, latent_dim=3):
        super().__init__()
        # Encoder: compress the input down to the latent code.
        self.Encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.Tanh(),
            nn.Linear(512, 256),
            nn.Tanh(),
            nn.Linear(256, 128),
            nn.Tanh(),
            nn.Linear(128, latent_dim),
            nn.Tanh(),
        )
        # Decoder: expand the latent code back to the input size.
        self.Decoder = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 256),
            nn.Tanh(),
            nn.Linear(256, 512),
            nn.Tanh(),
            nn.Linear(512, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``(code, reconstruction)`` for a batch of input vectors."""
        encoder = self.Encoder(x)
        decoder = self.Decoder(encoder)
        return encoder, decoder
# Build the autoencoder and print its layer structure.
edmodel = EnDecodder()
print(edmodel)
# Reconstruction objective: mean squared error between output and input.
loss_func = nn.MSELoss()
# Adam over every model parameter.
optimizer = optim.Adam(edmodel.parameters(), lr=0.003)
# Records the per-epoch training metrics.
history1 = hl.History()
# Canvas that redraws the logged metrics while training runs.
canvas1 = hl.Canvas()
train_num = 0
val_num = 0  # not used in this section; kept in case later code reads it
edmodel.train()
# Train for 10 epochs, reconstructing each batch from itself.
for epoch in range(10):
    train_loss_epoch = 0
    # Reset the sample counter every epoch. Without this the count keeps
    # growing across epochs and the logged mean loss is under-reported from
    # the second epoch onwards.
    train_num = 0
    for b_x in train_loader:
        _, output = edmodel(b_x)
        loss = loss_func(output, b_x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # MSELoss averages over the batch, so weight it by the batch size to
        # get a correct per-sample mean when the last batch is smaller.
        batch_count = b_x.size(0)
        train_loss_epoch += loss.item() * batch_count
        train_num += batch_count
    # Mean reconstruction loss over the samples seen in this epoch.
    train_loss = train_loss_epoch / train_num
    history1.log(epoch, train_loss=train_loss)
    # Refresh the loss curve after every epoch.
    with canvas1:
        canvas1.draw_plot(history1["train_loss"])
def _show_digit_grid(images):
    """Draw flattened 28x28 images on a 10x10 grayscale grid and show the figure.

    ``images`` is iterated row by row; each row is a tensor of 784 values and
    at most 100 rows fit on the grid.
    """
    plt.figure(figsize=(6, 6))
    for idx, flat in enumerate(images):
        plt.subplot(10, 10, idx + 1)
        plt.imshow(flat.numpy().reshape(28, 28), cmap=plt.cm.gray)
        plt.axis("off")
    plt.show()


# Reconstruct the first 100 test digits. forward() returns
# (code, reconstruction), so the second element is the decoder output; the
# original stored it under the misleading name `test_encoder`.
edmodel.eval()
with torch.no_grad():  # inference only, so no autograd graph is needed
    _, test_decoder = edmodel(test_data_x[0:100, :])
# Original test images.
_show_digit_grid(test_data_x[0:test_decoder.shape[0], :])
# The same images after an encode/decode round trip.
_show_digit_grid(test_decoder)
# Encode the first TEST_num test digits and plot their 3-D codes, each point
# drawn as its digit label.
edmodel.eval()
TEST_num = 500
with torch.no_grad():  # inference only, so no autograd graph is needed
    test_encoder, _ = edmodel(test_data_x[0:TEST_num, :])
print("test_encoder.shape", test_encoder.shape)
# %config InlineBackend.print_figure_kwargs = {'bbox_inches':None}
test_encoder_arr = test_encoder.numpy()
fig = plt.figure(figsize=(12, 8))
# Create the 3-D axes through the figure: a bare Axes3D(fig) is no longer
# attached to the figure on current Matplotlib, which leaves the plot empty.
ax1 = fig.add_subplot(111, projection="3d")
X = test_encoder_arr[:, 0]
Y = test_encoder_arr[:, 1]
Z = test_encoder_arr[:, 2]
# ax.text() does not autoscale the axes, so set the limits from the data.
ax1.set_xlim([X.min(), X.max()])
ax1.set_ylim([Y.min(), Y.max()])
ax1.set_zlim([Z.min(), Z.max()])
# Convert the labels once rather than on every loop iteration.
labels = test_data_y[0:test_encoder.shape[0]].numpy()
# NOTE(review): Set1 has 9 entries, so labels 8 and 9 probably get the same
# box colour; consider a 10-colour map such as tab10.
for x, y, z, label in zip(X, Y, Z, labels):
    ax1.text(
        x,
        y,
        z,
        str(label),
        fontsize=8,
        bbox=dict(boxstyle="round", facecolor=plt.cm.Set1(label), alpha=0.7),
    )
plt.show()