import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# --- Data: MNIST train/test datasets and loaders ---------------------------
# ToTensor() converts each PIL image to a float tensor in [0, 1];
# for MNIST that is presumably shape (1, 28, 28) — the model later
# reshapes to (batch, 28, 28) rows-as-sequence.
train_dataset = datasets.MNIST(root='./',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
test_dataset = datasets.MNIST(root='./',
                              train=False,
                              transform=transforms.ToTensor(),
                              download=True)

batch_size = 64

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
# Shuffling the evaluation set serves no purpose (accuracy is order-independent)
# and makes eval batches non-reproducible, so keep the test loader deterministic.
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

# Sanity check: print the tensor shapes of a single batch, then stop.
for inputs, labels in train_loader:
    print(inputs.shape)
    print(labels.shape)
    break
# NOTE(review): removed a module-level triple-quoted string that held a dead,
# commented-out duplicate of the LSTM class below (annotated in Chinese).
# The string was evaluated and discarded at import time, so deleting it is
# behavior-preserving; the live class definition follows.
class LSTM(nn.Module):
    """Single-layer LSTM classifier for MNIST.

    Each 28x28 image is treated as a sequence of 28 rows of 28 features.
    The hidden state after the last row is projected to 10 class scores.

    forward() returns RAW LOGITS, shape (batch, 10). The previous version
    applied Softmax here, but this model is trained with
    nn.CrossEntropyLoss, which applies log-softmax internally — feeding it
    already-softmaxed probabilities flattens the gradients and slows/stalls
    training. Softmax is monotonic, so argmax-based accuracy is unaffected.
    """

    def __init__(self):
        super(LSTM, self).__init__()
        self.lstm = torch.nn.LSTM(
            input_size=28,    # one image row = 28 pixel features per timestep
            hidden_size=64,   # size of the LSTM hidden state
            num_layers=1,
            batch_first=True  # input/output tensors are (batch, seq, feature)
        )
        # Fully connected head: last hidden state -> 10 class scores.
        self.out = torch.nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        # Reshape to (batch, seq_len=28, features=28) regardless of an
        # optional leading channel dimension.
        x = x.view(-1, 28, 28)
        # h_n: (num_layers, batch, hidden_size) — hidden state at the final
        # timestep; even with batch_first=True, h_n's dim 0 is num_layers.
        output, (h_n, c_n) = self.lstm(x)
        # Take the last layer's final hidden state: (batch, hidden_size).
        last_hidden = h_n[-1, :, :]
        # Return unnormalized logits; apply softmax externally only if
        # probabilities are actually needed.
        return self.out(last_hidden)
# --- Training objects shared by train() / test() below ---------------------
LR = 0.0003  # Adam learning rate

model = LSTM()
# CrossEntropyLoss takes class scores plus integer labels.
entropy_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
def train():
    """Run one optimization epoch over ``train_loader``.

    Uses the module-level ``model``, ``entropy_loss`` and ``optimizer``.
    Returns None; the model parameters are updated in place.
    """
    model.train()  # enable training-mode behavior (dropout/batchnorm, if any)
    # Unpack batches directly — the previous enumerate() index was unused.
    for inputs, labels in train_loader:
        out = model(inputs)                # class scores, shape (batch, 10)
        loss = entropy_loss(out, labels)
        optimizer.zero_grad()              # clear gradients from the last step
        loss.backward()
        optimizer.step()
def _count_correct(loader):
    """Return the number of correctly classified samples in *loader* as a tensor."""
    correct = 0
    for inputs, labels in loader:
        out = model(inputs)
        # Index of the highest score per sample is the predicted class.
        _, predicted = torch.max(out, 1)
        correct += (predicted == labels).sum()
    return correct


def test():
    """Print classification accuracy on the test set and on the train set.

    Evaluation runs under ``torch.no_grad()`` — the previous version tracked
    gradients for every forward pass, wasting memory and time.
    """
    model.eval()  # evaluation-mode behavior
    with torch.no_grad():
        correct = _count_correct(test_loader)
        print("Test acc: {0}".format(correct.item() / len(test_dataset)))
        correct = _count_correct(train_loader)
        print("Train acc: {0}".format(correct.item() / len(train_dataset)))
# Main loop: alternate one training epoch with an accuracy report.
for epoch in range(10):
    print('epoch:', epoch)
    train()
    test()