Simple Applications of LSTM and GRU
Task: Seq2Seq
Train a model to learn: "hello" -> "ohlol". This is a character-level sequence-to-sequence task: each input character should be mapped to the output character at the same position.
LSTM
Structure:
The LSTM equations:
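For reference, these are the standard LSTM gate equations, written in the notation of PyTorch's nn.LSTM documentation (σ is the sigmoid function and ⊙ is elementwise multiplication):

$$
\begin{aligned}
i_t &= \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
f_t &= \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
g_t &= \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
o_t &= \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$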
import torch
import torch.nn as nn

# parameters
num_class = 4       # number of output classes ('e', 'h', 'l', 'o')
input_size = 4      # size of the character vocabulary
batch_size = 1
hidden_size = 8
num_layers = 2
embedding_size = 10
seq_len = 5

# data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello"
y_data = [3, 1, 2, 3, 2]  # "ohlol"

# input should be (batchSize, seqLen); target should be (batchSize * seqLen)
inputs = torch.LongTensor(x_data).view(batch_size, seq_len)  # batch dimension first
labels = torch.LongTensor(y_data)
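# Sanity check (an added sketch, not in the original post): the index lists
# decode back to the input/target strings through idx2char.
assert ''.join(idx2char[i] for i in x_data) == 'hello'
assert ''.join(idx2char[i] for i in y_data) == 'ohlol'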
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.emd = nn.Embedding(input_size, embedding_size)
        self.lstm = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)  # batch dimension comes first
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        h0 = torch.zeros(num_layers, batch_size, hidden_size)  # initial hidden state
        c0 = torch.zeros(num_layers, batch_size, hidden_size)  # initial cell state (an LSTM needs both)
        x = self.emd(x)                # (batch, seqLen, embeddingSize)
        x, _ = self.lstm(x, (h0, c0))  # (batch, seqLen, hiddenSize)
        x = self.fc(x)                 # (batch, seqLen, numClass)
        return x.view(-1, num_class)   # flatten to (batch * seqLen, numClass) for nn.CrossEntropyLoss

model = Net()
# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, predicted = outputs.max(dim=1)  # index of the largest logit at each step
    predicted = predicted.data.numpy()
    print('Predicted string is', ''.join([idx2char[idx] for idx in predicted]), end='')
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
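Note that nn.LSTM carries two states, the hidden state h and the cell state c, and its forward call returns (output, (h_n, c_n)). Both initial states default to zeros when omitted, so calling self.lstm(x) without an explicit (h0, c0) would train just as well here.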
GRU
Structure and equations:
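For reference, the GRU equations as given in PyTorch's nn.GRU documentation: the reset gate r_t gates the previous hidden state inside the candidate n_t, and the update gate z_t interpolates between the candidate and h_{t-1}:

$$
\begin{aligned}
r_t &= \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{t-1} + b_{hr}) \\
z_t &= \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{t-1} + b_{hz}) \\
n_t &= \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{t-1} + b_{hn})) \\
h_t &= (1 - z_t) \odot n_t + z_t \odot h_{t-1}
\end{aligned}
$$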
import torch
import torch.nn as nn

# parameters
num_class = 4
input_size = 4
batch_size = 1
hidden_size = 8
embedding_size = 10
num_layers = 2
seq_len = 5

# data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # "hello"
y_data = [3, 1, 2, 3, 2]  # "ohlol"

# input should be (seqLen, batchSize); target should be (batchSize * seqLen)
inputs = torch.LongTensor(x_data).view(seq_len, batch_size)  # sequence dimension first
labels = torch.LongTensor(y_data)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.emd = nn.Embedding(input_size, embedding_size)
        self.gru = nn.GRU(input_size=embedding_size, hidden_size=hidden_size,
                          num_layers=num_layers)  # batch_first defaults to False
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, batch_size, hidden_size)  # h0
        x = self.emd(x)                  # (seqLen, batch, embeddingSize)
        x, hidden = self.gru(x, hidden)  # (seqLen, batch, hiddenSize)
        x = self.fc(x)                   # (seqLen, batch, numClass)
        return x.view(-1, num_class)     # flatten to (seqLen * batch, numClass) for nn.CrossEntropyLoss

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, predicted = outputs.max(dim=1)  # index of the largest logit at each step
    predicted = predicted.data.numpy()
    print('Predicted string is', ''.join([idx2char[idx] for idx in predicted]), end='')
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
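A practical difference between the two models: the GRU merges the LSTM's cell state into the hidden state and has one fewer gate, so it carries fewer parameters for the same sizes. A minimal sketch (not from the original post, reusing the sizes above) that compares the counts:

import torch.nn as nn

# The LSTM has four gate blocks (i, f, g, o) against the GRU's three (r, z, n),
# so with identical sizes the LSTM holds exactly 4/3 as many parameters.
lstm = nn.LSTM(input_size=10, hidden_size=8, num_layers=2)
gru = nn.GRU(input_size=10, hidden_size=8, num_layers=2)

def count(m):
    return sum(p.numel() for p in m.parameters())

print('LSTM parameters:', count(lstm))
print('GRU parameters: ', count(gru))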