LSTM
model.py
import torch
import torch.nn as nn


class RNN(nn.Module):
    """Vanilla RNN cell: h_t = tanh(U x_t + W h_{t-1}), y_t = log_softmax(V h_t)."""

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.u = nn.Linear(input_size, hidden_size)   # input-to-hidden
        self.w = nn.Linear(hidden_size, hidden_size)  # hidden-to-hidden
        self.v = nn.Linear(hidden_size, output_size)  # hidden-to-output
        self.tanh = nn.Tanh()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, hidden):
        u_x = self.u(inputs)
        hidden = self.tanh(self.w(hidden) + u_x)
        output = self.softmax(self.v(hidden))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.input_dim = input_size
        self.hidden_dim = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.hidden2out = nn.Linear(hidden_size, output_size)
        # Why LogSoftmax? It pairs with NLLLoss (together they equal
        # CrossEntropyLoss on raw logits) and is numerically more stable
        # than computing log(softmax(x)) in two steps.
        self.softmax = nn.LogSoftmax(dim=1)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # (h_0, c_0), each shaped (num_layers, batch, hidden_dim); plain
        # tensors replace the deprecated Variable wrapper
        return (
            torch.zeros(1, 1, self.hidden_dim),
            torch.zeros(1, 1, self.hidden_dim),
        )

    def forward(self, inputs):
        # reshape to (seq_len, batch=1, input_dim), the layout nn.LSTM expects
        lstm_out, self.hidden = self.lstm(inputs.view(len(inputs), 1, -1), self.hidden)
        outdat = self.hidden2out(lstm_out.view(len(inputs), -1))
        output = self.softmax(outdat)
        return output
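A quick standalone shape check makes the layout that forward assumes explicit. This is a minimal sketch with dummy data, assuming each sequence arrives as a (seq_len, 1, input_size) tensor (single-sample batch), matching how main.py below feeds the model; the concrete sizes are illustrative:

import torch
from model import LSTM

seq = torch.randn(10, 1, 15)  # hypothetical: 10 timesteps, 15 features each
lstm = LSTM(input_size=15, hidden_size=128, output_size=2)
out = lstm(seq)               # forward reshapes to (seq_len, 1, -1) internally
print(out.shape)              # torch.Size([10, 2]): log-probabilities per timestep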
main.py
from model import RNN, LSTM
from data_loader import randomTrainingExample
import torch
import torch.nn as nn
import torch.optim as optim

# ============================ step 0/5 parameters ==========================
device = torch.device("cpu")
learning_rate = 0.01
n_iters = 200000
n_hidden = 128
n_input = 15       # feature dimension of each timestep
n_categories = 2   # binary classification
# ============================ step 1/5 data ================================
# randomTrainingExample() (imported above) yields one (label, sequence) pair
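data_loader.py is not shown here. For completeness, a minimal stand-in for randomTrainingExample; the shapes are assumptions inferred from how train_lstm consumes the tensors, not the original loader:

# data_loader.py (hypothetical stand-in; the real file is not shown)
import random
import torch

def randomTrainingExample():
    # assumed format: a random-length sequence of 15-dim vectors, binary label
    seq_len = random.randint(5, 20)
    arrs_tensor = torch.randn(seq_len, 1, 15)               # (seq_len, batch=1, n_input)
    category_tensor = torch.tensor([random.randint(0, 1)])  # shape (1,) for NLLLoss
    return category_tensor, arrs_tensor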
# ============================ step 2/5 models ==============================
rnn = RNN(n_input, n_hidden, n_categories)
rnn.to(device)
lstm = LSTM(n_input, n_hidden, n_categories)
lstm.to(device)
# ============================ step 3/5 loss function =======================
criterion = nn.NLLLoss()
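NLLLoss expects log-probabilities, which is exactly what the models' LogSoftmax layers produce; the pair is equivalent to nn.CrossEntropyLoss applied to raw logits. A small illustrative check (values here are arbitrary):

logits = torch.randn(1, 2)
target = torch.tensor([1])
a = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
b = nn.CrossEntropyLoss()(logits, target)
print(torch.allclose(a, b))  # True: LogSoftmax + NLLLoss == CrossEntropyLoss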
# ============================ step 4/5 optimizer ===========================
optimizer = optim.SGD(lstm.parameters(), lr=learning_rate)
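For reference, optimizer.step() for plain SGD is equivalent to the manual per-parameter update sketched below; the deprecated two-argument p.data.add_(-lr, grad) form from older tutorials is now spelled with the alpha keyword. Kept commented out, since running it alongside optimizer.step() would apply the update twice:

# manual equivalent of optimizer.step() for vanilla SGD (illustrative only):
# with torch.no_grad():
#     for p in lstm.parameters():
#         if p.grad is not None:
#             p.add_(p.grad, alpha=-learning_rate)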
# ============================ step 5/5 training ============================
# Per-timestep training loop for the plain RNN, kept commented out for
# reference; if revived, it needs its own optimizer over rnn.parameters().
# def train_rnn(category_tensor, arrs_tensor):
#     hidden = rnn.initHidden()
#     arrs_tensor = arrs_tensor.to(device)
#     hidden = hidden.to(device)
#     category_tensor = category_tensor.to(device)
#     for i in range(arrs_tensor.size()[0]):
#         output, hidden = rnn(arrs_tensor[i], hidden)
#     loss = criterion(output, category_tensor)
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()
#     return output, loss.item()
def train_lstm(category_tensor, arrs_tensor):
    arrs_tensor = arrs_tensor.to(device)
    category_tensor = category_tensor.to(device)
    lstm.hidden = lstm.init_hidden()  # fresh (h_0, c_0) for every sequence

    # feed one timestep at a time; lstm.hidden carries state across steps
    for i in range(arrs_tensor.size()[0]):
        output = lstm(arrs_tensor[i])

    # classify from the output at the final timestep
    loss = criterion(output, category_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return output, loss.item()
for iter in range(1, n_iters + 1):
    category_tensor, arrs_tensor = randomTrainingExample()
    output, loss = train_lstm(category_tensor, arrs_tensor)
    if iter % 1000 == 0:
        print(f"iter {iter}/{n_iters}  loss {loss:.4f}")

# =========================== inference ====================================
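The inference section is empty in the original. A minimal sketch of how the trained model could be evaluated, reusing randomTrainingExample; the predict helper is illustrative, not part of the original code:

def predict(arrs_tensor):
    # return the predicted category index for one sequence
    with torch.no_grad():                    # no gradients needed at inference
        lstm.hidden = lstm.init_hidden()
        output = lstm(arrs_tensor.to(device))      # whole sequence in one call
        return output[-1].argmax().item()          # most likely class at last step

category_tensor, arrs_tensor = randomTrainingExample()
print("predicted:", predict(arrs_tensor), "actual:", category_tensor.item())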