# Import necessary packages.
import torch
import torch.nn as nn
import numpy as np
from torch.nn.utils import clip_grad_norm_
from data_utils import Dictionary, Corpus
# Hyper-parameters.
embed_size = 128
hidden_size = 1024
num_layers = 1
num_epochs = 5
num_samples = 1000   # number of words to be sampled.
batch_size = 20
seq_length = 30
learning_rate = 0.002
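
# Device configuration and data loading. The rest of the script references
# `device`, `corpus`, `ids`, `vocab_size`, and `num_batches`, so they are set up
# here. A minimal sketch: the training-file path 'data/train.txt' and the
# Corpus.get_data(path, batch_size) call are assumed to match the accompanying
# data_utils module.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

corpus = Corpus()
ids = corpus.get_data('data/train.txt', batch_size)   # word ids, shape (batch_size, -1)
vocab_size = len(corpus.dictionary)
num_batches = ids.size(1) // seq_length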

# RNN based language model.
class RNNLM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNLM, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, h):
        # Embed word ids to vectors.
        x = self.embed(x)

        # Forward propagate LSTM.
        out, (h, c) = self.lstm(x, h)

        # Reshape output to (batch_size*sequence_length, hidden_size).
        out = out.reshape(out.size(0) * out.size(1), out.size(2))

        # Decode hidden states of all time steps.
        out = self.linear(out)
        return out, (h, c)
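
# Quick shape check (an illustrative sketch with toy sizes, not part of the
# original script): for word ids of shape (batch, seq_len), forward() returns
# logits of shape (batch * seq_len, vocab_size) plus the updated (h, c) states.
_toy_model = RNNLM(vocab_size=10, embed_size=8, hidden_size=16, num_layers=1)
_toy_ids = torch.randint(0, 10, (2, 5))
_toy_state = (torch.zeros(1, 2, 16), torch.zeros(1, 2, 16))
_toy_logits, _ = _toy_model(_toy_ids, _toy_state)
assert _toy_logits.shape == (2 * 5, 10)
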
model = RNNLM(vocab_size, embed_size, hidden_size, num_layers).to(device)
# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Truncated backpropagation: detach the hidden states from the graph of the
# previous mini-batch so gradients only flow through the current seq_length steps.
def detach(states):
    return [state.detach() for state in states]
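
# Minimal illustration of detach() (toy tensor, not from the script): the values
# are kept, but the returned states no longer require grad through earlier steps.
_old_state = (torch.zeros(1, 2, 3, requires_grad=True) + 1,)
assert detach(_old_state)[0].requires_grad is False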

# Train the model.
for epoch in range(num_epochs):
    # Set initial hidden and cell states.
    states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),
              torch.zeros(num_layers, batch_size, hidden_size).to(device))

    for i in range(0, ids.size(1) - seq_length, seq_length):
        # Get mini-batch inputs and targets (targets are the inputs shifted by one word).
        inputs = ids[:, i:i+seq_length].to(device)
        targets = ids[:, (i+1):(i+1)+seq_length].to(device)

        # Forward pass.
        states = detach(states)
        outputs, states = model(inputs, states)
        loss = criterion(outputs, targets.reshape(-1))

        # Backward and optimize.
        optimizer.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        step = (i+1) // seq_length
        if step % 100 == 0:
            print('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'
                  .format(epoch+1, num_epochs, step, num_batches,
                          loss.item(), np.exp(loss.item())))

# Test the model: generate num_samples words and write them to sample.txt.
with torch.no_grad():
    with open('sample.txt', 'w') as f:
        # Set initial hidden and cell states.
        state = (torch.zeros(num_layers, 1, hidden_size).to(device),
                 torch.zeros(num_layers, 1, hidden_size).to(device))

        # Select one word id randomly to seed the generation.
        prob = torch.ones(vocab_size)
        input = torch.multinomial(prob, num_samples=1).unsqueeze(1).to(device)

        for i in range(num_samples):
            # Forward propagate RNN.
            output, state = model(input, state)

            # Sample a word id from the (unnormalized) output distribution.
            prob = output.exp()
            word_id = torch.multinomial(prob, num_samples=1).item()

            # Fill input with sampled word id for the next time step.
            input.fill_(word_id)

            # File write.
            word = corpus.dictionary.idx2word[word_id]
            word = '\n' if word == '<eos>' else word + ' '
            f.write(word)

            if (i+1) % 100 == 0:
                print('Sampled [{}/{}] words and saved to {}'.format(i+1, num_samples, 'sample.txt'))

# Save the model checkpoints.
torch.save(model.state_dict(), 'model.ckpt')
Sampled [100/1000] words and saved to sample.txt
Sampled [200/1000] words and saved to sample.txt
Sampled [300/1000] words and saved to sample.txt
Sampled [400/1000] words and saved to sample.txt
Sampled [500/1000] words and saved to sample.txt
Sampled [600/1000] words and saved to sample.txt
Sampled [700/1000] words and saved to sample.txt
Sampled [800/1000] words and saved to sample.txt
Sampled [900/1000] words and saved to sample.txt
Sampled [1000/1000] words and saved to sample.txt
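
# To reuse the trained weights later, the saved state_dict can be loaded back into
# a freshly constructed model (a usage sketch, not part of the run above):
restored = RNNLM(vocab_size, embed_size, hidden_size, num_layers).to(device)
restored.load_state_dict(torch.load('model.ckpt'))
restored.eval()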