Bidirectional_recurrent_neural_network
import torch
import torch. nn as nn
import torchvision
import torchvision. transforms as transforms
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Input geometry: every image is presented to the network as
# `sequence_length` time steps of `input_size` features each.
sequence_length, input_size = 28, 28

# Network shape: hidden units per direction, stacked layers, output classes.
hidden_size, num_layers, num_classes = 128, 2, 10

# Optimisation settings: mini-batch size, passes over the data, Adam step size.
batch_size, num_epochs, learning_rate = 100, 5, 0.003
# MNIST training split, converted to tensors; downloaded into ../../data/
# if it is not already there.
train_dataset = torchvision.datasets.MNIST(
    root='../../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split; no download flag is passed for this one.
test_dataset = torchvision.datasets.MNIST(
    root='../../data/',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training data is shuffled, test data keeps its order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)
class BiRNN(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classifier.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        # The LSTM output concatenates both directions, hence hidden_size * 2.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # Build the zero initial states on the same device and dtype as the
        # input instead of relying on a module-level `device` variable, so the
        # model works wherever it (and its input) has been moved.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # out: (batch, seq_len, hidden_size * 2)
        out, _ = self.lstm(x, (h0, c0))

        # Classify from the features at the last time step.
        out = self.fc(out[:, -1, :])
        return out
# Build the network and move its parameters to the selected device.
model = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Cross-entropy over the class logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Number of mini-batches per epoch. This must be the loader's length, not
# the dataset's: len(train_dataset) counts samples, which made the progress
# log report the wrong step total.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Treat each image as a sequence of `sequence_length` rows of
        # `input_size` pixels.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update; gradients are cleared first
        # because PyTorch accumulates them across backward() calls.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress every 100 mini-batches.
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
Epoch [1/5], Step [100/60000], Loss: 0.7600
Epoch [1/5], Step [200/60000], Loss: 0.3808
Epoch [1/5], Step [300/60000], Loss: 0.1809
Epoch [1/5], Step [400/60000], Loss: 0.2003
Epoch [1/5], Step [500/60000], Loss: 0.1400
Epoch [1/5], Step [600/60000], Loss: 0.0573
Epoch [2/5], Step [100/60000], Loss: 0.1896
Epoch [2/5], Step [200/60000], Loss: 0.0918
Epoch [2/5], Step [300/60000], Loss: 0.0184
Epoch [2/5], Step [400/60000], Loss: 0.0292
Epoch [2/5], Step [500/60000], Loss: 0.1486
Epoch [2/5], Step [600/60000], Loss: 0.0494
Epoch [3/5], Step [100/60000], Loss: 0.0355
Epoch [3/5], Step [200/60000], Loss: 0.0233
Epoch [3/5], Step [300/60000], Loss: 0.0608
Epoch [3/5], Step [400/60000], Loss: 0.0590
Epoch [3/5], Step [500/60000], Loss: 0.0765
Epoch [3/5], Step [600/60000], Loss: 0.0127
Epoch [4/5], Step [100/60000], Loss: 0.0712
Epoch [4/5], Step [200/60000], Loss: 0.0620
Epoch [4/5], Step [300/60000], Loss: 0.0265
Epoch [4/5], Step [400/60000], Loss: 0.0269
Epoch [4/5], Step [500/60000], Loss: 0.0236
Epoch [4/5], Step [600/60000], Loss: 0.0022
Epoch [5/5], Step [100/60000], Loss: 0.0162
Epoch [5/5], Step [200/60000], Loss: 0.0763
Epoch [5/5], Step [300/60000], Loss: 0.0087
Epoch [5/5], Step [400/60000], Loss: 0.0111
Epoch [5/5], Step [500/60000], Loss: 0.0914
Epoch [5/5], Step [600/60000], Loss: 0.0205
# Put the model in evaluation mode before measuring test accuracy.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation, so disable autograd tracking.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        _, predicted = outputs.max(dim=1)

        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
print("Test Accuracy of the model on the 10000 test images: {}".format(accuracy))
Test Accuracy of the model on the 10000 test images: 98.59
# Save only the learned parameters (state_dict), and separately the whole
# pickled module.
torch.save(model.state_dict(), 'model_param.ckpt')
torch.save(model, 'model.ckpt')

# Restore the parameters into the existing model. map_location remaps the
# stored tensors onto the current device, so a checkpoint written on a GPU
# machine can still be loaded when only a CPU is available.
model.load_state_dict(torch.load('model_param.ckpt', map_location=device))

# Load the whole pickled module; the result is not bound to a name here.
# NOTE(review): this path unpickles arbitrary objects, so only load files
# from a trusted source. Recent PyTorch releases default torch.load to
# weights_only=True, which rejects whole-module checkpoints unless
# weights_only=False is passed explicitly -- confirm against the installed
# version.
torch.load('model.ckpt', map_location=device)
BiRNN(
(lstm): LSTM(28, 128, num_layers=2, batch_first=True, bidirectional=True)
(fc): Linear(in_features=256, out_features=10, bias=True)
)