本文从头实现了单层LSTM、多层LSTM和多层双向LSTM。
其中多层LSTM:每一层LSTM都将前一层LSTM输出的隐藏状态序列作为自己的输入;
双向LSTM:先将输入序列沿时间维翻转后送入反向LSTM,再把得到的隐藏状态序列翻转回原来的时序,最后与正向得到的隐藏状态在特征维上进行拼接。
实现代码如下:
import math
import paddle
import paddle.nn as nn
import torch.nn.functional as F
import numpy as np
#自定义LSTM实现
class CustomLSTM(nn.Layer):
    """Multi-layer, optionally bidirectional LSTM implemented from scratch.

    Every (layer, direction) pair owns one input projection ``W``, one
    recurrent projection ``U`` and, when ``bias`` is true, one bias ``b``.
    The four gates are packed along the last axis of each parameter in the
    order: input, forget, cell candidate, output.

    Parameters are registered on the layer under the names
    ``weight_W{layer}{suffix}``, ``weight_U{layer}{suffix}`` and
    ``bias_{layer}{suffix}``, where ``suffix`` is ``'_reverse'`` for the
    backward direction and empty otherwise.

    Args:
        input_sz: Feature size of the input fed to the first layer.
        hidden_sz: Hidden state size of every layer and direction.
        num_layers: Number of stacked layers.
        bias: Whether each cell adds a bias term to the gate pre-activations.
        bidirectional: Whether every layer also runs a backward pass.
    """

    def __init__(self, input_sz, hidden_sz, num_layers=2, bias=True, bidirectional=False):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        self.bias = bias
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        # param_names[layer][direction] -> attribute names of that cell's parameters.
        self.param_names = []
        for layer in range(self.num_layers):
            # Layers above the first consume the concatenated outputs of all
            # directions of the layer below. A local is used so that
            # self.input_size keeps reporting the real input feature size.
            if layer == 0:
                layer_input_size = self.input_size
            else:
                layer_input_size = self.hidden_size * self.num_directions

            layer_names = []
            for direction in range(self.num_directions):
                suffix = '_reverse' if direction == 1 else ''
                names = [
                    'weight_W{}{}'.format(layer, suffix),
                    'weight_U{}{}'.format(layer, suffix),
                ]
                params = [
                    self.create_parameter([layer_input_size, self.hidden_size * 4]),
                    self.create_parameter([self.hidden_size, self.hidden_size * 4]),
                ]
                # Only allocate a bias when it is actually going to be used.
                if bias:
                    names.append('bias_{}{}'.format(layer, suffix))
                    params.append(self.create_parameter([self.hidden_size * 4]))

                # Assigning through setattr registers the parameters on the layer.
                for name, param in zip(names, params):
                    setattr(self, name, param)
                layer_names.append(names)
            self.param_names.append(layer_names)

        # all_weights[layer][direction] -> [W, U] or [W, U, b]
        self.all_weights = [
            [[getattr(self, name) for name in names] for names in layer_names]
            for layer_names in self.param_names
        ]

    def forward(self, x, init_states=None):
        """Run the LSTM over a batch of sequences.

        Args:
            x: Input tensor of shape (batch, sequence, feature).
            init_states: Optional ``(h_0, c_0)`` pair, each of shape
                (num_layers * num_directions, batch, hidden_size) and indexed
                by ``layer * num_directions + direction``. Zeros are used
                when omitted.

        Returns:
            A tuple ``(output, (h_n, c_n))``. ``output`` has shape
            (batch, sequence, hidden_size * num_directions) and holds the
            hidden states of the last layer. ``h_n`` and ``c_n`` have shape
            (num_layers * num_directions, batch, hidden_size) and hold the
            final state of every layer and direction, using the same index
            layout as ``init_states``.

        Raises:
            ValueError: If the sequence dimension of ``x`` is empty.
        """
        batch_size, seq_sz, _ = x.shape
        if seq_sz < 1:
            raise ValueError("CustomLSTM.forward requires a non-empty sequence dimension")

        if init_states is None:
            state_shape = (self.num_layers * self.num_directions, batch_size, self.hidden_size)
            h_0 = paddle.zeros(state_shape)
            c_0 = paddle.zeros(state_shape)
        else:
            h_0, c_0 = init_states

        HS = self.hidden_size
        final_h = []
        final_c = []
        layer_input = x
        for layer in range(self.num_layers):
            direction_outputs = []
            for direction in range(self.num_directions):
                weights = self.all_weights[layer][direction]

                # Each (layer, direction) pair starts from its own slice of the
                # initial states instead of inheriting the previous pass's state.
                state_idx = layer * self.num_directions + direction
                h_t = h_0[state_idx]
                c_t = c_0[state_idx]

                # The backward direction walks the sequence from the last step
                # to the first. Indexing in reverse (rather than round-tripping
                # through numpy) keeps the computation on the autograd graph.
                if direction == 1:
                    time_steps = range(seq_sz - 1, -1, -1)
                else:
                    time_steps = range(seq_sz)

                step_outputs = []
                for t in time_steps:
                    x_t = layer_input[:, t, :]
                    # All four gates are computed with one pair of matmuls.
                    gates = x_t @ weights[0] + h_t @ weights[1]
                    if self.bias:
                        gates = gates + weights[2]
                    i_t = paddle.nn.functional.sigmoid(gates[:, :HS])            # input gate
                    f_t = paddle.nn.functional.sigmoid(gates[:, HS:HS * 2])      # forget gate
                    g_t = paddle.tanh(gates[:, HS * 2:HS * 3])                   # cell candidate
                    o_t = paddle.nn.functional.sigmoid(gates[:, HS * 3:])        # output gate
                    c_t = f_t * c_t + i_t * g_t
                    h_t = o_t * paddle.tanh(c_t)
                    step_outputs.append(h_t)

                # Put the backward outputs back into the original time order so
                # they line up step-by-step with the forward outputs.
                if direction == 1:
                    step_outputs.reverse()
                direction_outputs.append(paddle.stack(step_outputs, axis=1))

                final_h.append(h_t)
                final_c.append(c_t)

            # Concatenate directions on the feature axis; this is the next layer's input.
            if len(direction_outputs) == 1:
                layer_input = direction_outputs[0]
            else:
                layer_input = paddle.concat(direction_outputs, axis=2)

        h_n = paddle.stack(final_h, axis=0)
        c_n = paddle.stack(final_c, axis=0)
        return layer_input, (h_n, c_n)
具体资源下载:从头实现LSTM,从开发到使用和比较完整代码