Implementing an LSTM from Scratch

This post implements a single-layer LSTM, a multi-layer LSTM, and a multi-layer bidirectional LSTM from scratch.
Multi-layer LSTM: each layer takes the hidden-state sequence produced by the layer below it as its input.
Bidirectional LSTM: the input sequence is reversed in time and run through a second pass; the resulting hidden states are then flipped back into the original time order and concatenated with the forward hidden states, as the sketch below illustrates.
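For intuition, here is a minimal shape sketch of that bidirectional concatenation (the tensor names and sizes are illustrative assumptions, not part of the implementation below):

import paddle

batch, seq_len, hidden = 2, 5, 8
h_fwd = paddle.randn([batch, seq_len, hidden])  # forward-direction hidden states
h_bwd = paddle.randn([batch, seq_len, hidden])  # hidden states computed on the reversed input
h_bwd = paddle.flip(h_bwd, axis=[1])            # flip back into the original time order
out = paddle.concat([h_fwd, h_bwd], axis=2)     # (batch, seq_len, 2 * hidden)
print(out.shape)                                # [2, 5, 16]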
The full implementation is as follows:

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

# Custom LSTM implementation
class CustomLSTM(nn.Layer):
    def __init__(self, input_sz, hidden_sz, num_layers=2, bias=True, bidirectional=False):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        self.bias = bias
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.param_names = []
        for layer in range(self.num_layers):
            self.param_names.append([])
            for direction in range(self.num_directions):
                # The first layer sees the raw input; deeper layers see the
                # (direction-concatenated) hidden states of the layer below.
                layer_input_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
                # One input-to-hidden and one hidden-to-hidden weight matrix,
                # holding all four gates (i, f, g, o) side by side.
                W = self.create_parameter([layer_input_size, self.hidden_size * 4])
                U = self.create_parameter([self.hidden_size, self.hidden_size * 4])
                layer_params = (W, U)
                if bias:
                    b = self.create_parameter([self.hidden_size * 4])
                    layer_params = (W, U, b)

                suffix = '_reverse' if direction == 1 else ''
                param_names = ['weight_W{}{}', 'weight_U{}{}']
                if bias:
                    param_names += ['bias_{}{}']
                param_names = [x.format(layer, suffix) for x in param_names]
                for name, param in zip(param_names, layer_params):
                    setattr(self, name, param)
                self.param_names[layer].append(param_names)


        self.all_weights = [[[getattr(self, weight) for weight in weights]
                             for weights in weights_layer] for weights_layer in self.param_names]


    def forward(self, x, init_states=None):
        """Assumes x is of shape (batch, sequence, feature)."""
        batch_size, seq_sz, _ = x.shape
        if init_states is None:
            h_t = paddle.zeros((self.num_layers * self.num_directions, batch_size, self.hidden_size))
            c_t = paddle.zeros((self.num_layers * self.num_directions, batch_size, self.hidden_size))
        else:
            h_t, c_t = init_states

        for layer in range(self.num_layers):
            hidden_seq = []
            hidden_seq_reverse = []
            weight_layer = self.all_weights[layer]
            for direction in range(self.num_directions):
                weight = weight_layer[direction]
                HS = self.hidden_size
                # Each pass starts from a single (1, batch, hidden) state slice.
                h_t, c_t = h_t[0].unsqueeze(0), c_t[0].unsqueeze(0)
                for t in range(seq_sz):
                    x_t = x[:, t, :]
                    # Batch the four gate computations into a single matrix
                    # multiplication for the input and the recurrent path.
                    if self.bias:
                        gates = x_t @ weight[0] + h_t @ weight[1] + weight[2]
                    else:
                        gates = x_t @ weight[0] + h_t @ weight[1]
                    # Broadcasting against h_t adds a leading dim of 1; drop it.
                    gates = gates[0]
                    i_t = F.sigmoid(gates[:, :HS])              # input gate
                    f_t = F.sigmoid(gates[:, HS:HS * 2])        # forget gate
                    g_t = paddle.tanh(gates[:, HS * 2:HS * 3])  # candidate cell state
                    o_t = F.sigmoid(gates[:, HS * 3:])          # output gate
                    c_t = f_t * c_t + i_t * g_t
                    h_t = o_t * paddle.tanh(c_t)

                    # Accumulate per-timestep hidden states along the time axis.
                    if direction == 0:
                        if isinstance(hidden_seq, list):
                            hidden_seq = h_t[0].unsqueeze(1)
                        else:
                            hidden_seq = paddle.concat((hidden_seq, h_t[0].unsqueeze(1)), axis=1)
                    else:
                        if isinstance(hidden_seq_reverse, list):
                            hidden_seq_reverse = h_t[0].unsqueeze(1)
                        else:
                            hidden_seq_reverse = paddle.concat((hidden_seq_reverse, h_t[0].unsqueeze(1)), axis=1)
                if self.num_directions == 2 and direction == 0:
                    # Reverse the input in time for the backward pass.
                    x = paddle.flip(x, axis=[1])
                if direction == 1:
                    # Flip the backward outputs back into the original time
                    # order, then concatenate with the forward outputs.
                    hidden_seq_reverse = paddle.flip(hidden_seq_reverse, axis=[1])
                    hidden_seq = paddle.concat((hidden_seq, hidden_seq_reverse), axis=2)
            # This layer's hidden-state sequence is the next layer's input.
            x = hidden_seq
        return hidden_seq, (h_t, c_t)
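
To try the class out, here is a minimal usage sketch (the batch, sequence, and size values are illustrative assumptions; since weights are initialized independently, only the output shapes, not the values, are expected to match paddle.nn.LSTM):

if __name__ == '__main__':
    paddle.seed(42)
    x = paddle.randn([4, 10, 16])  # (batch, sequence, feature)

    model = CustomLSTM(input_sz=16, hidden_sz=32, num_layers=2, bidirectional=True)
    out, (h_n, c_n) = model(x)
    print(out.shape)      # [4, 10, 64]  (2 directions * hidden_sz)

    # Shape check against the built-in bidirectional LSTM.
    ref = paddle.nn.LSTM(16, 32, num_layers=2, direction='bidirect')
    ref_out, _ = ref(x)
    print(ref_out.shape)  # [4, 10, 64]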

Full resource download: the complete code for implementing the LSTM from scratch, from development to usage and comparison.
