Implementing an LSTM with PyTorch's interfaces

The LSTM equations

$$i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi})$$
$$f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf})$$
$$g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg})$$
$$o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho})$$
$$c_t = f_t \ast c_{t-1} + i_t \ast g_t$$
$$h_t = o_t \ast \tanh(c_t)$$
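
These equations are exactly what nn.LSTMCell computes. As a sanity check, here is a minimal sketch (the sizes are picked arbitrarily) that performs one step by hand using the cell's stacked weight_ih / weight_hh parameters, whose four gate blocks PyTorch stores in i, f, g, o order, and compares the result with the cell's own output:

import torch
import torch.nn as nn

torch.manual_seed(0)
cell = nn.LSTMCell(input_size=4, hidden_size=3)
x = torch.randn(2, 4)   # (batch, input_size)
h0 = torch.zeros(2, 3)  # (batch, hidden_size)
c0 = torch.zeros(2, 3)

# weight_ih: (4*hidden, input), weight_hh: (4*hidden, hidden); gate order i, f, g, o
gates = x @ cell.weight_ih.t() + cell.bias_ih + h0 @ cell.weight_hh.t() + cell.bias_hh
i, f, g, o = gates.chunk(4, dim=1)
c1 = torch.sigmoid(f) * c0 + torch.sigmoid(i) * torch.tanh(g)
h1 = torch.sigmoid(o) * torch.tanh(c1)

h_ref, c_ref = cell(x, (h0, c0))
print(torch.allclose(h1, h_ref, atol=1e-6), torch.allclose(c1, c_ref, atol=1e-6))  # True True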

How loss_func = nn.CrossEntropyLoss() is computed internally

$$\text{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right) = -x[class] + \log\left(\sum_j \exp(x[j])\right)$$

With the optional per-class weight argument:

$$\text{loss}(x, class) = weight[class]\left(-x[class] + \log\left(\sum_j \exp(x[j])\right)\right)$$
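
A quick numeric check that nn.CrossEntropyLoss matches this formula (the logits below are made up):

import torch
import torch.nn as nn

logits = torch.tensor([[2.0, 0.5, -1.0]])  # (N=1, C=3) raw scores
target = torch.tensor([0])                 # index of the true class
manual = -logits[0, 0] + torch.logsumexp(logits[0], dim=0)
print(nn.CrossEntropyLoss()(logits, target).item(), manual.item())  # both ≈ 0.2414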

Diagrams

(figures omitted)

Mathematical background

Hadamard product

The Hadamard product of two matrices of the same shape is their element-wise product; it is the $\ast$ used in the $c_t$ and $h_t$ equations above.
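
In PyTorch this is just the * operator (or torch.mul) on same-shaped tensors:

import torch

a = torch.tensor([[1., 2.], [3., 4.]])
b = torch.tensor([[10., 20.], [30., 40.]])
print(a * b)  # tensor([[ 10.,  40.], [ 90., 160.]])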

How the weights are initialized

Every weight and bias of nn.LSTM is sampled from
$$\mathcal{U}(-\sqrt{k}, \sqrt{k}), \qquad k = \frac{1}{\text{hidden\_size}}$$
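
This default is easy to confirm on a freshly constructed layer; the check below is my own sketch, not code from the original post:

import math
import torch.nn as nn

rnn = nn.LSTM(10, 20, 2)
bound = math.sqrt(1.0 / rnn.hidden_size)
for name, p in rnn.named_parameters():
    # every weight and bias should lie inside [-sqrt(k), sqrt(k)]
    print(name, p.min().item() >= -bound, p.max().item() <= bound)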

Addition and multiplication with mismatched tensor shapes (broadcasting)

import torch

input1 = torch.ones(5, 3)
print(input1)
input2 = torch.randn(3)
print(input2)
print('+++++++++++++++++++++++++++++')
# the (3,) tensor is broadcast across each of the five rows of the (5, 3) tensor
print(input1 + input2)

Output (the (3,) tensor is added to every row):

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([-0.8281,  1.7018, -0.9728])
+++++++++++++++++++++++++++++
tensor([[0.1719, 2.7018, 0.0272],
        [0.1719, 2.7018, 0.0272],
        [0.1719, 2.7018, 0.0272],
        [0.1719, 2.7018, 0.0272],
        [0.1719, 2.7018, 0.0272]])

Multiplication

input1 = torch.ones(5, 3)
print(input1)
input2 = torch.randn(3, 1)
print(input2)
print('+++++++++++++++++++++++++++++')
# (5, 3) @ (3, 1) -> (5, 1): an ordinary matrix product, no broadcasting
print(input1.matmul(input2))

Output
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[-0.5661],
        [-1.7032],
        [-2.1534]])
+++++++++++++++++++++++++++++
tensor([[-4.4227],
        [-4.4227],
        [-4.4227],
        [-4.4227],
        [-4.4227]])

When input2 = torch.randn(3, 1) is changed to input2 = torch.randn(3), the output becomes a 1-D tensor of shape (5,), because matmul of a (5, 3) matrix with a 1-D tensor of shape (3,) is a matrix-vector product:

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([0.0792, 0.3583, 0.3870])
+++++++++++++++++++++++++++++
tensor([0.8244, 0.8244, 0.8244, 0.8244, 0.8244])
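
The shape rule can be checked directly: a 2-D second operand gives a matrix-matrix product, a 1-D one gives a matrix-vector product:

import torch

input1 = torch.ones(5, 3)
print(input1.matmul(torch.randn(3, 1)).shape)  # torch.Size([5, 1])
print(input1.matmul(torch.randn(3)).shape)     # torch.Size([5])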

Building a simple LSTM and checking the output shapes

import torch
import torch.nn as nn

rnn = nn.LSTM(10, 20, 2)      # input_size=10, hidden_size=20, num_layers=2
input = torch.ones(5, 3, 10)  # (seq_len=5, batch=3, input_size=10)
h0 = torch.randn(2, 3, 20)    # (num_layers=2, batch=3, hidden_size=20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))
print(output.shape)           # (seq_len, batch, hidden_size)
print(hn.shape)               # (num_layers, batch, hidden_size)
print(cn.shape)

Output

torch.Size([5, 3, 20])
torch.Size([2, 3, 20])
torch.Size([2, 3, 20])
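
One useful relation for a unidirectional LSTM: output holds the top layer's hidden state at every time step, so its last time step coincides with the top layer's entry in hn:

print(torch.allclose(output[-1], hn[-1]))  # True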

Walkthrough of the official example

Link: the official lstm-example
Code
generate_sine_wave.py

import numpy as np
import torch

np.random.seed(2)
T = 20
L = 1000
N = 100
x = np.empty((N, L), 'int64')  # allocate an uninitialized (100, 1000) int64 array
# each row is range(L) shifted by a random integer offset drawn from [-80, 80);
# the (N, 1) offsets broadcast against the (L,) range
x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1)
data = np.sin(x / 1.0 / T).astype('float64')  # (100, 1000): each entry is sin((index + offset) / 20)
print(x)
print(data)
torch.save(data, open('traindata.pt', 'wb'))

'''
>>> each row of x looks like [-12, -11, -10, ..., 987] (len = 1000); there are 100 such rows
>>> every entry is then divided by T = 20 and passed through sin
>>> aa=np.random.randint(-4 * 20, 4 * 20, 10).reshape(10,1)
>>> bb= np.array(range(10))
>>> aa
array([[-73],
       [  7],
       [-17],
       [ 42],
       [ 66],
       [  3],
       [-60],
       [ 34],
       [ 44],
       [-24]])
>>> bb
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> aa+bb
array([[-73, -72, -71, -70, -69, -68, -67, -66, -65, -64],
       [  7,   8,   9,  10,  11,  12,  13,  14,  15,  16],
       [-17, -16, -15, -14, -13, -12, -11, -10,  -9,  -8],
       [ 42,  43,  44,  45,  46,  47,  48,  49,  50,  51],
       [ 66,  67,  68,  69,  70,  71,  72,  73,  74,  75],
       [  3,   4,   5,   6,   7,   8,   9,  10,  11,  12],
       [-60, -59, -58, -57, -56, -55, -54, -53, -52, -51],
       [ 34,  35,  36,  37,  38,  39,  40,  41,  42,  43],
       [ 44,  45,  46,  47,  48,  49,  50,  51,  52,  53],
       [-24, -23, -22, -21, -20, -19, -18, -17, -16, -15]])
       broadcasting: each entry of the (10, 1) column aa is added to the whole (10,) row bb,
       producing one output row per entry of aa
'''
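
To eyeball the generated data, here is a small plotting sketch of mine (the output filename is arbitrary):

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import torch

data = torch.load('traindata.pt')  # the (100, 1000) array saved above
plt.plot(data[0, :200])            # first 200 points of one sine wave
plt.savefig('sine_sample.png')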

train.py

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

class Sequence(nn.Module):
    def __init__(self):
        super(Sequence, self).__init__()
        self.lstm1 = nn.LSTMCell(1, 51) # input_size=1 hidden_size=51
        self.lstm2 = nn.LSTMCell(51, 51)
        self.linear = nn.Linear(51, 1)

    def forward(self, input, future = 0):
        outputs = []
        h_t = torch.zeros(input.size(0), 51, dtype=torch.double) # input.size(0)=batch_size hidden_size=51
        c_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        # chunk() splits the input along dim=1 into a tuple of input.size(1) pieces,
        # so each input_t is one time step: a (97, 1) column tensor
        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            # h_t and c_t are both (97, 51); torch.cat() is the inverse of
            # torch.split() / torch.chunk(), see: https://blog.csdn.net/benbenls/article/details/102974070
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            # h_t2(97,51) c_t2(97,51)
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]  # each element is a (97, 1) tensor; after this loop len(outputs) = 999
        for i in range(future):  # optionally keep predicting beyond the input, feeding predictions back in
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]  # list concatenation appends one more (97, 1) tensor per future step
        outputs = torch.stack(outputs, 1).squeeze(2)  # (97, 999 + future, 1) -> (97, 999 + future)
        return outputs
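
A quick sanity check of the shapes coming out of forward (the sizes match the training split used below: 97 sequences of length 999; this snippet is my addition, not part of the official example):

# assumes the imports and the Sequence class defined above
seq = Sequence().double()
x = torch.zeros(97, 999, dtype=torch.double)
print(seq(x, future=10).shape)  # torch.Size([97, 1009]): 999 input steps + 10 future steps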


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--steps', type=int, default=2, help='steps to run')
    opt = parser.parse_args()
    # set random seed to 0
    np.random.seed(0)
    torch.manual_seed(0)
    # load data and make training set
    data = torch.load('traindata.pt') # (100,1000)
    input = torch.from_numpy(data[3:, :-1])
    target = torch.from_numpy(data[3:, 1:])
    test_input = torch.from_numpy(data[:3, :-1])
    test_target = torch.from_numpy(data[:3, 1:])
    # build the model
    seq = Sequence()
    seq.double()
    criterion = nn.MSELoss()
    # use LBFGS as optimizer since we can load the whole data to train
    optimizer = optim.LBFGS(seq.parameters(), lr=0.8)
    #begin to train
    for i in range(opt.steps):
        print('STEP: ', i)
        def closure(): # A closure that reevaluates the model and returns the loss.
            optimizer.zero_grad()
            out = seq(input)
            loss = criterion(out, target)
            print('loss:', loss.item())
            loss.backward()
            return loss
        optimizer.step(closure)
        # begin to predict, no need to track gradient here
        with torch.no_grad():
            future = 1000
            pred = seq(test_input, future=future)
            loss = criterion(pred[:, :-future], test_target)  # drop the extrapolated tail; only the part with ground truth is scored
            print('test loss:', loss.item())
            y = pred.detach().numpy()
        # draw the result
        plt.figure(figsize=(30,10))
        plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
        plt.xlabel('x', fontsize=20)
        plt.ylabel('y', fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        def draw(yi, color):
            plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0)
            plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0)
        draw(y[0], 'r')
        draw(y[1], 'g')
        draw(y[2], 'b')
        plt.savefig('predict%d.pdf'%i)
        plt.close()

Handwritten notes explaining the variables (figure omitted)
