Understanding, Reimplementing, and Applying RNNs


Understanding RNNs

RNN parameters in PyTorch

nn.RNN(
    input_size=4,
    hidden_size=3,
    # number of stacked RNN layers
    # note: setting num_layers=2 stacks two RNNs, with the second RNN taking the first RNN's outputs as input and computing the final results
    num_layers=1,        # default
    nonlinearity='tanh', # default
    bias=True,           # default
    batch_first=False,   # default
    dropout=0,           # default
    bidirectional=False  # default
)
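As a quick illustration of num_layers (my own sketch, not from the original walkthrough; rnn1, rnn2 and x are throwaway names): stacking adds one final hidden state per layer while the output shape stays the same.

import torch
import torch.nn as nn

# compare a single-layer RNN with a two-layer stacked RNN
rnn1 = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True)
rnn2 = nn.RNN(input_size=4, hidden_size=3, num_layers=2, batch_first=True)

x = torch.randn(2, 7, 4)          # (batch_size, seq_len, input_size)
out1, hn1 = rnn1(x)
out2, hn2 = rnn2(x)

print(out1.shape, hn1.shape)      # torch.Size([2, 7, 3]) torch.Size([1, 2, 3])
print(out2.shape, hn2.shape)      # torch.Size([2, 7, 3]) torch.Size([2, 2, 3])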

RNN input, hidden state, and output

'''
Input layer: 5 features,
hidden layer: 6 units,
a single RNN layer,
batch dimension first
'''
rnn = nn.RNN(input_size=5, hidden_size=6, num_layers=1, batch_first=True)

'''
Taking a one-layer RNN as an example,
initialization creates four parameters:
	weight_ih_l0 => (hidden_size, input_size)
	weight_hh_l0 => (hidden_size, hidden_size)
	bias_ih_l0 => (hidden_size)
	bias_hh_l0 => (hidden_size)

Note: why are the dimensions reversed here (and later in the reimplementation)?
For example, you might expect weight_ih_l0 to have input_size first, yet hidden_size comes first.
I am not entirely sure; looking at the source code of earlier PyTorch versions, it seems to be defined this way.
Judging from the reimplementation below, my understanding is that it saves a transpose during the matrix multiplication.
'''
'''
Input:
	when batch_first = True:
		(batch_size, seq_len, input_size)
	when batch_first = False:
		(seq_len, batch_size, input_size)
'''
input = torch.randn(3, 1, 5)  # (batch_size=3, seq_len=1, input_size=5)
'''
Hidden state:
	if h0 is not provided, it defaults to all zeros
	(num_layers * num_directions, batch_size, hidden_size)
	num_directions: unidirectional or bidirectional
	    unidirectional: num_directions = 1
	    bidirectional: num_directions = 2
'''
h0 = torch.randn(1, 3, 6)  # (num_layers * num_directions=1, batch_size=3, hidden_size=6)
'''
Output (with batch_first=True): (batch_size, seq_len, num_directions * hidden_size)
Final hidden state hn: (num_layers * num_directions, batch_size, hidden_size)
'''
output, hn = rnn(input, h0)
# show all states
print(rnn.state_dict())
OrderedDict([('weight_ih_l0', tensor(
	   [[-0.0042, -0.2252, -0.1453, -0.1967,  0.0716],
        [ 0.1114,  0.3056, -0.2761, -0.0785,  0.0996],
        [-0.3213,  0.3594,  0.0489,  0.4038, -0.3169],
        [ 0.1048,  0.3901, -0.3059, -0.1866, -0.2989],
        [-0.0664,  0.2578,  0.2714,  0.1200, -0.2181],
        [ 0.2600, -0.1105, -0.2324,  0.1316, -0.3495]])), 
        ('weight_hh_l0', tensor(
       [[ 0.3784, -0.2398,  0.0806,  0.2033, -0.4014, -0.1181],
        [-0.3243, -0.3409, -0.2328, -0.1596,  0.0307, -0.3497],
        [-0.0058,  0.1994,  0.3338, -0.1157, -0.3158,  0.3193],
        [ 0.1708,  0.3534, -0.2362, -0.1357,  0.3582, -0.3219],
        [ 0.2879,  0.1790, -0.3412,  0.1042, -0.2953, -0.0971],
        [ 0.4013, -0.2294, -0.2610, -0.0615, -0.2490, -0.1505]])), 
        ('bias_ih_l0', tensor(
        [ 0.0124, -0.2176,  0.0594,  0.1182,  0.0878, -0.2428])), 
        ('bias_hh_l0', tensor(
        [ 0.1436,  0.1387, -0.2747,  0.3427,  0.3240,  0.1577]))])

# all trainable parameters; these are the same tensors as the state dict above
params = list(rnn.parameters())
print(params)
print(len(params))
print(params[0].shape)
# trace the computation graph
print(output.grad_fn)
print(output.grad_fn.next_functions[0][0])
print(output.grad_fn.next_functions[0][0].next_functions[0][0])
print(output.grad_fn.next_functions[0][0].next_functions[0][0].next_functions[0][0])
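To make the shapes above concrete, a quick check (my addition, reusing the rnn, input and h0 defined above):

# with batch_first=True:
# output ==> (batch_size, seq_len, hidden_size)
# hn     ==> (num_layers * num_directions, batch_size, hidden_size)
print(output.shape)  # torch.Size([3, 1, 6])
print(hn.shape)      # torch.Size([1, 3, 6])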

Reimplementing the RNN

$h_t = \tanh(x_t W_{ih}^{T} + b_{ih} + h_{t-1} W_{hh}^{T} + b_{hh})$

The RNN computation revolves around this formula, which is also why the four parameters mentioned earlier exist.
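Before the full reimplementation, here is a minimal single-step sanity check of the formula (my own sketch; rnn, x and h0 are throwaway names): compute h_1 by hand from the weights of an nn.RNN and compare it with the module's output.

import torch
import torch.nn as nn

rnn = nn.RNN(input_size=5, hidden_size=6, num_layers=1, batch_first=True)
x = torch.randn(2, 1, 5)   # a single time step
h0 = torch.zeros(1, 2, 6)

out, hn = rnn(x, h0)

# h_1 = tanh(x_1 W_ih^T + b_ih + h_0 W_hh^T + b_hh)
W_ih, W_hh = rnn.weight_ih_l0, rnn.weight_hh_l0
b_ih, b_hh = rnn.bias_ih_l0, rnn.bias_hh_l0
h1 = torch.tanh(x[:, 0, :] @ W_ih.T + b_ih + h0[0] @ W_hh.T + b_hh)

print(torch.allclose(out[:, 0, :], h1, atol=1e-6))  # True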
import torch
import torch.nn as nn

class Rnn(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, bias=True, dropout=0, bidirectional=False, nonlinearity='tanh'):
        super(Rnn, self).__init__()

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            nonlinearity=nonlinearity,
            bias=bias,
            batch_first=True,
            bidirectional=bidirectional
        )
        if dropout > 0: 
            self.dropout = nn.Dropout(p=dropout)
        self.reset_params()

    def reset_params(self):
        for i in range(self.rnn.num_layers):
            nn.init.kaiming_normal_(getattr(self.rnn, f'weight_ih_l{i}')) # initialize the input-to-hidden weights
            nn.init.kaiming_normal_(getattr(self.rnn, f'weight_hh_l{i}')) # initialize the hidden-to-hidden weights
            # the biases are 1-D, so unsqueeze them before applying the same init
            nn.init.kaiming_normal_(getattr(self.rnn, f'bias_ih_l{i}').unsqueeze(1))
            nn.init.kaiming_normal_(getattr(self.rnn, f'bias_hh_l{i}').unsqueeze(1))

    def forward(self, input, h_0=None):

        batch_size, seq_len, input_size = input.shape
        hidden_size = self.rnn.weight_ih_l0.shape[0]
        if hasattr(self, 'dropout'):
            input = self.dropout(input)
        output = torch.zeros(batch_size, seq_len, hidden_size)
        if h_0 is None:
            h_0 = torch.zeros(1, batch_size, hidden_size) # default h_0: all zeros
        # h_0 ==> (num_layers * num_directions, batch_size, hidden_size)
        h_0 = h_0.squeeze(0) # work with the 2-D view (batch_size, hidden_size)

        for t in range(seq_len):
            # input==>(batch_size, seq_len, input_size)
            # x==>(batch_size, input_size)
            x = input[:, t, :]
            # x==>(batch_size, input_size, 1)
            x = x.unsqueeze(2)

            # weight_ih_l0==>(hidden_size, input_size)
            # w_ih==>(1, hidden_size, input_size)
            w_ih = self.rnn.weight_ih_l0.unsqueeze(0)
            # w_ih==>(batch_size, hidden_size, input_size)
            w_ih = w_ih.tile(batch_size, 1, 1) # repeat along the batch dimension; the other dimensions are unchanged

            # weight_hh_l0 ==> (hidden_size, hidden_size)
            # w_hh ==> (1, hidden_size, hidden_size)
            w_hh = self.rnn.weight_hh_l0.unsqueeze(0)
            # w_hh ==> (batch_size, hidden_size, hidden_size)
            w_hh = w_hh.tile(batch_size, 1, 1)

            # (batch_size, hidden_size, input_size)@(batch_size, input_size, 1)
            # inputW_ih==>(batch_size, hidden_size, 1)
            inputW_ih = w_ih@x
            # inputW_ih==>(batch_size, hidden_size)
            inputW_ih = inputW_ih.squeeze(-1)

            # (batch_size, hidden_size, hidden_size)@(batch_size, hidden_size, 1)
            # hiddenW_hh==>(batch_size, hidden_size, 1)
            hiddenW_hh = w_hh@h_0.unsqueeze(2)
            # hiddenW_hh==>(batch_size, hidden_size)
            hiddenW_hh = hiddenW_hh.squeeze(-1)

            # h_0 = (batch_size, hidden_size)
            h_0 = torch.tanh(inputW_ih + self.rnn.bias_ih_l0 + hiddenW_hh + self.rnn.bias_hh_l0) # from here on, h_0 is no longer the initial state but the hidden state of the current time step

            output[:, t, :] = h_0

        # output==>(batch_size, seq_len, hidden_size)
        # h_0==>(num_layers * num_directions, batch_size, hidden_size)
        return output, h_0.unsqueeze(0) # the returned h_0 is the hidden state at the last time step
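A quick self-consistency check (my addition, with throwaway names model and x): since the class above reads its weights from the internal nn.RNN, the manual loop should reproduce a direct call to that module.

model = Rnn(input_size=5, hidden_size=6, num_layers=1)
x = torch.randn(3, 4, 5)               # (batch_size, seq_len, input_size)

out_manual, h_manual = model(x)        # the hand-written loop
out_builtin, h_builtin = model.rnn(x)  # the internal nn.RNN (batch_first=True)

print(torch.allclose(out_manual, out_builtin, atol=1e-6))  # True
print(torch.allclose(h_manual, h_builtin, atol=1e-6))      # True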

Applying the RNN

  • At this point I think we understand the RNN well enough, so let's apply it and predict the sin function.
  1. Define the data preprocessing
import random

import numpy as np
import torch

class Data:
    def __init__(self, args):
        self.args = args
        self.x, self.y, self.time_steps = self.process()

    def process(self):
        start = random.randint(0, 3)
        time_steps = np.linspace(start=start, stop=start + 10, num=self.args['num_time_steps'])
        data = np.sin(time_steps)
        data = data.reshape(self.args['num_time_steps'], 1)

        x = torch.tensor(data[:-1], dtype=torch.float32)  # everything except the last element
        x = x.view(1, self.args['num_time_steps'] - 1, 1)
        y = torch.tensor(data[1:], dtype=torch.float32)  # everything except the first element
        y = y.view(1, self.args['num_time_steps'] - 1, 1)
        return x, y, time_steps
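A small usage check (my addition; the args dict here is just illustrative): x and y are the same sine curve shifted by one step against each other.

args = {'num_time_steps': 50}
data = Data(args)
print(data.x.shape)           # torch.Size([1, 49, 1])
print(data.y.shape)           # torch.Size([1, 49, 1])
print(data.time_steps.shape)  # (50,)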
  2. Define the network structure
import torch.nn as nn
from model.nn import Rnn

class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=1, num_layers=1):
        super(Net, self).__init__()

        self.rnn = Rnn( # use the reimplemented model from above
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
        self.reset_params()
        self.linear = nn.Linear(in_features=hidden_size, out_features=output_size) # if you are interested, try reimplementing Linear as well; I may cover it in a later post

    def reset_params(self):
        for p in self.rnn.parameters():
            nn.init.normal_(p, mean=0.0, std=0.001)

    def forward(self, x, h_0):
        # x==>(batch_size, seq_len, input_size)
        # h_0==>(num_layers, batch_size, hidden_size)
        hidden_size = h_0.shape[2]

        # out==>(batch_size, seq_len, hidden_size)
        # h_prev==>(num_layers, batch_size, hidden_size)
        out, h_prev = self.rnn(x, h_0)

        # out==>(batch_size * seq_len, hidden_size)
        out = out.view(-1, hidden_size)
        # out ==> (batch_size * seq_len, output_size), where output_size is the Linear layer's out_features
        out = self.linear(out)

        # out==>(1, batch_size * seq_len, output_size)
        # h_prev==>(num_layers, batch_size, hidden_size)
        return out.unsqueeze(0), h_prev
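A quick shape check of the forward pass (my addition; net, x and h_0 are throwaway names):

import torch

net = Net(input_size=1, hidden_size=16)
x = torch.randn(1, 49, 1)    # (batch_size, seq_len, input_size)
h_0 = torch.zeros(1, 1, 16)  # (num_layers, batch_size, hidden_size)

out, h_prev = net(x, h_0)
print(out.shape)     # torch.Size([1, 49, 1])
print(h_prev.shape)  # torch.Size([1, 1, 16])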
  3. Define the training function
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
import numpy as np
from model.model import Net
from model.data import Data

def train(args):
    model = Net(
        input_size=args['input_size'],
        hidden_size=args['hidden_size']
    )
    criterion = nn.MSELoss()

    optimizer = optim.Adam(model.parameters(), args['lr'])

    # h_0==>(num_layers, batch_size, hidden_size)
    h_0 = torch.zeros(1, 1, args['hidden_size'])

    for iter in range(args['epoch']):
        # generate the ground-truth data
        data = Data(args)
        x, y, time_steps = data.x, data.y, data.time_steps

        # forward pass through the model
        output, h_prev = model(x, h_0)
        h_prev = h_prev.detach()  # detach h_prev so no gradients flow through it

        loss = criterion(output, y)
        model.zero_grad()
        loss.backward()
        optimizer.step()

        if iter % 100 == 0:
            print('iteration: {} loss {}'.format(iter, loss.item()))

    return model, output, h_prev
  4. Define the test function
def test(model, args, h_prev):
    # predict with the trained model
    # generate a batch of ground-truth values
    data = Data(args)
    x, y, time_steps = data.x, data.y, data.time_steps

    predictions = []
    input = x[:, 0, :]
    for _ in range(x.shape[1]):
        input = input.view(1, 1, 1)
        pred, h_prev = model(input, h_prev)
        input = pred
        predictions.append(np.max(pred.detach().numpy().ravel()))  # pred has shape (1, 1, 1), so this just extracts its single scalar value
    return x, y, predictions, time_steps
  5. Define the main function
def main():
    args = { # hyperparameters
        'lr': 0.01,
        'num_time_steps': 50,  # number of generated sample points
        'input_size': 1,
        'hidden_size': 16,
        'epoch': 6000
    }
    model, output, h_prev = train(args) # train
    x, y, predictions, time_steps = test(model, args, h_prev) # test

    x = x.data.numpy().ravel()  # flatten to a 1-D array

    plt.scatter(x=time_steps[:-1], y=x)
    plt.plot(time_steps[:-1], x)  # line through the ground truth
    plt.scatter(x=time_steps[1:], y=predictions)  # predicted values
    plt.show()


if __name__ == '__main__':
    main()

Predicting the sin function

In the figure, blue marks the ground-truth values and yellow marks the predictions.

Summary

  • Overall code structure
|-
|--model folder
|---nn.py # not required; this is the reimplemented model
|---model.py # the network structure
|---data.py # data preprocessing
|--main.py # the training function, test function, and main function
  • Steps for designing the network
1. Define the data preprocessing
2. Define the network structure
3. Define the training function
4. Define the test function
5. Define the main function