pytorch中的RNN
输入为序列X和H
X : [seq_len, batch_size, feature_len]
H_t : [num_layers, batch_size, hidden_len]
输出为out和h
out: [seq_len, batch_size, hidden_len]
h: [num_layers,batch_size, hidden_len]
out取所有时间点最后一个layers的状态,横着
h取最后时间点所有layers的状态,竖着
$$h_{t+1} = \tanh\left(x_t @ w_{xh} + h_t @ w_{hh}\right)$$
rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
input_size = feature_len
hidden_size = hidden_len
num_layers = default(默认值为1)
rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
x = torch.randn(10, 3, 100)
out, h = rnn(x, torch.zeros(1, 3, 20))
out: torch.Size([10, 3, 20])
h: torch.Size([1, 3, 20])
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
# X : [seq_len, batch_size, feature_len]
# X_t : [batch_size, feature_len]
# W_xh : [hidden_len, feature_len]
# H_t : [batch_size, hidden_len]
# W_hh : [hidden_len, hidden_len]
# [batch_size, hidden_len] = ( [batch_size, feature_len] @ [hidden_len, feature_len].T+
# [batch_size, hidden_len] @ [hidden_len, hidden_len].T )
# H_t+1 = tanh( X_t @ W_xh.T + H_t @ W_hh.T )
# rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
# input_size = feature_len
# hidden_size = hidden_len
# num_layers = default
def main():
    """Print the output shapes produced by PyTorch's recurrent layers and cells.

    Runs, in order, ``nn.RNN`` with one and four layers, one- and two-layer
    stacks built by hand from ``nn.RNNCell``, a four-layer ``nn.LSTM``, and
    one- and two-layer stacks built from ``nn.LSTMCell``. Every model is fed
    random input of shape ``[seq_len=10, batch_size=3, feature_len=100]``.
    The function returns nothing; its only effect is printing to stdout.
    """
    # --- Single-layer RNN ---------------------------------------------------
    rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
    print(rnn)
    # x: [seq_len, batch_size, feature_len]
    x = torch.randn(10, 3, 100)
    # initial hidden state: [num_layers, batch_size, hidden_len]
    out, h = rnn(x, torch.zeros(1, 3, 20))
    print("out: ", out.shape, "h: ", h.shape)
    # out: torch.Size([10, 3, 20])
    # h: torch.Size([1, 3, 20])
    # out holds the last layer's state at every time step ("horizontal").
    # h holds every layer's state at the last time step ("vertical").

    # --- Four-layer RNN -----------------------------------------------------
    rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=4)
    print(rnn)
    x = torch.randn(10, 3, 100)
    out, h = rnn(x, torch.zeros(4, 3, 20))
    print("out: ", out.shape, "h: ", h.shape)
    # out: torch.Size([10, 3, 20])
    # h: torch.Size([4, 3, 20])

    # --- RNN unrolled by hand with RNNCell ----------------------------------
    print('rnn by cell')
    # Feed the data of one time step (one word) per iteration.
    cell1 = nn.RNNCell(100, 20)
    h1 = torch.zeros(3, 20)
    for xt in x:
        h1 = cell1(xt, h1)
    print(h1.shape)

    # Two-layer RNN: the first cell's hidden state is the second cell's input.
    cell1 = nn.RNNCell(100, 30)
    cell2 = nn.RNNCell(30, 20)
    h1 = torch.zeros(3, 30)
    h2 = torch.zeros(3, 20)
    for xt in x:
        h1 = cell1(xt, h1)
        h2 = cell2(h1, h2)
    print(h2.shape)

    # --- Four-layer LSTM ----------------------------------------------------
    print('Lstm')
    lstm = nn.LSTM(input_size=100, hidden_size=20, num_layers=4)
    print(lstm)
    x = torch.randn(10, 3, 100)
    # No initial state is passed here, unlike the nn.RNN calls above.
    out, (h, c) = lstm(x)
    print(out.shape, h.shape, c.shape)

    # --- LSTM unrolled by hand with LSTMCell --------------------------------
    print('one layer lstm')
    cell = nn.LSTMCell(input_size=100, hidden_size=20)
    h = torch.zeros(3, 20)
    c = torch.zeros(3, 20)
    for xt in x:
        # The (hidden, cell) state pair is passed as a tuple.
        h, c = cell(xt, (h, c))
    print(h.shape, c.shape)

    print('two layer lstm')
    cell1 = nn.LSTMCell(input_size=100, hidden_size=30)
    cell2 = nn.LSTMCell(input_size=30, hidden_size=20)
    h1 = torch.zeros(3, 30)
    c1 = torch.zeros(3, 30)
    h2 = torch.zeros(3, 20)
    c2 = torch.zeros(3, 20)
    for xt in x:
        h1, c1 = cell1(xt, (h1, c1))
        h2, c2 = cell2(h1, (h2, c2))
    print(h2.shape, c2.shape)
# Run the shape demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
RNN预测SIN函数
import numpy as np
import torch
import torch.nn as nn
from torch import optim
from matplotlib import pyplot as plt
import numpy as np
# --- Data layout ---------------------------------------------------------
# Sequence ("sentence") length.
seq_len = 50
# Number of sequences per batch.
batch_size = 1
# Length of the per-step feature ("word") vector.
feature_len = 1
# Number of points sampled from the sine curve for one sequence.
num_time_steps = 50

# --- Network shape -------------------------------------------------------
# Size of the RNN input at each time step.
input_size = 1
# Hidden state size (second dimension of W_h).
hidden_size = 10
# Number of stacked RNN layers.
num_layers = 1
# Size of the linear read-out applied to each hidden state.
out_size = 1

# --- Optimisation --------------------------------------------------------
# Learning rate handed to the optimizer.
lr = 0.01
class seris_(nn.Module):
    """Batch-first RNN followed by a per-time-step linear read-out.

    Args:
        n_input: Feature size of each input step. Defaults to the
            module-level ``input_size``.
        n_hidden: Hidden state size of the RNN. Defaults to the module-level
            ``hidden_size``.
        n_layers: Number of stacked RNN layers. Defaults to the module-level
            ``num_layers``.
        n_output: Feature size of each output step. Defaults to the
            module-level ``out_size``.

    Calling ``seris_()`` with no arguments builds the same network as before.
    """

    def __init__(self, n_input=None, n_hidden=None, n_layers=None, n_output=None):
        super().__init__()
        # The module-level constants are only read for arguments left as
        # None, so explicit sizes make the class usable without them.
        n_input = input_size if n_input is None else n_input
        n_hidden = hidden_size if n_hidden is None else n_hidden
        n_layers = num_layers if n_layers is None else n_layers
        n_output = out_size if n_output is None else n_output

        self.rnn = nn.RNN(
            input_size=n_input,
            hidden_size=n_hidden,
            num_layers=n_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(n_hidden, n_output)

    def forward(self, x, h0):
        """Run the RNN over ``x`` and project every hidden state.

        Args:
            x: Input of shape ``[batch_size, seq_len, n_input]``.
            h0: Initial hidden state of shape
                ``[n_layers, batch_size, n_hidden]``.

        Returns:
            A tuple ``(out, h)``. ``out`` has shape
            ``[batch_size, seq_len, n_output]`` and ``h`` is the final hidden
            state of shape ``[n_layers, batch_size, n_hidden]``.
        """
        out, h = self.rnn(x, h0)
        # out: [batch_size, seq_len, n_hidden]
        # nn.Linear acts on the last dimension, so the batch and time axes
        # are kept. The previous flatten + unsqueeze(0) gave the same result
        # for batch_size == 1 but merged batch and time for larger batches.
        out = self.linear(out)
        return out, h
# Build the model, the loss and the optimizer.
model = seris_()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)

# Initial hidden state: [num_layers, batch_size, hidden_size].
hidden_prev = torch.zeros(num_layers, batch_size, hidden_size)

# Train the model to predict the next sample of a sine wave.
for step in range(600):
    # Sample a sine segment of length 10 starting at a random integer in [0, 10).
    start = np.random.randint(10, size=1)[0]
    time_steps = np.linspace(start, start + 10, num_time_steps)
    data = np.sin(time_steps).reshape(num_time_steps, 1)

    # x is the segment without its last point and y the segment without its
    # first point, so y[t] is the value that follows x[t].
    x = torch.tensor(data[:-1]).float().view(batch_size, num_time_steps - 1, 1)
    y = torch.tensor(data[1:]).float().view(batch_size, num_time_steps - 1, 1)

    output, hidden_prev = model(x, hidden_prev)
    # Carry the hidden state into the next iteration without its autograd
    # history, so backward() never reaches into earlier iterations.
    hidden_prev = hidden_prev.detach()

    loss = criterion(output, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % 100 == 0:
        print("Iteration: {} loss {}".format(step, loss.item()))
# Build one evaluation segment the same way the training segments are built.
start = np.random.randint(3, size=1)[0]
time_steps = np.linspace(start, start + 10, num_time_steps)
data = np.sin(time_steps).reshape(num_time_steps, 1)
x = torch.tensor(data[:-1]).float().view(1, num_time_steps - 1, 1)
y = torch.tensor(data[1:]).float().view(1, num_time_steps - 1, 1)

# Autoregressive rollout: only the first true sample is given to the model;
# every later input is the model's own previous prediction. hidden_prev is
# not reset here, so the rollout continues from whatever state it holds.
predictions = []
step_input = x[:, 0, :]
with torch.no_grad():  # inference only, no autograd graph needed
    for _ in range(x.shape[1]):
        step_input = step_input.view(1, 1, 1)
        pred, hidden_prev = model(step_input, hidden_prev)
        step_input = pred
        predictions.append(pred.numpy().ravel()[0])

# Plot the true curve (large dots and line) against the predictions.
x = x.numpy().ravel()
y = y.numpy()
plt.scatter(time_steps[:-1], x, s=90)
plt.plot(time_steps[:-1], x)
plt.scatter(time_steps[1:], predictions)
plt.show()