九、循环神经网络(基础篇)
课程链接:Pytorch 深度学习实践——循环神经网络(基础篇)
PS:由于本人的研究方向是语音识别(Seq2Seq),所以CNN这部分就先跳过了,后面如果有学习上的需要再回来补充。
1、什么是RNN?
$$h_t=\tanh(W_{ih}x_t+b_{ih}+W_{hh}h_{t-1}+b_{hh})$$
2、RNN Cell in Pytorch
import torch

# Toy dimensions for the demo.
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

# RNNCell computes ONE time step; the loop over the sequence is written by hand below.
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# Random sequence laid out as (seq_len, batch_size, input_size).
dataset = torch.randn(seq_len, batch_size, input_size)
# Initial hidden state h0 of shape (batch_size, hidden_size).
hidden = torch.zeros(batch_size, hidden_size)

# Iterating over the first dimension yields one (batch_size, input_size) slice per step.
# The loop variable is deliberately NOT called `input`, so the builtin is not shadowed.
for idx, x_t in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input Size: ', x_t.shape)
    # input of shape=(batch_size,input_size), output of shape=(batch_size,hidden_size)
    hidden = cell(x_t, hidden)  # h1 = cell(x1, h0)
    print('Output Size: ', hidden.shape)
    print(hidden)
3、RNN in Pytorch
注释:inputs:{x1,x2,…,xN}、hidden(传入):h0、out:{h1,h2,…,hN}、hidden(传出):hN
# torch.nn.RNN consumes the whole sequence in one call, so no explicit time-step loop is needed.
# NOTE: this is a usage sketch; `num_layers` and `inputs` are defined in the full example that follows.
cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
# Input: inputs of shape=(seq_len,batch_size,input_size), hidden of shape=(num_layers,batch_size,hidden_size)
# Output: out of shape=(seq_len,batch_size,hidden_size), hidden of shape=(num_layers,batch_size,hidden_size)
# `out` collects h1..hN for every time step; the returned `hidden` is the final state hN.
out, hidden = cell(inputs, hidden)
num_layers:RNN 堆叠的层数
import torch

# Problem dimensions for the full-sequence RNN demo.
batch_size, seq_len = 1, 3
input_size, hidden_size = 4, 2
num_layers = 1

# The module is built before any data is drawn, so the random-number
# consumption order is: parameter init first, then the input sequence.
cell = torch.nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# inputs: (seq_len, batch_size, input_size)
inputs = torch.randn(seq_len, batch_size, input_size)
# h0:     (num_layers, batch_size, hidden_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

# out:    (seq_len, batch_size, hidden_size) -- hidden state at every step
# hidden: (num_layers, batch_size, hidden_size) -- final hidden state
out, hidden = cell(inputs, hidden)

# Report shapes and values in the same order as before.
for _caption, _value in (
    ('Output size: ', out.shape),
    ('Output: ', out),
    ('Hidden size: ', hidden.shape),
    ('Hidden: ', hidden),
):
    print(_caption, _value)
Example:train a model to learn “hello” -> “ohlol”
①The input of RNNCell should be vectors of numbers——将字符建立索引
②Loss Function——相当于多分类问题
③Prepare Data
batch_size = 1
input_size = 4
# The RNN hidden state is passed directly to CrossEntropyLoss as the class
# scores, so hidden_size must equal the number of classes (4 characters).
# With hidden_size = 2 the targets 2 and 3 are out of range and training fails.
hidden_size = 4

# Prepare Data
idx2char = ['e', 'h', 'l', 'o']  # dictionary: index -> character
x_data = [1, 0, 2, 2, 3]  # input sequence "hello"
y_data = [3, 1, 2, 3, 2]  # target sequence "ohlol"
# Row i is the one-hot vector of character index i.
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
# -1 lets view() infer seq_len, giving (seq_len, batch_size, input_size).
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# One target index per time step, shape (seq_len, 1).
labels = torch.LongTensor(y_data).view(-1, 1)
④Design Model
# Design Model
class Modol(torch.nn.Module):
    """Single-step recurrent model built on ``torch.nn.RNNCell``.

    The caller drives the time loop: each call to the model advances the
    hidden state by one step.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        batch_size: Batch size used to build the initial hidden state.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        # Zero-argument super() does not look up the global name `Modol`,
        # which is rebound by later class definitions in this file.
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.rnncell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Advance one time step.

        Args:
            input: Tensor of shape (batch_size, input_size).
            hidden: Previous hidden state, shape (batch_size, hidden_size).

        Returns:
            The new hidden state, shape (batch_size, hidden_size).
        """
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (batch_size, hidden_size)."""
        return torch.zeros(self.batch_size, self.hidden_size)
# Instantiate the RNNCell-based model with the sizes set in the data-preparation step.
model = Modol(input_size, hidden_size, batch_size)
⑤Loss Function And Optimizer
# Loss Function And Optimizer
# Cross-entropy treats each time step as a multi-class classification problem.
criterion = torch.nn.CrossEntropyLoss()
# Adam updates every parameter of `model` with learning rate 0.1.
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
⑥Training Cycle
# Training Cycle
for epoch in range(15):
    # The loss is accumulated over all time steps of the sequence,
    # then back-propagated once per epoch.
    loss = 0
    optimizer.zero_grad()
    hidden = model.init_hidden()
    print('Predicted string: ', end='')
    # zip walks the sequence dimension: x_t is (batch_size, input_size), label is (1,).
    # The loop variable is not named `input`, so the builtin is not shadowed.
    for x_t, label in zip(inputs, labels):
        hidden = model(x_t, hidden)
        # The hidden state doubles as the class scores for this step.
        loss += criterion(hidden, label)
        # Index of the highest score is the predicted character.
        _, idx = hidden.max(dim=1)
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))
⑦Change Model
# Design Model
class Modol(torch.nn.Module):
    """Whole-sequence recurrent model built on ``torch.nn.RNN``.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        batch_size: Batch size used to build the initial hidden state.
        num_layers: Number of stacked recurrent layers (default 1).
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        # Zero-argument super() does not look up the global name `Modol`,
        # which is rebound by other class definitions in this file.
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.rnn = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, input):
        """Run the RNN over a full sequence starting from a zero hidden state.

        Args:
            input: Tensor of shape (seq_len, batch_size, input_size).

        Returns:
            Tensor of shape (seq_len * batch_size, hidden_size).
        """
        hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        out, _ = self.rnn(input, hidden)
        # Flatten the time and batch dimensions so each row is one prediction.
        return out.view(-1, self.hidden_size)  # Reshape out to : (seqlen*batchsize, hiddensize)
# Rebuild the model from the torch.nn.RNN-based class; `num_layers` must already be defined at module level.
model = Modol(input_size, hidden_size, batch_size, num_layers)
4、Embedding in Pytorch(one-hot vectors to Embedding vectors)
# Design Model
class Modol(torch.nn.Module):
    """Embedding -> RNN -> Linear model for per-step classification.

    The sizes are read from module-level names, which must be defined before
    the class is instantiated or called: ``input_size`` (vocabulary size),
    ``embedding_size``, ``hidden_size``, ``num_layers`` and ``num_class``.
    """

    def __init__(self):
        # Zero-argument super() does not look up the global name `Modol`,
        # which is rebound by other class definitions in this file.
        super().__init__()
        # Maps each integer index to a dense vector of length embedding_size.
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        # input of RNN:(batch,seqlen,embeddingsize) output of RNN:(batch,seqlen,hiddensize)
        self.rnn = torch.nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        # input of FC:(batch,seqlen,hiddensize) output of FC:(batch,seqlen,numclass)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Compute class scores for every position of every sequence.

        Args:
            x: Integer index tensor of shape (batch, seqlen).

        Returns:
            Tensor of shape (batch * seqlen, num_class); rows are ordered
            batch-major because the RNN uses ``batch_first=True``.
        """
        # The hidden state is always (num_layers, batch, hidden), even with batch_first=True.
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emb(x)  # (batch, seqlen, embeddingsize)
        x, _ = self.rnn(x, hidden)
        x = self.fc(x)
        return x.view(-1, num_class)  # Reshape out to : (batch*seqlen, num_class)
# Instantiate the embedding-based model; the size globals it reads must be defined beforehand.
model = Modol()
5、作业1-LSTM(可以缓解梯度消失)
参考博客:LSTM算法详细解析
①遗忘门:Forget Gate
遗忘门的计算公式:
$$f_t=\sigma(W_f[h_{t-1},x_t]+b_f)$$
②输入门:Input Gate
输入门计算公式:
$$i_t=\sigma(W_i[h_{t-1},x_t]+b_i)$$
$$g_t=\tanh(W_C[h_{t-1},x_t]+b_C)$$
③Cell State
Cell State的计算公式:
$$C_t=f_t*C_{t-1}+i_t*g_t$$
④输出门:Output Gate
输出门计算公式:
$$o_t=\sigma(W_o[h_{t-1},x_t]+b_o)$$
$$h_t=o_t*\tanh(C_t)$$
# Design Model
class Modol(torch.nn.Module):
    """Embedding -> LSTM -> Linear model for per-step classification.

    The sizes are read from module-level names, which must be defined before
    the class is instantiated or called: ``input_size`` (vocabulary size),
    ``embedding_size``, ``hidden_size``, ``num_layers`` and ``num_class``.
    """

    def __init__(self):
        # Zero-argument super() does not look up the global name `Modol`,
        # which is rebound by other class definitions in this file.
        super().__init__()
        # Maps each integer index to a dense vector of length embedding_size.
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        # input of LSTM:(batch,seqlen,embeddingsize) output of LSTM:(batch,seqlen,hiddensize)
        self.lstm = torch.nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        # input of FC:(batch,seqlen,hiddensize) output of FC:(batch,seqlen,numclass)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Compute class scores for every position of every sequence.

        Args:
            x: Integer index tensor of shape (batch, seqlen).

        Returns:
            Tensor of shape (batch * seqlen, num_class); rows are ordered
            batch-major because the LSTM uses ``batch_first=True``.
        """
        # An LSTM carries two states: the hidden state h and the cell state c.
        # Both are (num_layers, batch, hidden), even with batch_first=True.
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        c = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emb(x)  # (batch, seqlen, embeddingsize)
        x, _ = self.lstm(x, (hidden, c))
        x = self.fc(x)
        return x.view(-1, num_class)  # Reshape out to : (batch*seqlen, num_class)
# Instantiate the LSTM-based model; the size globals it reads must be defined beforehand.
model = Modol()