$h_1 = \mathrm{RNNCell}(x_1, h_0)$
本质上RNN就是一个共享权重可复用的LinearLayer
重点:掌握RNN张量的维度尺寸
input.shape = (batchSize, inputSize)
output.shape = (batchSize, hiddenSize)
output张量即hidden张量
dataset.shape = (seqLen, batchSize, inputSize)
1. 使用RNNCell
import torch

# Tensor dimensions for the RNNCell demo.
batchSize = 1
seqLen = 3      # number of time steps per sequence: x1, x2, x3
inputSize = 4   # features per time step
hiddenSize = 2  # features in the hidden state

cell = torch.nn.RNNCell(input_size=inputSize, hidden_size=hiddenSize)

# dataSet: (seqLen, batchSize, inputSize) -- one random input sequence.
dataSet = torch.randn(seqLen, batchSize, inputSize)
# Initial hidden state h0: (batchSize, hiddenSize), all zeros.
hidden = torch.zeros(batchSize, hiddenSize)

# Run the cell once per time step, feeding the hidden state back in.
# Loop variable renamed from `input` so it no longer shadows the builtin.
for index, x in enumerate(dataSet):
    print('='*20, index, '='*20)
    print('Input size:', x.shape)
    hidden = cell(x, hidden)
    print('outputs size:', hidden.shape)
    print(hidden)
==================== 0 ====================
Input size: torch.Size([1, 4])
outputs size: torch.Size([1, 2])
tensor([[-0.4574, 0.2046]], grad_fn=<TanhBackward>)
==================== 1 ====================
Input size: torch.Size([1, 4])
outputs size: torch.Size([1, 2])
tensor([[-0.7797, 0.9349]], grad_fn=<TanhBackward>)
==================== 2 ====================
Input size: torch.Size([1, 4])
outputs size: torch.Size([1, 2])
tensor([[-0.9951, 0.2693]], grad_fn=<TanhBackward>)
2. 使用RNN
pyTorch模块的RNN相当于对RNNCell的迭代过程做了封装
上图是一个多层的RNN结构
# Same experiment using torch.nn.RNN, which wraps the per-step RNNCell loop.
# Constants are re-declared (same values as the previous cell) so this cell
# runs standalone instead of silently depending on earlier state.
batchSize = 1
seqLen = 3
inputSize = 4
hiddenSize = 2
# Number of stacked RNN layers.
numLayers = 1

cell = torch.nn.RNN(input_size=inputSize, hidden_size=hiddenSize,
                    num_layers=numLayers)

# inputs: (seqLen, batchSize, inputSize)
inputs = torch.randn(seqLen, batchSize, inputSize)
# h0: (numLayers, batchSize, hiddenSize)
hidden = torch.zeros(numLayers, batchSize, hiddenSize)

# out: top-layer hidden state at EVERY step -> (seqLen, batchSize, hiddenSize)
# hidden: final hidden state of EVERY layer -> (numLayers, batchSize, hiddenSize)
out, hidden = cell(inputs, hidden)
print('output size:', out.shape)
print('output:', out)
print('hidden size:', hidden.shape)
print('hidden', hidden)
output size: torch.Size([3, 1, 2])
output: tensor([[[ 0.3408, -0.4484]],
[[-0.8622, -0.9828]],
[[ 0.5427, 0.7002]]], grad_fn=<StackBackward>)
hidden size: torch.Size([1, 1, 2])
hidden tensor([[[0.5427, 0.7002]]], grad_fn=<StackBackward>)
3. 使用RNN训练
input: ‘hello’
target: ‘ohlol’
import numpy as np

# Data for the toy task: learn to map "hello" -> "ohlol".
batchSize = 1
inputSize = 4
hiddenSize = 4

# Index -> character table used to decode predictions.
index2char = ['e', 'h', 'l', 'o']
xData = [1, 0, 2, 2, 3]   # "hello" as character indices
yData = [3, 1, 2, 3, 2]   # "ohlol" as character indices

# 4x4 identity matrix: row i is the one-hot vector for character i.
oneHotLookup = np.diag(np.ones(4))
xOneHot = [oneHotLookup[idx] for idx in xData]

# inputs: (seqLen, batchSize, inputSize) float tensor; labels: (seqLen,) long tensor.
inputs = torch.Tensor(xOneHot).view(-1, batchSize, inputSize)
labels = torch.LongTensor(yData)
class Model(torch.nn.Module):
    """Plain RNN that maps one-hot characters to per-step class scores."""

    def __init__(self, inputSize, hiddenSize, batchSize, numLayers):
        super(Model, self).__init__()
        self.numLayers = numLayers
        # Kept for backward compatibility; forward() now infers the batch size.
        self.batchSize = batchSize
        self.inputSize = inputSize
        self.hiddenSize = hiddenSize
        self.rnn = torch.nn.RNN(input_size=self.inputSize,
                                hidden_size=self.hiddenSize,
                                num_layers=self.numLayers)

    def forward(self, input):
        """input: (seqLen, batch, inputSize) -> (seqLen * batch, hiddenSize)."""
        # h0 is all zeros. Bug fix: use the actual batch size of `input`
        # (input.size(1)) instead of the constructor's batchSize, matching the
        # later models in this file and supporting any batch size.
        hidden = torch.zeros(
            self.numLayers,
            input.size(1),
            self.hiddenSize
        )
        out, _ = self.rnn(input, hidden)
        # Flatten so CrossEntropyLoss sees one row of scores per time step.
        return out.view(-1, self.hiddenSize)
net = Model(inputSize, hiddenSize, batchSize, numLayers)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Fit the single sequence; print the decoded prediction every epoch.
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Per-step argmax over class scores -> predicted character indices.
    predIdx = outputs.argmax(dim=1).numpy()
    print('Predicted', ''.join([index2char[x] for x in predIdx]), end='')
    print(',Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Predicted ehele,Epoch [1/15] loss = 1.516
Predicted eolll,Epoch [2/15] loss = 1.353
Predicted eolll,Epoch [3/15] loss = 1.236
Predicted lolll,Epoch [4/15] loss = 1.145
Predicted oolll,Epoch [5/15] loss = 1.070
Predicted oolol,Epoch [6/15] loss = 1.007
Predicted oolol,Epoch [7/15] loss = 0.952
Predicted oolol,Epoch [8/15] loss = 0.900
Predicted oolol,Epoch [9/15] loss = 0.849
Predicted oolol,Epoch [10/15] loss = 0.800
Predicted oolol,Epoch [11/15] loss = 0.755
Predicted ohlol,Epoch [12/15] loss = 0.717
Predicted ohlol,Epoch [13/15] loss = 0.684
Predicted ohlol,Epoch [14/15] loss = 0.652
Predicted ohlol,Epoch [15/15] loss = 0.621
4. 使用嵌入层
oneHot独热向量数据稀疏,维度高。需要将其数据转移到嵌入层中,便于计算。
# Hyper-parameters for the embedding-based model.
numClass = 4        # size of the output vocabulary
inputSize = 4       # size of the input vocabulary
hiddenSize = 8
embeddingSize = 10  # dense embedding dimension replacing sparse one-hot input
numLayers = 2
batchSize = 1
seqLen = 5

xData = [1, 0, 2, 2, 3]   # "hello"
yData = [3, 1, 2, 3, 2]   # "ohlol"

# inputs: (seqLen, batchSize) of character indices; labels: (seqLen,)
inputs = torch.LongTensor(xData).view(seqLen, batchSize)
labels = torch.LongTensor(yData)
class Model(torch.nn.Module):
    """Embedding + RNN + Linear character model.

    A learned dense embedding replaces the sparse one-hot input; the final
    linear layer maps each step's hidden state to numClass scores.
    """

    def __init__(self, inputSize=4, embeddingSize=10, hiddenSize=8,
                 numLayers=2, numClass=4):
        # Defaults match the module-level constants, so Model() behaves as
        # before; the hyper-parameters are now stored on self instead of
        # being read from globals inside forward().
        super(Model, self).__init__()
        self.numLayers = numLayers
        self.hiddenSize = hiddenSize
        self.numClass = numClass
        self.emb = torch.nn.Embedding(inputSize, embeddingSize)
        self.rnn = torch.nn.RNN(input_size=embeddingSize,
                                hidden_size=hiddenSize,
                                num_layers=numLayers,
                                )
        self.fc = torch.nn.Linear(hiddenSize, numClass)

    def forward(self, x):
        """x: (seqLen, batchSize) LongTensor -> (seqLen * batchSize, numClass)."""
        # h0: (numLayers, batchSize, hiddenSize), zeros; batch inferred from x.
        hidden = torch.zeros(self.numLayers, x.size(1), self.hiddenSize)
        x = self.emb(x)             # -> (seqLen, batchSize, embeddingSize)
        x, _ = self.rnn(x, hidden)  # -> (seqLen, batchSize, hiddenSize)
        x = self.fc(x)              # -> (seqLen, batchSize, numClass)
        return x.view(-1, self.numClass)
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Standard train step: forward, loss, backward, update; then decode.
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    predicted = outputs.argmax(dim=1).numpy()
    print('Predicted', ''.join([index2char[c] for c in predicted]), end='')
    print(',Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Predicted eleee,Epoch [1/15] loss = 1.450
Predicted ohlll,Epoch [2/15] loss = 1.006
Predicted ohlll,Epoch [3/15] loss = 0.738
Predicted ohlol,Epoch [4/15] loss = 0.502
Predicted ohlol,Epoch [5/15] loss = 0.327
Predicted ohlol,Epoch [6/15] loss = 0.223
Predicted ohlol,Epoch [7/15] loss = 0.152
Predicted ohlol,Epoch [8/15] loss = 0.105
Predicted ohlol,Epoch [9/15] loss = 0.074
Predicted ohlol,Epoch [10/15] loss = 0.053
Predicted ohlol,Epoch [11/15] loss = 0.038
Predicted ohlol,Epoch [12/15] loss = 0.028
Predicted ohlol,Epoch [13/15] loss = 0.021
Predicted ohlol,Epoch [14/15] loss = 0.016
Predicted ohlol,Epoch [15/15] loss = 0.012
5. 使用LSTM和GRU
5.1 LSTM长短期记忆网络
# Hyper-parameters for the LSTM model (identical to the embedding section).
numClass = 4
inputSize = 4
hiddenSize = 8
embeddingSize = 10
numLayers = 2
batchSize = 1
seqLen = 5

xData = [1, 0, 2, 2, 3]   # "hello"
yData = [3, 1, 2, 3, 2]   # "ohlol"

# inputs: (seqLen, batchSize) of character indices; labels: (seqLen,)
inputs = torch.LongTensor(xData).view(seqLen, batchSize)
labels = torch.LongTensor(yData)
class Model(torch.nn.Module):
    """Embedding + LSTM + Linear character model."""

    def __init__(self, inputSize=4, embeddingSize=10, hiddenSize=8,
                 numLayers=2, numClass=4):
        # Defaults match the module-level constants, so Model() behaves as
        # before; hyper-parameters live on self instead of globals.
        super(Model, self).__init__()
        self.numLayers = numLayers
        self.hiddenSize = hiddenSize
        self.numClass = numClass
        self.emb = torch.nn.Embedding(inputSize, embeddingSize)
        self.lstm = torch.nn.LSTM(input_size=embeddingSize,
                                  hidden_size=hiddenSize,
                                  num_layers=numLayers,
                                  )
        self.fc = torch.nn.Linear(hiddenSize, numClass)

    def forward(self, x):
        """x: (seqLen, batchSize) LongTensor -> (seqLen * batchSize, numClass)."""
        # An LSTM needs both an initial hidden state h0 and cell state c0;
        # both are (numLayers, batchSize, hiddenSize) and must match in shape.
        hidden = torch.zeros(self.numLayers, x.size(1), self.hiddenSize)
        cell = torch.zeros(self.numLayers, x.size(1), self.hiddenSize)
        x = self.emb(x)
        x, _ = self.lstm(x, (hidden, cell))
        x = self.fc(x)
        return x.view(-1, self.numClass)
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Train the LSTM variant and report the decoded sequence per epoch.
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    bestIdx = outputs.argmax(dim=1).numpy()
    print('Predicted', ''.join([index2char[i] for i in bestIdx]), end='')
    print(',Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Predicted eeeee,Epoch [1/15] loss = 1.533
Predicted eeeee,Epoch [2/15] loss = 1.464
Predicted ooooo,Epoch [3/15] loss = 1.389
Predicted ooooo,Epoch [4/15] loss = 1.302
Predicted ooooo,Epoch [5/15] loss = 1.212
Predicted oooll,Epoch [6/15] loss = 1.123
Predicted oolll,Epoch [7/15] loss = 1.036
Predicted ollll,Epoch [8/15] loss = 0.954
Predicted ollll,Epoch [9/15] loss = 0.879
Predicted ohlll,Epoch [10/15] loss = 0.804
Predicted ohlll,Epoch [11/15] loss = 0.735
Predicted ohlll,Epoch [12/15] loss = 0.685
Predicted ohlll,Epoch [13/15] loss = 0.637
Predicted ohlll,Epoch [14/15] loss = 0.586
Predicted ohlll,Epoch [15/15] loss = 0.533
5.2 GRU门控循环单元
# Hyper-parameters for the GRU model (identical to the previous sections).
numClass = 4
inputSize = 4
hiddenSize = 8
embeddingSize = 10
numLayers = 2
batchSize = 1
seqLen = 5

xData = [1, 0, 2, 2, 3]   # "hello"
yData = [3, 1, 2, 3, 2]   # "ohlol"

# inputs: (seqLen, batchSize) of character indices; labels: (seqLen,)
inputs = torch.LongTensor(xData).view(seqLen, batchSize)
labels = torch.LongTensor(yData)
class Model(torch.nn.Module):
    """Embedding + GRU + Linear character model."""

    def __init__(self, inputSize=4, embeddingSize=10, hiddenSize=8,
                 numLayers=2, numClass=4):
        # Defaults match the module-level constants, so Model() behaves as
        # before; hyper-parameters live on self instead of globals.
        super(Model, self).__init__()
        self.numLayers = numLayers
        self.hiddenSize = hiddenSize
        self.numClass = numClass
        self.emb = torch.nn.Embedding(inputSize, embeddingSize)
        self.gru = torch.nn.GRU(input_size=embeddingSize,
                                hidden_size=hiddenSize,
                                num_layers=numLayers,
                                )
        self.fc = torch.nn.Linear(hiddenSize, numClass)

    def forward(self, x):
        """x: (seqLen, batchSize) LongTensor -> (seqLen * batchSize, numClass)."""
        # h0: (numLayers, batchSize, hiddenSize), zeros; the GRU needs no cell state.
        hidden = torch.zeros(self.numLayers, x.size(1), self.hiddenSize)
        x = self.emb(x)
        x, _ = self.gru(x, hidden)
        x = self.fc(x)
        return x.view(-1, self.numClass)
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

# Train the GRU variant; decode and print the prediction each epoch.
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    chosen = outputs.argmax(dim=1).numpy()
    print('Predicted', ''.join([index2char[j] for j in chosen]), end='')
    print(',Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Predicted hhhhh,Epoch [1/15] loss = 1.424
Predicted ohhll,Epoch [2/15] loss = 1.244
Predicted ohlll,Epoch [3/15] loss = 1.098
Predicted ollll,Epoch [4/15] loss = 0.988
Predicted ollll,Epoch [5/15] loss = 0.901
Predicted ohlll,Epoch [6/15] loss = 0.795
Predicted ohlll,Epoch [7/15] loss = 0.698
Predicted ohlll,Epoch [8/15] loss = 0.622
Predicted ohlll,Epoch [9/15] loss = 0.561
Predicted ohlll,Epoch [10/15] loss = 0.507
Predicted ohlll,Epoch [11/15] loss = 0.450
Predicted ohlll,Epoch [12/15] loss = 0.377
Predicted ohlol,Epoch [13/15] loss = 0.310
Predicted ohlol,Epoch [14/15] loss = 0.263
Predicted ohlol,Epoch [15/15] loss = 0.216