LSTM层输出中的output和hidden

定义:
- output: 包含LSTM每个时间步t的输出特征;
- h_t: 表示LSTM最后一层的输出特征。

在单向LSTM中, output在最后一个时间步维度上的输出 output[:, -1, :] 等于hidden; 在双向LSTM中, 可以通过拼接output最后一个时间步正向和反向的输出, 来得到和hidden一样的结果。另外, 注意nn.LSTM()中的 batch_first=True 参数: 它会将喂入LSTM的数据中batchsize维度提前; 如果输入维度中batchsize已经在第一个维度, 则无需设置。
构建简单模型验证
from torch import nn
class Config(object):
    """Hyper-parameter container for the toy LSTM demo model."""

    def __init__(self, vocab_size, embed_dim, label_num):
        # Sizes supplied by the caller (data / task dependent).
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.label_num = label_num
        # Fixed LSTM settings for this experiment.
        self.bidirectional = False
        self.hidden_size = 128
        self.num_layer = 1
        # 2 directions when bidirectional, otherwise 1.
        self.num_directions = 2 if self.bidirectional else 1
class Model(nn.Module):
    """Embedding + LSTM model used to compare ``hidden`` against the
    last-time-step slice of ``output``.

    ``forward`` returns the final hidden state and ``lstm_out[:, -1, :]``
    so the caller can verify that (for a unidirectional LSTM) they match.
    """

    def __init__(self, config):
        super().__init__()
        # The last vocabulary index is reserved as the padding token.
        self.embedding = nn.Embedding(
            config.vocab_size,
            config.embed_dim,
            padding_idx=config.vocab_size - 1,
        )
        # batch_first=True moves the batch dimension to the front of the
        # input/output tensors; the hidden-state layout is unaffected.
        self.lstm = nn.LSTM(
            config.embed_dim,
            config.hidden_size,
            config.num_layer,
            batch_first=True,
            bidirectional=config.bidirectional,
        )
        self.fc = nn.Linear(
            config.hidden_size * config.num_directions, config.label_num
        )

    def forward(self, input):
        """Return ``(hidden, output[:, -1, :])`` for an index tensor."""
        embedded = self.embedding(input)
        lstm_out, (hidden, cell) = self.lstm(embedded)
        # Features of the final time step: shape (batch, hidden_size).
        last_step = lstm_out[:, -1, :]
        return hidden, last_step
import torch
import numpy as np
# --- Sanity-check script: build the model and run one forward pass. ---
vocab_size = 100
embed_dim = 64
label_num = 2
epoch = 40  # declared but not used in this snippet
config = Config(vocab_size, embed_dim, label_num)
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model(config).to(device)
print(model)
# Non-negative random floats; .long() below truncates them to small
# integer token ids (well below vocab_size, so valid embedding indices).
x = abs(np.random.randn(128, 200))
print(x)
datas =torch.from_numpy(x).long().to(device)
# hidden: (num_layers*num_directions, batch, hidden); output: (batch, hidden)
hidden, output = model(datas)
hidden==output
tensor([[[True, True, True, ..., True, True, True],
[True, True, True, ..., True, True, True],
[True, True, True, ..., True, True, True],
...,
[True, True, True, ..., True, True, True],
[True, True, True, ..., True, True, True],
[True, True, True, ..., True, True, True]]])