Today's BiLSTM learning notes.
Written while following an expert's blog; see that post for the full details.
'''
BiLSTM model: study and implementation
luer 2021-04-13
'''
# Imports
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
# dtype = torch.FloatTensor  # unnecessary: torch.Tensor already defaults to float32
# Prepare the data
sentence = (
'GitHub Actions makes it easy to automate all your software workflows '
'from continuous integration and delivery to issue triage and more'
)
# print(sentence, type(sentence))  # the parenthesized literals above concatenate into a single str, equivalent to writing the sentence on one line
vocab = list(set(sentence.split()))  # set -> unordered, deduplicated (unlike C++ std::set, which keeps order)
word2idx = {w: i for i, w in enumerate(vocab)}  # build the vocabulary once so both maps stay consistent
idx2word = {i: w for i, w in enumerate(vocab)}
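# Sanity check (illustrative, not from the original post): every word gets a
# unique id in [0, vocab size); the exact ids depend on set iteration order,
# so they can differ between runs.
print(len(vocab), word2idx['GitHub'])  # 19, and some id in [0, 19)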
n_class = len(word2idx)  # 19; next-word prediction is a classification over the vocabulary
max_len = len(sentence.split())  # 21; maximum input length, used for padding in preprocessing
n_hidden = 5  # LSTM hidden size
# Preprocessing
def make_data(sentence):
    input_batch = []
    target_batch = []
    words = sentence.split()
    for i in range(max_len - 1):  # 20 (input, target) pairs
        input = [word2idx[n] for n in words[:(i + 1)]]  # ids of the first i+1 words
        input = input + [0] * (max_len - len(input))  # pad to max_len (note: id 0 is also a real word, a known quirk of this tutorial setup)
        target = word2idx[words[i + 1]]  # id of the next word
        input_batch.append(np.eye(n_class)[input])  # one-hot encode each id
        target_batch.append(target)
    return torch.tensor(np.array(input_batch), dtype=torch.float32), torch.LongTensor(target_batch)
input_batch, target_batch = make_data(sentence) # [max_len-1, max_len, n_class] [20,21,19]
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size=16, shuffle=True)
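# Illustrative check: with 20 samples and batch_size=16, each epoch yields one
# batch of 16 and one of 4 (shuffle=True reorders samples every epoch).
for xb, yb in loader:
    print(xb.shape, yb.shape)  # e.g. torch.Size([16, 21, 19]) torch.Size([16])
    break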
class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        self.fc = nn.Linear(n_hidden * 2, n_class)  # *2: forward and backward states are concatenated

    def forward(self, X):
        # X: [batch_size, max_len, n_class]
        batch_size = X.shape[0]
        input = X.transpose(0, 1)  # swap dims 0 and 1 -> [max_len, batch_size, n_class], the seq-first layout nn.LSTM expects
        hidden_state = torch.randn(1 * 2, batch_size, n_hidden)  # [num_layers(1) * num_directions(2), batch_size, n_hidden]
        cell_state = torch.randn(1 * 2, batch_size, n_hidden)    # same shape; randomly initialized on every forward pass
        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # last time step: [batch_size, n_hidden * 2]
        logits = self.fc(outputs)  # [batch_size, n_class], raw logits
        return logits
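# Shape sketch (illustrative): bidirectional=True concatenates the forward and
# backward hidden states, so the per-step feature size is n_hidden * 2 = 10,
# which is exactly what self.fc above takes as input.
demo_lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
demo_out, _ = demo_lstm(torch.randn(max_len, 3, n_class))  # [seq_len, batch, input_size]
print(demo_out.shape)  # torch.Size([21, 3, 10])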
model = BiLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
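# Note (illustrative): nn.CrossEntropyLoss takes raw logits [batch, n_class] and
# integer class targets [batch], applying log-softmax internally; that is why
# forward() returns unnormalized fc outputs with no softmax layer.
print(criterion(torch.randn(4, n_class), torch.randint(0, n_class, (4,))))  # a scalar loss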
# Training & testing
for epoch in range(10000):
    for x, y in loader:
        pred = model(x)
        loss = criterion(pred, y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# tensor.data.max gives the same values as tensor.max here; .data just detaches from autograd
with torch.no_grad():  # no gradients needed for inference
    predict = model(input_batch).max(1, keepdim=True)[1]  # index of the max along dim 1 (the class dimension)
print("predict: ", predict, predict.size())
print(sentence)
print([idx2word[n.item()] for n in predict.squeeze()])
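# Follow-up sketch (not in the original post): predict the word after an
# arbitrary prefix, reusing the same padding + one-hot encoding as make_data.
prefix = [word2idx[w] for w in 'GitHub Actions makes'.split()]
prefix += [0] * (max_len - len(prefix))
x = torch.tensor(np.eye(n_class)[prefix], dtype=torch.float32).unsqueeze(0)  # [1, max_len, n_class]
with torch.no_grad():
    print(idx2word[model(x).argmax(1).item()])  # should print 'it' after training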