PyTorch implementation of a BiLSTM

The overall LSTM architecture is shown below:

[Figure: overall LSTM architecture]

The forget gate is as follows:

[Figure: forget gate]

The forget gate's outputs all lie between 0 and 1, so they effectively act as different weights.
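For reference, this is the standard forget-gate equation (usual LSTM notation, where $\sigma$ is the sigmoid and $[h_{t-1}, x_t]$ denotes the concatenation of the previous hidden state with the current input):

$$f_t = \sigma\left(W_f \cdot [h_{t-1}, x_t] + b_f\right)$$

Because $\sigma$ maps into $(0, 1)$, each entry of $f_t$ weights the corresponding entry of the previous cell state $C_{t-1}$.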

The input gate (which could also be called the update gate) is as follows:

[Figure: input gate]
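In the same notation, the input gate $i_t$ and the candidate values $\tilde{C}_t$ together decide what gets written into the new cell state:

$$i_t = \sigma\left(W_i \cdot [h_{t-1}, x_t] + b_i\right), \qquad \tilde{C}_t = \tanh\left(W_C \cdot [h_{t-1}, x_t] + b_C\right)$$

$$C_t = f_t \odot C_{t-1} + i_t \odot \tilde{C}_t$$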

The output gate is as follows:

[Figure: output gate]

The output gate has two branches: one becomes the hidden state passed to the next time step, and the other serves as this time step's output.
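In formulas, both branches are the same quantity $h_t$, produced by gating the squashed cell state with $o_t$:

$$o_t = \sigma\left(W_o \cdot [h_{t-1}, x_t] + b_o\right), \qquad h_t = o_t \odot \tanh(C_t)$$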

Code source: BiLSTM的PyTorch应用 - mathor


    
    
```python
'''
code by Tae Hwan Jung(Jeff Jung) @graykode, modify by wmathor
'''
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data

dtype = torch.FloatTensor
```

Preparing the data


    
    
```python
sentence = (
    'GitHub Actions makes it easy to automate all your software workflows from continuous integration and delivery to issue triage and more'
)
word2idx = {w: i for i, w in enumerate(list(set(sentence.split())))}
idx2word = {i: w for i, w in enumerate(list(set(sentence.split())))}
n_class = len(word2idx)  # classification problem
max_len = len(sentence.split())
n_hidden = 5
```

    
    
```python
# word2idx = {'automate': 0, 'all': 1, 'and': 2, 'integration': 3, 'your': 4, 'issue': 5, 'continuous': 6, 'triage': 7, 'delivery': 8, 'Actions': 9,
#             'from': 10, 'easy': 11, 'software': 12, 'makes': 13, 'it': 14, 'workflows': 15, 'GitHub': 16, 'to': 17, 'more': 18}
# idx2word is simply word2idx with keys and values swapped
# n_class = 19
# max_len = 21
```

Processing the data


    
    
```python
def make_data(sentence):
    input_batch = []
    target_batch = []
    words = sentence.split()
    for i in range(max_len - 1):
        input = [word2idx[n] for n in words[:(i + 1)]]
        input = input + [-1] * (max_len - len(input))  # pad the prefix up to max_len with -1
        target = word2idx[words[i + 1]]  # the next word is the label
        input_batch.append(np.eye(n_class)[input])  # one-hot encode the (padded) prefix
        target_batch.append(target)
    return torch.Tensor(input_batch), torch.LongTensor(target_batch)

# input_batch: [max_len - 1, max_len, n_class]
input_batch, target_batch = make_data(sentence)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, 16, True)  # batch_size = 16; adjust to your machine
```

Visualizing the relevant variables:
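A quick way to inspect them is to print the shapes (a minimal sketch; the numbers follow from the sentence having 21 words, 19 of them unique):

```python
print(input_batch.shape)   # torch.Size([20, 21, 19]) -> [max_len - 1, max_len, n_class]
print(target_batch.shape)  # torch.Size([20])         -> one next-word label per prefix
```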


    
    
```python
class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        # fc
        self.fc = nn.Linear(n_hidden * 2, n_class)

    def forward(self, X):
        # X: [batch_size, max_len, n_class]
        batch_size = X.shape[0]
        input = X.transpose(0, 1)  # input: [max_len, batch_size, n_class]
        hidden_state = torch.randn(1 * 2, batch_size, n_hidden)  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        cell_state = torch.randn(1 * 2, batch_size, n_hidden)    # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden * 2]
        model = self.fc(outputs)  # model: [batch_size, n_class]
        return model

model = BiLSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
```
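A note on `nn.Linear(n_hidden * 2, n_class)`: a bidirectional LSTM concatenates the forward and backward hidden states at every time step, so the output feature dimension is twice `n_hidden`. A minimal check (the names `bi`, `uni`, and `x` are just for illustration):

```python
bi = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
uni = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=False)
x = torch.randn(max_len, 1, n_class)  # [seq_len, batch_size, input_size]
print(bi(x)[0].shape[-1])   # 10 -> n_hidden * 2
print(uni(x)[0].shape[-1])  # 5  -> n_hidden * 1
```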

PyTorch's expected input and output format for nn.LSTM is illustrated below:
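In code form (these shapes match the `nn.LSTM` documentation for the default `batch_first=False`; the batch size of 3 and the name `rnn` are just for illustration):

```python
rnn = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
x = torch.randn(max_len, 3, n_class)  # input: [seq_len, batch_size, input_size]
h0 = torch.randn(1 * 2, 3, n_hidden)  # h_0:   [num_layers * num_directions, batch_size, hidden_size]
c0 = torch.randn(1 * 2, 3, n_hidden)  # c_0:   same shape as h_0
out, (hn, cn) = rnn(x, (h0, c0))
print(out.shape)  # [21, 3, 10] -> [seq_len, batch_size, num_directions * hidden_size]
print(hn.shape)   # [2, 3, 5]   -> [num_layers * num_directions, batch_size, hidden_size]
```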

 


    
    
```python
# Training
for epoch in range(10000):
    for x, y in loader:
        pred = model(x)
        loss = criterion(pred, y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```

 


    
    
```python
# Prediction
predict = model(input_batch).data.max(1, keepdim=True)[1]
print(sentence)
print([idx2word[n.item()] for n in predict.squeeze()])
```

The results here are a bit better than the original author's, because I changed the padding placeholder to -1, so the padding no longer interferes with the labels.
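Concretely, `np.eye(n_class)[-1]` is the one-hot row of the last index, so every padded position maps to that one fixed vector (the original presumably padded with 0). A tiny illustration:

```python
print(np.eye(3)[[0, -1]])
# [[1. 0. 0.]   <- index 0: the one-hot vector of class 0
#  [0. 0. 1.]]  <- index -1: the one-hot vector of the last class
```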

Note: if the forward pass of this model is unclear, take a look at the unidirectional variant below:

 


    
    
```python
class BiLSTM_1(nn.Module):
    def __init__(self):
        super(BiLSTM_1, self).__init__()
        self.lstm = nn.LSTM(input_size=10, hidden_size=5, bidirectional=False)
        # fc
        self.fc = nn.Linear(n_hidden * 1, n_class)

    def forward(self, X):
        # X: [batch_size, max_len, input_size]
        batch_size = X.shape[0]
        input = X.transpose(0, 1)  # input: [max_len, batch_size, input_size]
        # randomly initialize the hidden state and the cell state
        hidden_state = torch.randn(1 * 1, batch_size, n_hidden)  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        cell_state = torch.randn(1 * 1, batch_size, n_hidden)    # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        outputs, (hc, c) = self.lstm(input, (hidden_state, cell_state))
        # outputs: [max_len, batch_size, n_hidden * 1] -- every time step, not just the last
        # model = self.fc(outputs)  # model: [batch_size, n_class]
        return outputs, hc, c

model = BiLSTM_1()
```

    
    
```python
a = torch.randn(2, 5, 10)  # [batch_size=2, max_len=5, input_size=10]
output, hc, c = model(a)
```

In fact, `output` stacks the output of every time step, while `hc` only holds the final time step's hidden state, so `output[-1] == hc`.
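This is easy to verify on the tensors returned above (`hc` has a leading `num_layers * num_directions` dimension of size 1, hence the `hc[0]`):

```python
print(output[-1].shape)                # [2, 5] -> [batch_size, n_hidden]
print(hc[0].shape)                     # [2, 5]
print(torch.equal(output[-1], hc[0]))  # True
```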

Finally, I wish you all success in your studies!

The following is a simple example of using a BiLSTM in PyTorch for one-dimensional speech classification:

```python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Build the BiLSTM model
class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        # `device` is defined below at module level
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # classify from the last time step
        return out

# Hyperparameters
input_size = 1
hidden_size = 64
num_layers = 2
num_classes = 10
batch_size = 64
learning_rate = 0.001
num_epochs = 10

# Prepare the data
train_data = np.load('train_data.npy')
train_labels = np.load('train_labels.npy')
train_data = np.expand_dims(train_data, axis=2)
train_labels = torch.from_numpy(train_labels).long()
# .float() ensures float32 inputs (np.load often yields float64)
train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data).float(), train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Select the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss function, and optimizer
model = BiLSTM(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for i, (data, labels) in enumerate(train_loader):
        data = data.to(device)
        labels = labels.to(device)
        outputs = model(data)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()))

# Evaluate the model
model.eval()
test_data = np.load('test_data.npy')
test_labels = np.load('test_labels.npy')
test_data = np.expand_dims(test_data, axis=2)
test_labels = torch.from_numpy(test_labels).long()
test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(test_data).float(), test_labels)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
with torch.no_grad():
    correct = 0
    total = 0
    for data, labels in test_loader:
        data = data.to(device)
        labels = labels.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Test Accuracy of the model on the {} test samples: {} %'.format(total, 100 * correct / total))
```

In this example, we apply a model made of two stacked bidirectional LSTM layers to a one-dimensional speech-classification task. We use the Adam optimizer to update the model parameters and cross-entropy as the loss function. After training, we evaluate the model's accuracy on the test set.
