Dive into Deep Learning (PyTorch Edition): Attention Mechanism and Seq2seq Models

import math
import torch 
import torch.nn as nn

import os
def file_name_walk(file_dir):
    for root, dirs, files in os.walk(file_dir):
        # print("root", root)  # current directory path
        print("dirs", dirs)    # all sub-directories under the current path
        print("files", files)  # all non-directory files under the current path

file_name_walk("/home/kesci/input/fraeng6506")

Softmax masking
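Sequences in a minibatch are padded to a common length, and the padded positions should not receive any attention weight. The helpers below set the scores at invalid positions to a large negative value before the softmax, so those positions end up with (essentially) zero probability.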

def SequenceMask(X, X_len, value=-1e6):
    maxlen = X.size(1)
    # positions at or beyond each sequence's valid length are masked out
    mask = torch.arange(maxlen, dtype=torch.float, device=X.device)[None, :] >= X_len[:, None]
    X[mask] = value
    return X

def masked_softmax(X, valid_length):
    # X: 3-D tensor, valid_length: 1-D or 2-D tensor
    softmax = nn.Softmax(dim=-1)
    if valid_length is None:
        return softmax(X)
    else:
        shape = X.shape
        if valid_length.dim() == 1:
            # repeat each valid length once per query, e.g. [2, 3] -> [2, 2, 3, 3]
            valid_length = valid_length.float().repeat_interleave(shape[1], dim=0)
        else:
            valid_length = valid_length.reshape((-1,)).float()
        # fill masked elements with a large negative value, whose exp is 0
        X = SequenceMask(X.reshape((-1, shape[-1])), valid_length)
        return softmax(X).reshape(shape)

masked_softmax(torch.rand((2,2,4),dtype=torch.float), torch.FloatTensor([2,3]))

tensor([[[0.5423, 0.4577, 0.0000, 0.0000],
         [0.5290, 0.4710, 0.0000, 0.0000]],

        [[0.2969, 0.2966, 0.4065, 0.0000],
         [0.3607, 0.2203, 0.4190, 0.0000]]])
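With valid lengths [2, 3], only the first two entries of each row in the first example and the first three in the second contribute to the softmax; the remaining (padded) positions are zeroed out.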

Matrix multiplication beyond two dimensions

torch.bmm(torch.ones((2,1,3), dtype = torch.float), torch.ones((2,3,2), dtype = torch.float))
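torch.bmm multiplies the two inputs batch by batch: a (2, 1, 3) tensor times a (2, 3, 2) tensor gives a (2, 1, 2) result, and since both inputs are all ones, every entry of the result equals 3.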

Dot product attention
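Assuming the query and each key share the same dimension d, the attention score is the scaled dot product score(q, k) = ⟨q, k⟩ / √d; the scores are normalized with masked_softmax and used as weights over the values.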

# Save to the d2l package.
class DotProductAttention(nn.Module): 
    def __init__(self, dropout, **kwargs):
        super(DotProductAttention, self).__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)

    # query: (batch_size, #queries, d)
    # key: (batch_size, #kv_pairs, d)
    # value: (batch_size, #kv_pairs, dim_v)
    # valid_length: either (batch_size, ) or (batch_size, xx)
    def forward(self, query, key, value, valid_length=None):
        d = query.shape[-1]
        # swap the last two dimensions of key, then scale the scores by sqrt(d)
        scores = torch.bmm(query, key.transpose(1, 2)) / math.sqrt(d)
        attention_weights = self.dropout(masked_softmax(scores, valid_length))
        return torch.bmm(attention_weights, value)
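As a quick sanity check (the shapes and values below are illustrative, not taken from the original notebook): one query per batch attends over 10 key-value pairs, with only the first 2 and first 6 pairs valid, respectively; the output has shape (2, 1, 4).

atten = DotProductAttention(dropout=0)
# keys: (2, 10, 2), values: (2, 10, 4), query: (2, 1, 2)
keys = torch.ones((2, 10, 2), dtype=torch.float)
values = torch.arange(40, dtype=torch.float).view(1, 10, 4).repeat(2, 1, 1)
atten(torch.ones((2, 1, 2), dtype=torch.float), keys, values,
      torch.FloatTensor([2, 6]))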