import math
import torch
from torch import nn
from d2l import torch as d2l

"""通过在最后一个轴上掩蔽元素来执行softmax操作"""
def masked_softmax(X, valid_lens):
    """Apply softmax over the last axis, ignoring positions past a valid length.

    Args:
        X: 3-D tensor of scores; softmax is taken over its last axis.
        valid_lens: ``None`` (plain softmax over every position), a 1-D
            tensor with one valid length per matrix ``X[i]`` (shared by all
            rows of that matrix), or a 2-D tensor with one valid length per
            row ``X[i, j]``.

    Returns:
        A tensor shaped like ``X``. Along the last axis the entries sum to 1
        and entries at index ``>= valid length`` are numerically zero.
        ``X`` itself is not modified.
    """
    if valid_lens is None:
        # dim=-1 is the last axis (dim=2 for a 3-D tensor), i.e. each row.
        return nn.functional.softmax(X, dim=-1)

    shape = X.shape
    if valid_lens.dim() == 1:
        # One length per matrix: repeat it once for every row of that matrix.
        valid_lens = torch.repeat_interleave(valid_lens, shape[1])
    else:
        # One length per row already: just flatten to 1-D.
        valid_lens = valid_lens.reshape(-1)

    # Build a (rows, last_dim) boolean mask that is True for kept positions.
    positions = torch.arange(shape[-1], device=X.device)
    keep = positions[None, :] < valid_lens.to(X.device)[:, None]

    # Masked positions get a very large negative score so that their softmax
    # output underflows to 0 (same effect as d2l.sequence_mask(value=-1e6)).
    masked = X.reshape(-1, shape[-1]).masked_fill(~keep, -1e6)
    return nn.functional.softmax(masked.reshape(shape), dim=-1)

以下是输入X与valid_lens的对应情况的三个例子：

## 2.加性注意力机制

class AdditiveAttention(nn.Module):
    """Additive attention: score(q, k) = w_v^T tanh(W_q q + W_k k).

    After each ``forward`` call the masked-softmax weights are kept in
    ``self.attention_weights`` so they can be inspected or plotted.
    """

    def __init__(self, key_size, query_size, num_hiddens, dropout, **kwargs):
        """Create the projection layers.

        Args:
            key_size: size of the last axis of ``keys``.
            query_size: size of the last axis of ``queries``.
            num_hiddens: size of the shared hidden space.
            dropout: dropout probability applied to the attention weights.
            **kwargs: forwarded to ``nn.Module.__init__``.
        """
        # nn.Module must be initialised before any submodule is assigned,
        # otherwise the assignments below raise AttributeError.
        super().__init__(**kwargs)
        # All three projections are bias-free linear maps.
        self.W_k = nn.Linear(key_size, num_hiddens, bias=False)
        self.W_q = nn.Linear(query_size, num_hiddens, bias=False)
        self.w_v = nn.Linear(num_hiddens, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, queries, keys, values, valid_lens):
        """Return the attention-weighted combination of ``values``.

        Expected shapes (3-D inputs, as required by ``torch.bmm``):
        ``queries`` (batch, num_queries, query_size), ``keys``
        (batch, num_kv, key_size), ``values`` (batch, num_kv, value_dim).
        ``valid_lens`` is passed unchanged to the module-level
        ``masked_softmax`` helper.
        """
        queries, keys = self.W_q(queries), self.W_k(keys)
        # (batch, num_queries, 1, h) + (batch, 1, num_kv, h) broadcasts to
        # (batch, num_queries, num_kv, h): every query paired with every key.
        features = queries.unsqueeze(2) + keys.unsqueeze(1)
        features = torch.tanh(features)
        # w_v maps h -> 1; drop that trailing axis to get one score per pair.
        scores = self.w_v(features).squeeze(-1)
        # Normalise the scores, ignoring key positions beyond valid_lens.
        self.attention_weights = masked_softmax(scores, valid_lens)
        return torch.bmm(self.dropout(self.attention_weights), values)

## 3.带入一个样例测试一下

# Toy batch of 2 samples: one 20-dim query each, and 10 key-value pairs with
# 2-dim keys and 4-dim values (the same values repeated for both samples).
queries = torch.normal(0, 1, (2, 1, 20))
keys = torch.ones((2, 10, 2))
values = torch.arange(40, dtype=torch.float32).reshape(1, 10, 4).repeat(2, 1, 1)
print(queries)  # shape (2, 1, 20)
print(keys)
print(values)
# Sample 0 may attend to its first 2 keys, sample 1 to its first 6.
valid_lens = torch.tensor([2, 6])

attention = AdditiveAttention(
    key_size=2, query_size=20, num_hiddens=8, dropout=0.1
)
# eval() switches the module to evaluation mode, so the nn.Dropout layer
# inside it passes the attention weights through unchanged.
attention.eval()
attention(queries, keys, values, valid_lens)

# Plot the stored attention weights as a heatmap (2 queries x 10 keys).
d2l.show_heatmaps(
    attention.attention_weights.reshape((1, 1, 2, 10)),
    xlabel='Keys',
    ylabel='Queries',
)


### 1）torch.randn()函数 ，返回一个均值为0，方差为1的正态分布中填充随机数的张量

>>> torch.randn(4) # 一行四列
tensor([-2.1436,  0.9966,  2.3426, -0.6366])
>>> torch.randn(2,3) # 两行三列
tensor([[ 1.5954,  2.8929, -1.0923],
[ 1.1719, -0.4709, -0.1996]])
>>> torch.randn(2,2,3) # 两维两行三列
tensor([[[-0.1687, -0.2883, -1.2846],
[ 0.8579,  1.1618,  1.5979]],

[[-1.2387, -0.7416, -0.4778],
[-0.6276, -1.6339,  1.0678]]])

### 2）torch.nn.functional.softmax()函数，计算张量的概率分布

# nn.functional.softmax(X, dim): for a 2-D tensor, dim=0 normalises down each
# column (every column sums to 1) and dim=1 normalises along each row.
# torch.nn has no `Tensor` attribute, so build the float tensors with
# torch.tensor(...) instead.
x = torch.tensor(
    [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], dtype=torch.float32
)

y1 = nn.functional.softmax(x, dim=0)  # softmax over each column
print(y1)

y2 = nn.functional.softmax(x, dim=1)  # softmax over each row
print(y2)

x1 = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
print(x1)

# A 1-D tensor only has dim 0 (or -1); dim=1 is out of range and raises.
y3 = nn.functional.softmax(x1, dim=0)
print(y3)

#输出
tensor([[0.3333, 0.3333, 0.3333, 0.3333],
[0.3333, 0.3333, 0.3333, 0.3333],
[0.3333, 0.3333, 0.3333, 0.3333]])
tensor([[0.0321, 0.0871, 0.2369, 0.6439],
[0.0321, 0.0871, 0.2369, 0.6439],
[0.0321, 0.0871, 0.2369, 0.6439]])
tensor([1., 2., 3., 4.])
tensor([0.0321, 0.0871, 0.2369, 0.6439])


import torch
import torch.nn.functional as F
# Random (2, 2, 3) tensor: 2 matrices, each with 2 rows and 3 columns.
# Named `inp` rather than `input` so the builtin input() is not shadowed.
inp = torch.randn(2, 2, 3)
print(inp)

# dim=0: normalise across the 2 matrices, i.e. entries at the same
# (row, column) position in each matrix sum to 1.
m1 = F.softmax(inp, dim=0)
print(m1)

# dim=1: normalise across the rows inside each matrix, so every column of
# every matrix sums to 1.
m2 = F.softmax(inp, dim=1)
print(m2)

# dim=2: normalise along the last axis, so every row of every matrix sums to 1.
m3 = F.softmax(inp, dim=2)
print(m3)

# dim=-1 is the last axis, which for a 3-D tensor is the same as dim=2.
m4 = F.softmax(inp, dim=-1)
print(m4)

# 输出
tensor([[[-3.9332,  0.7909,  0.8927],
[-1.7991,  0.2505,  0.7695]],

[[ 0.1946,  0.1878,  1.2713],
[ 0.9536,  1.0525, -0.7081]]])

tensor([[[0.0159, 0.6464, 0.4065],
[0.0599, 0.3096, 0.8142]],

[[0.9841, 0.3536, 0.5935],
[0.9401, 0.6904, 0.1858]]])

tensor([[[0.1058, 0.6319, 0.5308],
[0.8942, 0.3681, 0.4692]],

[[0.3189, 0.2964, 0.8786],
[0.6811, 0.7036, 0.1214]]])

tensor([[[0.0042, 0.4726, 0.5232],
[0.0458, 0.3560, 0.5982]],

[[0.2029, 0.2015, 0.5955],
[0.4360, 0.4813, 0.0828]]])

tensor([[[0.0042, 0.4726, 0.5232],
[0.0458, 0.3560, 0.5982]],

[[0.2029, 0.2015, 0.5955],
[0.4360, 0.4813, 0.0828]]])



### 3）X.shape、np.size(X,0/1)、X.shape[0]、X.shape[1]、X.shape[-1]

X.shape 返回张量 X 的形状；np.size(X, 0) 返回行数，np.size(X, 1) 返回列数，不指定轴时返回 X 的元素总数（.shape 是属性，np.size() 是函数）。

image.shape[0]——图片高

image.shape[1]——图片宽

image.shape[2]——图片通道数

shape[0]：表示矩阵的行数

shape[1]：表示矩阵的列数

shape[-1]：一般来说，-1代表最后一个，所以shape[-1]代表最后一个维度，如在二维张量里，shape[-1]表示列数，在一维行向量，shape[-1]表示行向量的元素总数，换言之也是列数。

import numpy as np
# Three layouts of the same four numbers:
a = np.array([0, 1, 2, 3])          # 1-D vector
b = np.array([[0], [1], [2], [3]])  # column: 4 rows, 1 column
c = np.array([[0, 1, 2, 3]])        # row: 1 row, 4 columns

# Shapes of a, b and c, one per line.
print(a.shape, b.shape, c.shape, sep="\n")
# Total element count of c, then its first and last axis lengths.
print(np.size(c), c.shape[0], c.shape[-1], sep="\n")

# 输出
(4,)
(4, 1)
(1, 4)
4
1
4


### 5）torch.repeat_interleave()函数

dim=0，按行复制，dim=1，按列复制，没给出dim的值的话，就把a拉成一维数组复制。

# 2x5 matrix holding 0..9.
a = torch.arange(10).reshape(2, 5)
# Repeat every row 3 times (along dim 0).
b = a.repeat_interleave(3, dim=0)
# Repeat every column 3 times (along dim 1).
c = a.repeat_interleave(3, dim=1)
# No dim given: the tensor is flattened, then every element is repeated.
d = a.repeat_interleave(3)
print(a, b, c, d, sep="\n")

# 输出
tensor([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
tensor([[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9],
[5, 6, 7, 8, 9],
[5, 6, 7, 8, 9]])
tensor([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
[5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9]])
tensor([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
8, 8, 8, 9, 9, 9])

### 6）X.reshape()

>>> X.shape
(209, 64, 64, 3)

>>> X.reshape(X.shape[0], -1)
(209, 64*64*3)

>>> a = torch.tensor([[1, 2, 3], [4, 5, 6]])
>>> a.reshape(-1, a.shape[-1])
>>> print(a)
tensor([[1, 2, 3],
[4, 5, 6]])

### 7）torch.repeat_interleave()

torch.repeat_interleave(input, repeats, dim=None, *, output_size=None)

# Two 1x3 matrices holding 0..5.
a = torch.arange(6).view(2, 1, 3)
# Repeat along dim 1 (the row axis of each matrix) 3 times: (2, 1, 3) -> (2, 3, 3).
res = a.repeat_interleave(3, dim=1)
print(res, a.shape, res.shape, sep="\n")

tensor([[[0, 1, 2],
[0, 1, 2],
[0, 1, 2]],
[[3, 4, 5],
[3, 4, 5],
[3, 4, 5]]])
torch.Size([2, 1, 3])
torch.Size([2, 3, 3])


• 3
点赞
• 11
收藏
觉得还不错? 一键收藏
• 0
评论
05-06
08-30 272
08-01 348
04-30 2485
04-15 7117
03-25 2万+
04-21 1586
08-12 1066

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。