# tensor

import torch
x=torch.empty(5,3)  # 5x3 tensor backed by uninitialized memory (values are arbitrary)
x=torch.rand(5,3)  # 5x3 tensor sampled uniformly from [0, 1)
torch.randint(5, (3,), dtype=torch.int64)  # 3 random int64 values in [0, 5); the result is not stored here
x=torch.zeros(5,3)  # 5x3 tensor filled with zeros
x=torch.ones(5,3,dtype=torch.long)  # 5x3 tensor filled with ones, stored as int64 (torch.long)


layer要先实例化，再调用。
Linear线性层：

Softmax激活函数层

## function torch.nn.functional.

import torch.nn.functional as F

F.softmax(input, dim=None, _stacklevel=3)
F.log_softmax(input, dim=None, _stacklevel=3)
NLLLoss: The negative log likelihood loss, $-\sum_i y_i \log(\hat{y}_i)$, where $y_i$ is the gold (one-hot) label and $\hat{y}_i$ is the predicted probability of class $i$.

torch.nn.functional.nll_loss(input, target, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='elementwise_mean')

torch.nn.functional.cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='elementwise_mean')
This criterion combines log_softmax and nll_loss in a single function.

input维度：(N,C) 或 (N,C,d1,d2,…,dk)。nll_loss 接收的是 log_softmax 值；cross_entropy 接收的是未归一化的 logits（内部会自己做 log_softmax）
target维度：（N）(N,d1,d2,…dk)


>>> # input is of size N x C = 3 x 5
>>> input = torch.randn(3, 5, requires_grad=True)
>>> # each element in target has to have 0 <= value < C
>>> target = torch.tensor([1, 0, 4])
>>> output = F.nll_loss(F.log_softmax(input, dim=1), target)
>>> output.backward()

>>> input = torch.randn(3, 5, requires_grad=True)
>>> target = torch.randint(5, (3,), dtype=torch.int64)
>>> loss = F.cross_entropy(input, target)
>>> loss.backward()


## stacked layers

import copy

from torch import nn


def clones(module, N):
    """Produce N identical layers.

    Each layer is an independent deep copy of ``module``, so the copies
    start with equal parameter values but do not share them.

    Args:
        module: The ``nn.Module`` to replicate.
        N: Number of copies to create; 0 yields an empty list.

    Returns:
        An ``nn.ModuleList`` holding the N copies, so their parameters are
        registered with any parent module that stores the list.
    """
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])


nn.Dropout
nn.Embedding
nn.Sequential()
torch.arange()

>>> torch.arange(5)
tensor([ 0,  1,  2,  3,  4])
>>> torch.arange(1, 4)
tensor([ 1,  2,  3])
>>> torch.arange(1, 2.5, 0.5)
tensor([ 1.0000,  1.5000,  2.0000])


a.size() 返回 torch.Size，是 tuple 的子类（tuple 是不可变的 list）；a.size(-1) 返回最后一维的大小（int）
torch.transpose(input, dim0, dim1) → Tensor 或者 input.transpose(-2,-1)也可以，两维交换维数

a.view(-1,2)改变形状，-1是根据其他维数算出来的
a.unsqueeze(1) 在第1维前面加上一维：2×3 变成 2×1×3；a.unsqueeze(-1) 在最后加上一维：2×3 变成 2×3×1 【索引方向不同，加的那一维的位置不同】
a.squeeze(1) 去掉大小为1的第1维：2×1×3 变成 2×3（如果该维大小不是1，形状不变）
contiguous()

contiguous：view只能用在contiguous的variable上。如果在view之前用了transpose, permute等，需要用contiguous()来返回一个contiguous copy。

import torch
x = torch.ones(10, 10)
x.is_contiguous()  # True: a freshly created tensor is laid out contiguously
x.transpose(0, 1).is_contiguous()  # False: transpose returns a view of the same storage with swapped strides
x.transpose(0, 1).contiguous().is_contiguous()  # True: contiguous() copies the data into a contiguous layout


Python 的 zip 遵循短板效应：以最短的可迭代对象为准，较长者多出的元素会被忽略

self.linears = clones(nn.Linear(d_model, d_model), 4)  # four separate copies of a d_model -> d_model linear layer
query, key, value = [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2) for l, x in zip(self.linears, (query, key, value))]  # zip pairs only the first 3 layers with (query, key, value); each output is viewed as (nbatches, -1, h, d_k), then dims 1 and 2 are swapped


zip 函数只参照最外层（第一层）的维数：2×3×5 的张量实际上被看成 2 个 3×5 的张量进行 zip

numpy np.triu()提取矩阵上三角

@staticmethod