Basic operations
x.view(1,-1)
# the -1 is inferred from the other dimensions
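A quick sketch of the inference (x here is an illustrative tensor):
x = torch.arange(12)   # shape (12,)
x.view(1, -1).shape    # (1, 12): the -1 is inferred as 12
x.view(3, -1).shape    # (3, 4): the -1 is inferred as 4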
torch.zeros/ones([3,4])
# create a matrix filled with 0s / 1s
Element-wise operations: +, -, *, /, x.float(), torch.exp()
Matrix multiplication: x @ y
# dimensions must be compatible; the variants below differ in broadcasting (see the shape sketch after torch.mv)
torch.dot(v1,v2)
# inner product of two 1-D tensors (no broadcasting)
torch.mm(x,y)
# matrix multiplication, no broadcasting
torch.matmul(x,y)
# multiplication with broadcasting (matrix multiply is applied over the last two dims)
torch.bmm(x,y)
# batch matrix multiplication, 3-D tensors only; the batch dimensions must match
torch.mv(m,v)
# matrix-vector product, similar to mm
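A combined shape sketch for these variants (all names illustrative):
v1, v2 = torch.randn(5), torch.randn(5)
torch.dot(v1, v2)                    # scalar: 1-D with 1-D only
a, b = torch.randn(3, 4), torch.randn(4, 2)
torch.mm(a, b).shape                 # (3, 2): strictly 2-D, no broadcasting
torch.mv(a, torch.randn(4)).shape    # (3,): matrix times vector
x3, y3 = torch.randn(10, 3, 4), torch.randn(10, 4, 2)
torch.bmm(x3, y3).shape              # (10, 3, 2): 3-D only, batch dims must match
torch.matmul(a, y3).shape            # (10, 3, 2): a is broadcast across the batch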
x.detach()
# detach from the computation graph (stops gradient flow)
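A minimal sketch of detaching (x illustrative):
x = torch.ones(3, requires_grad=True)
y = (x * 2).detach()       # y shares data but records no history
(x * 2).sum().backward()   # gradient flows through x * 2 as usual
# y.sum().backward()       # would raise an error: y is cut off from the graph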
torch.stack(inputs,dim)
# inputs is a list/tuple of tensors with identical shapes; dim is the position of the new axis in the result
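For example (shapes illustrative):
a, b = torch.zeros(3, 4), torch.ones(3, 4)
torch.stack([a, b], dim=0).shape   # (2, 3, 4): new axis at position 0
torch.stack([a, b], dim=1).shape   # (3, 2, 4): new axis at position 1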
x.numpy() , torch.from_numpy(x)
# mutually inverse conversions
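Both directions share the underlying buffer for CPU tensors, so mutations are visible on both sides; a minimal sketch:
import numpy as np
t = torch.ones(3)
n = t.numpy()          # n shares memory with t
n[0] = 0               # t is now tensor([0., 1., 1.])
t2 = torch.from_numpy(np.zeros(3))   # t2 also shares the array's memory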
Gradients: compute with y.backward(); enable tracking with x.requires_grad_(True); update parameters with optimizer.step()
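Putting the three steps together, a minimal hypothetical training step (the model and data are placeholders):
import torch.nn as nn
net = nn.Linear(4, 1)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
x, target = torch.randn(8, 4), torch.randn(8, 1)
loss = nn.functional.mse_loss(net(x), target)
optimizer.zero_grad()   # clear gradients left over from the previous step
loss.backward()         # populate .grad on every parameter
optimizer.step()        # apply the update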
torch.numel(x)
# returns the number of elements in the input tensor
torch.randn([3,4])
# standard normal distribution N(0,1)
torch.sum() , torch.mean()
# sum / mean
torch.norm(input,p,dim)
# returns the p-norm along dimension dim (p = 1 or 2; default 2)
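A sketch of these reductions along a dimension (values illustrative):
m = torch.tensor([[1., 2.], [3., 4.]])
m.sum()                        # tensor(10.): reduces over everything
m.sum(dim=0)                   # tensor([4., 6.]): collapse the rows
m.mean(dim=1, keepdim=True)    # shape (2, 1): the reduced axis is kept as length 1
torch.norm(m, p=2, dim=1)      # tensor([2.2361, 5.0000]): L2 norm of each row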
x.item()
# converts a single-element tensor to a Python scalar; use tolist() for multi-element tensors
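For example:
torch.tensor([7]).item()          # 7, a plain Python int
torch.tensor([[1, 2]]).tolist()   # [[1, 2]], a nested Python list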
x.squeeze(dim), x.unsqueeze(dim)
# remove / add a dimension of length 1
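For example:
x = torch.zeros(1, 3, 1)
x.squeeze().shape      # (3,): every length-1 dim removed
x.squeeze(0).shape     # (3, 1): only dim 0 removed
x.unsqueeze(0).shape   # (1, 1, 3, 1): new length-1 dim at position 0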
x.permute(1,0,2) , torch.transpose(input,dim0,dim1)
# permute reorders all dims at once; transpose swaps dim0 <-> dim1
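For example:
x = torch.zeros(2, 3, 4)
x.permute(1, 0, 2).shape         # (3, 2, 4): arbitrary reordering of all dims
torch.transpose(x, 0, 1).shape   # (3, 2, 4): swaps exactly two dims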
x.repeat([1,2])
# tile the tensor: the second dim is repeated twice, doubling its length
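For example:
x = torch.tensor([[1, 2]])   # shape (1, 2)
x.repeat(1, 2)               # tensor([[1, 2, 1, 2]]): second dim doubled
x.repeat(3, 1).shape         # (3, 2): three stacked copies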
enumerate(list)
# yields (i, list[i]) pairs
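For example:
for i, v in enumerate(['a', 'b', 'c']):
    print(i, v)   # prints 0 a, 1 b, 2 c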
net.parameters()
# returns an iterator over the network's learnable parameters (to register a plain tensor as a parameter, wrap it in nn.Parameter)
net.embedding.weight.requires_grad = False
# freeze this parameter (no gradient is computed for it)
optimizer = torch.optim.Adam(net.parameters(), lr)
# optimizer setup
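When some parameters are frozen as above, a common pattern is to hand the optimizer only the trainable ones (a sketch; net is a placeholder):
trainable = filter(lambda p: p.requires_grad, net.parameters())
optimizer = torch.optim.Adam(trainable, lr=1e-3)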
Memory allocation: every operation allocates new memory for its result unless an output tensor is given, e.g. torch.add(x, y, out=z). Note: x[:] = x + y still allocates a temporary for x + y and then copies it back into x's storage, whereas x += y (i.e. x.add_(y)) updates x truly in place.
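This can be checked with id(), which returns the object's address; a minimal sketch:
x, y = torch.ones(3), torch.ones(3)
before = id(x)
x = x + y      # new allocation: id(x) != before
x[:] = x + y   # temporary for x + y, then copied into x: id(x) unchanged
x += y         # true in-place x.add_(y): id(x) unchanged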
Performance profiler
import torch
import torch.nn as nn
import torch.autograd.profiler as profiler

class MyModule(nn.Module):
    def __init__(self, in_features: int, out_features: int, bias: bool = True):
        super(MyModule, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias)

    def forward(self, input, mask):
        # label each stage so it shows up as a named region in the profile
        with profiler.record_function("LINEAR PASS"):
            out = self.linear(input)
        with profiler.record_function("MASK INDICES"):
            threshold = out.sum(axis=1).mean()
            hi_idx = (mask > threshold).nonzero(as_tuple=True)
        return out, hi_idx

model = MyModule(500, 10).cuda()
input = torch.rand(128, 500).cuda()
mask = torch.rand((500, 500, 500), dtype=torch.float).cuda()

# warm-up so one-time CUDA initialization costs do not pollute the profile
model(input, mask)

with profiler.profile(with_stack=True, profile_memory=True) as prof:
    out, idx = model(input, mask)

print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total', row_limit=5))