torch的基本用法 | 超全总结

最新推荐文章于 2023-10-20 11:16:02 发布

一天只能上一天班

最新推荐文章于 2023-10-20 11:16:02 发布

阅读量1.7k

点赞数 17

文章标签：深度学习

本文链接：https://blog.csdn.net/K_Kelly_/article/details/133463305

版权

前言

PyTorch的Tensor和Numpy的ndarray十分相似，但Tensor具备2个ndarray不具备、但对深度学习（deep learning）来说非常重要的功能：

Tensor能利用GPU计算
Tensor在计算时，能够作为节点自动地加入计算图中，而计算图可以为其中的每个节点自动地计算微分，也就是说，使用Tensor时就不需要手动计算微分。

实操

调库

import torch
import numpy as np

Tensor对象及其运算

默认类型

import torch
import numpy as np
print('torch.Tensor默认为:{}'.format(torch.Tensor(1).dtype))
print('torch.tensor默认为:{}'.format(torch.tensor(1).dtype))

# torch.Tensor默认为:torch.float32
# torch.tensor默认为:torch.int64

构建方式

a = torch.tensor([[1,2],[3,4]],dtype = torch.float64)
a # 直接用list构建

# tensor([[1., 2.],
#         [3., 4.]], dtype=torch.float64)

b = torch.tensor(np.array([[1,2],[3,4]]),dtype=torch.uint8)
b # 用np构建

# tensor([[1, 2],
#         [3, 4]], dtype=torch.uint8)

通过device指定设备

cuda0 = torch.device('cuda:0')
c = torch.ones((2,2),device=cuda0)
c

# tensor([[1., 1.],
#         [1., 1.]], device='cuda:0')

支持在CPU和GPU之间复制变量

# Tensor.to is called here with a target device and a target dtype;
# the printed .device shows where each resulting tensor lives.
c = c.to('cpu',torch.double)
print(c.device) # cpu
b = b.to(cuda0,torch.float)
print(b.device) # cuda:0

矩阵运算

a = torch.tensor([[1,2],[3,4]])
b = torch.tensor([[1,2],[3,4]])
c = a*b
print("逐元素相乘:",c)
c = torch.mm(a,b)
print("矩阵乘法:",c)

# 逐元素相乘: tensor([[ 1,  4],
#         [ 9, 16]])
# 矩阵乘法: tensor([[ 7, 10],
#         [15, 22]])

数据调整

torch.clamp起分段函数作用，可用于去掉矩阵中过小或者过大的元素，常用在梯度爆炸的情况

a = torch.tensor([[1,2],[3,4]])
torch.clamp(a,min=2,max=3)

# tensor([[2, 2],
#         [3, 3]])

torch.round将元素取整到最近的整数；当小数部分恰好为0.5时，取最近的偶数（即“四舍六入五成双”，而非简单的四舍五入），因此下例中0.5被取整为0

a = torch.tensor([-1.1,0.5,0.501,0.99])
torch.round(a)

# tensor([-1.,  0.,  1.,  1.])

torch.tanh计算双曲正切函数，将数值映射到(-1,1)

a = torch.tensor([-3,-2,-1,-0.5,0,0.5,1,2,3])
torch.tanh(a)

# tensor([-0.9951, -0.9640, -0.7616, -0.4621,  0.0000,  0.4621,  0.7616,  0.9640,
#          0.9951])

矩阵构建的其他方式

print(torch.arange(5))
# tensor([0, 1, 2, 3, 4])

print(torch.arange(1,5,2))
# tensor([1, 3])

print(torch.linspace(0,5,10))
# tensor([0.0000, 0.5556, 1.1111, 1.6667, 2.2222, 2.7778, 3.3333, 3.8889, 4.4444,
#         5.0000])

print(torch.ones(3,3))
# tensor([[1., 1., 1.],
#         [1., 1., 1.],
#         [1., 1., 1.]])

print(torch.zeros(3,3))
# tensor([[0., 0., 0.],
#         [0., 0., 0.],
#         [0., 0., 0.]])

生成均匀分布采样的矩阵

torch.rand返回从[0,1)的均匀分布采样的元素所组成的矩阵（前闭后开）

torch.rand(3,3)

# tensor([[0.0860, 0.1347, 0.7903],
#         [0.7097, 0.9961, 0.1844],
#         [0.4367, 0.3065, 0.7460]])

torch.randn返回从标准正态分布（均值为0，方差为1）采样的元素所组成的矩阵

torch.randn(3,3)

# tensor([[-0.7903,  0.6995,  1.3291],
#         [-0.1502,  1.3087, -0.4836],
#         [ 0.5208,  0.4172,  0.5930]])

torch.randint返回指定区间的均匀分布采样的随机整数所组成的矩阵，前闭后开

torch.randint(0,9,(3,3))

# tensor([[3, 2, 4],
#         [8, 5, 2],
#         [8, 7, 4]])

Tensor的索引和切片

a = torch.arange(9).view(3,3)
print(a)
# tensor([[0, 1, 2],
#         [3, 4, 5],
#         [6, 7, 8]])

基本索引

print(a[2,2])
# tensor(8)

切片

print(a[1:,:-1]) # 从第1行开始，截止到倒数第一列(不包含)
# tensor([[3, 4],
#         [6, 7]])

带步长的切片(PyTorch现不支持负步长)

print(a[::2])
# tensor([[0, 1, 2],
#         [6, 7, 8]])

整数索引

rows = [0,1]
cols = [2,2]
print(a[rows,cols])
# tensor([2, 5])

bool索引

index = a>4
print(index)
print(a[index])
# tensor([[False, False, False],
#         [False, False,  True],
#         [ True,  True,  True]])
# tensor([5, 6, 7, 8])

torch.nonzero用于返回非0值的索引矩阵

a = torch.arange(9).view(3,3)
index = torch.nonzero(a>=2)
print(index)
# tensor([[0, 2],
#         [1, 0],
#         [1, 1],
#         [1, 2],
#         [2, 0],
#         [2, 1],
#         [2, 2]])

torch.where(condition,x,y)判断condition的条件是否满足，当某个元素满足时，返回对应矩阵x相同位置的元素，否则返回矩阵y的元素

x = torch.randn(3,2)
y = torch.ones(3,2)
print(x)
print(torch.where(x>0,x,y))
# tensor([[-0.4600, -0.1726],
#         [-0.7729,  1.3573],
#         [-0.8406,  1.8607]])
# tensor([[1.0000, 1.0000],
#         [1.0000, 1.3573],
#         [1.0000, 1.8607]])

Tensor的变换、拼接和拆分

PyTorch提供了大量的对Tensor进行操作的函数或方法，这些函数内部使用指针实现对矩阵的形状变换、拼接、拆分等操作，使得人们无需关心Tensor在内存中的物理结构或者管理指针就可以方便且快速地执行这些操作。

a = torch.rand(1,2,3,4,5)
print("元素个数：",a.nelement())
print("轴的个数：",a.ndimension())
print("矩阵维度：",a.size(),a.shape)
# 元素个数： 120
# 轴的个数： 5
# 矩阵维度： torch.Size([1, 2, 3, 4, 5]) torch.Size([1, 2, 3, 4, 5])

torch.nonzero用于返回非0值的索引矩阵

a = torch.arange(9).view(3,3)
index = torch.nonzero(a>=2)
print(index)
# tensor([[0, 2],
#         [1, 0],
#         [1, 1],
#         [1, 2],
#         [2, 0],
#         [2, 1],
#         [2, 2]])

Tensor.reshape和Tensor.view都可以用于更改Tensor的维度，区别在于Tensor.view要求Tensor的物理存储必须是连续的，否则将报错，而Tensor.reshape无此要求。

Tensor.view返回的一定是一个视图（view），它与原Tensor共享底层数据，因此更改返回值时，原始值也会同时被更改。

Tensor.reshape返回的是引用（共享数据的视图）还是复制是不确定的。

两者的相同点是都要接受要输出的维度作为参数，且输出的矩阵元素个数不能改变，可以在维度中输入-1，PyTorch会自动推断它的数值。

# `a` was rebound to a 3x3 tensor by the preceding torch.nonzero example, so
# rebuild the 120-element tensor that the reshaping examples below operate on;
# otherwise a.view(2*3,4*5) fails because 9 elements cannot fill a 6x20 shape.
a = torch.rand(1,2,3,4,5)

b = a.view(2*3,4*5)
print(b.shape) # 120 elements: 1*2*3*4*5 reshaped to 6*20
# torch.Size([6, 20])

c = a.reshape(-1) # -1 lets PyTorch infer the size (flattens to 1-D)
print(c.shape)
# torch.Size([120])

d = a.reshape(2*3,-1) # the second dimension is inferred from the element count
print(d.shape)
# torch.Size([6, 20])

torch.squeeze去掉维度为1的轴

b = torch.squeeze(a)
b.shape
# torch.Size([2, 3, 4, 5])

torch.unsqueeze在指定位置添加一个维度为1的轴

torch.unsqueeze(b,0).shape
# torch.Size([1, 2, 3, 4, 5])

转置矩阵。torch.t只能转置不超过2维的矩阵；torch.transpose(input,dim0,dim1)则用于交换指定的两个维度

a = torch.tensor([[2]])
print(torch.transpose(a,1,0,))
print(torch.t(a))
# tensor([[2]])
# tensor([[2]])

b = torch.tensor([[2,3]])
print(torch.transpose(b,1,0,))
print(torch.t(b))
# tensor([[2],
#         [3]])
# tensor([[2],
#         [3]])

对于高维度的Tensor，可以使用permute()方法来变换维度

a = torch.rand((1,224,224,3))
print(a.shape)
# torch.Size([1, 224, 224, 3])

b = a.permute((0,3,1,2)) # 调整维度顺序
print(b.shape)
# torch.Size([1, 3, 224, 224])

拼接矩阵。torch.cat和torch.stack都可用于拼接

不同的是torch.cat在已有的轴dim上拼接矩阵，给定的维度可以不同，但其他的轴维度必须相同。

torch.stack在新的轴上拼接，要求被拼接的矩阵所有维度都相同

a = torch.randn(2,3)
b = torch.randn(3,3)

c = torch.cat((a,b)) # dim defaults to 0 (concatenate along rows)
print(c.shape)
# torch.Size([5, 3])

d = torch.cat((b,b,b),dim=1)
print(d.shape)
# torch.Size([3, 9])

e = torch.cat((b,b,b),dim=0)
print(e.shape)
# torch.Size([9, 3])

f = torch.stack((b,b),dim=1)
print(f.shape)
# torch.Size([3, 2, 3])

g = torch.stack((b,b),dim=0)
print(g.shape)
# torch.Size([2, 3, 3])

拆分矩阵。torch.split和torch.chunk都可用于拆分矩阵。

不同在于torch.split传入的是拆分后每个矩阵的大小，可以传入list，也可以是整数

而torch.chunk传入的是拆分的矩阵个数

a = torch.randn(10,3)
for x in torch.split(a,[1,2,3,4],dim=0): # 对拆分后的每个矩阵大小有限定
    print(x.shape)
# torch.Size([1, 3])
# torch.Size([2, 3])
# torch.Size([3, 3])
# torch.Size([4, 3])

for x in torch.split(a,4,dim=0): # 对拆分后的每个矩阵大小都限制为4
    print(x.shape)
# torch.Size([4, 3])
# torch.Size([4, 3])
# torch.Size([2, 3])

for x in torch.chunk(a,4,dim=0):
    print(x.shape)# 向上取整
# torch.Size([3, 3])
# torch.Size([3, 3])
# torch.Size([3, 3])
# torch.Size([1, 3])

If the tensor size along the given dimension dim is not divisible by chunks, all returned chunks will be the same size, except the last one. If such division is not possible, this function may return fewer than the specified number of chunks.(from 官方文档)

for x in torch.chunk(a,6,dim=0):
    print(x.shape)# 
# torch.Size([2, 3])
# torch.Size([2, 3])
# torch.Size([2, 3])
# torch.Size([2, 3])
# torch.Size([2, 3])

PyTorch的Reduction操作

Reduction运算的特点是它往往对一个Tensor内的元素做归约操作

找最大值

a = torch.tensor([[1,2],[8,4],[5,6]])
print(a)
# tensor([[1, 2],
#         [8, 4],
#         [5, 6]])

print(a.shape)
# torch.Size([3, 2])

print(torch.max(a)) 
# tensor(8)

print(torch.max(a,dim=0)) # 指定维度
# 按dim=0维度，从左往右来看每一列最大值
# torch.return_types.max(
# values=tensor([8, 6]),
# indices=tensor([1, 2]))

print(torch.max(a,dim=1)) # 指定维度
# 按dim=1维度，从上往下来看每一行最大值
# torch.return_types.max(
# values=tensor([2, 8, 6]),
# indices=tensor([1, 0, 1]))

累加，累乘

注意dim为运算所沿的方向：dim=0表示沿着行的方向（从上往下，跨行）进行操作，dim=1表示沿着列的方向（从左往右，跨列）进行操作。

a = torch.tensor([[1,2],[3,4],[5,6]])
print(a)
# tensor([[1, 2],
#         [3, 4],
#         [5, 6]])

print(torch.cumsum(a,dim=0)) # 沿行进行累加，从上往下
# tensor([[ 1,  2],
#         [ 4,  6],
#         [ 9, 12]])

print(torch.cumprod(a,dim=1)) # 沿列进行累乘，从左往右
# tensor([[ 1,  2],
#         [ 3, 12],
#         [ 5, 30]])

计算矩阵的均值，中值，标准差

a = torch.Tensor([[1,2],[3,4]])
print(a.mean(),a.median(),a.std())
# tensor(2.5000) tensor(2.) tensor(1.2910)

使用torch.unique来找出矩阵中出现过的元素

a = torch.randint(0,3,(3,3))
print(a)
# tensor([[2, 0, 1],
#         [2, 0, 0],
#         [1, 2, 1]])

print(torch.unique(a))
# tensor([0, 1, 2])

PyTorch的自动微分

将Tensor的requires_grad属性设置为True时，PyTorch的torch.autograd会自动地追踪它的计算轨迹，当需要计算微分的时候，只需要对最终计算结果的Tensor调用backward方法，计算图中各叶子节点（requires_grad=True的Tensor）的微分就会被保存在各自的grad属性中；中间节点（非叶子节点）的grad默认不会保留，访问时得到None。

x = torch.arange(9).view(3,3)
print(x.requires_grad) #默认为False
# False

x = torch.rand(3,3,requires_grad=True)
print(x)
# tensor([[0.8070, 0.8937, 0.4740],
#         [0.3787, 0.7576, 0.5290],
#         [0.8640, 0.7519, 0.1954]], requires_grad=True)

使用Tensor.backward()自动求微分

x = torch.rand(3,3,requires_grad=True)
w = torch.ones(3,3,requires_grad=True)
y = torch.sum(torch.mm(w,x))
print(y)
# tensor(15.8791, grad_fn=<SumBackward0>)

print(torch.mm(w,x))
# tensor([[2.0205, 1.7986, 1.4740],
#         [2.0205, 1.7986, 1.4740],
#         [2.0205, 1.7986, 1.4740]], grad_fn=<MmBackward0>)

y.backward()
print(y.grad)
# None

print(x.grad)
# tensor([[3., 3., 3.],
#         [3., 3., 3.],
#         [3., 3., 3.]])

print(w.grad)
# tensor([[2.4803, 1.3675, 1.4452],
#         [2.4803, 1.3675, 1.4452],
#         [2.4803, 1.3675, 1.4452]])

使用Tensor.detach将Tensor从计算图中剥离出去，不再计算它的微分

x = torch.rand(3,3,requires_grad=True)
w = torch.ones(3,3,requires_grad=True)
print(x)
# tensor([[0.0432, 0.5113, 0.1016],
#         [0.6781, 0.1832, 0.0147],
#         [0.0278, 0.7721, 0.6046]], requires_grad=True)
print(w)
# tensor([[1., 1., 1.],
#         [1., 1., 1.],
#         [1., 1., 1.]], requires_grad=True)
yy = torch.mm(w,x)
print(yy)
# tensor([[0.7491, 1.4666, 0.7208],
#         [0.7491, 1.4666, 0.7208],
#         [0.7491, 1.4666, 0.7208]], grad_fn=<MmBackward0>)

detached_yy = yy.detach()
y = torch.mean(yy)
y.backward()
print(y)
# tensor(0.9788, grad_fn=<MeanBackward0>)
print(yy.grad)
# None

print(detached_yy)
# tensor([[0.7491, 1.4666, 0.7208],
#         [0.7491, 1.4666, 0.7208],
#         [0.7491, 1.4666, 0.7208]])
print(w.grad)
# tensor([[0.0729, 0.0973, 0.1561],
#         [0.0729, 0.0973, 0.1561],
#         [0.0729, 0.0973, 0.1561]])
print(x.grad)
# tensor([[0.3333, 0.3333, 0.3333],
#         [0.3333, 0.3333, 0.3333],
#         [0.3333, 0.3333, 0.3333]])

with torch.no_grad():包括的代码段不会计算微分，常用于测试函数

y = torch.sum(torch.mm(w,x))
print(y.requires_grad) # True

with torch.no_grad():
    y = torch.sum(torch.mm(w,x))
    print(y.requires_grad) # False