Understanding the PyTorch Source Code (Part 1: Tensors)
The first step in learning the PyTorch framework is learning to use it, but when it comes to solving real problems, only knowing how to call functions tends to run into all kinds of bugs; understanding the source code is what lets you take PyTorch further.
We go from building and training models with Torch and Torchvision to deploying models with TorchServe, reading the core PyTorch framework in depth and demonstrating it with examples.
1. Tensor Basics
"""
Created on April 23, 2021
@author ClearTorch
"""
import torch
import numpy as np
print('Torch version is ',torch.__version__)
print('Numpy version is ' , np.__version__)
Torch version is 1.8.1+cu102
Numpy version is 1.20.2
1.1 Creating Tensors
- Create directly from data: torch.tensor(data) or torch.as_tensor(data); the data type of data is inferred automatically.
- Create from a NumPy array: torch.from_numpy(numpy_ndarray).
- Other factory functions, whose parameters generally follow (*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False): torch.zeros, torch.zeros_like, torch.ones, torch.ones_like, torch.arange, torch.range (deprecated in favor of torch.arange), torch.linspace, torch.logspace, torch.eye, torch.empty, torch.empty_like, torch.empty_strided, torch.full, torch.full_like.
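Several of the factory functions listed above are not used again in this section; here is a minimal sketch of a few of them (shapes and arguments chosen purely for illustration):
zeros_tensor = torch.zeros(2, 3)                 # 2x3 tensor of zeros, default dtype float32
empty_tensor = torch.empty(2, 3)                 # uninitialized values (whatever is in memory)
logspace_tensor = torch.logspace(0, 2, steps=3)  # 10**0 .. 10**2 -> tensor([  1.,  10., 100.])
print(zeros_tensor.dtype, empty_tensor.shape, logspace_tensor)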
# torch.tensor(data, *, dtype=None, device=None, requires_grad=False, pin_memory=False)→ Tensor
list_data = [[1, 2, 3,4], [5, 6, 7, 8]]
tensor_from_list = torch.tensor(list_data).to(torch.float16)
print(tensor_from_list)
print(tensor_from_list.shape)
tensor([[1., 2., 3., 4.],
        [5., 6., 7., 8.]], dtype=torch.float16)
torch.Size([2, 4])
# torch.as_tensor(data, dtype=None, device=None) → Tensor
tensor_from_list_goodway = torch.as_tensor(list_data, dtype=torch.float16, device='cuda')
print(tensor_from_list_goodway)
tensor([[1., 2., 3., 4.], [5., 6., 7., 8.]], device='cuda:0', dtype=torch.float16)
# torch.from_numpy(ndarray) → Tensor
numpy_adarray = np.array(list_data)
tensor_from_numpy = torch.from_numpy(numpy_adarray)
print(tensor_from_numpy)
numpy_from_tensor = tensor_from_numpy.numpy()
print('Tensor to Numpy:')
print(numpy_from_tensor)
tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]], dtype=torch.int32)
Tensor to Numpy:
[[1 2 3 4]
 [5 6 7 8]]
# torch.ones_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) → Tensor
ones_tensor = torch.ones_like(tensor_from_list)
print(ones_tensor)
tensor([[1., 1., 1., 1.], [1., 1., 1., 1.]], dtype=torch.float16)
# torch.rand_like(input, *, dtype=None, layout=None, device=None, requires_grad=False, memory_format=torch.preserve_format) → Tensor
random_tensor = torch.rand_like(tensor_from_list)
print(random_tensor)
tensor([[0.2705, 0.8960, 0.3555, 0.5791], [0.2979, 0.0742, 0.2378, 0.0137]], dtype=torch.float16)
# torch.eye(n, m=None, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
eye_tensor = torch.eye(3,3).to(torch.float64)
print(eye_tensor)
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], dtype=torch.float64)
# torch.full(size, fill_value, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
full_6_tensor = torch.full((6,6), 6)
print(full_6_tensor)
print(full_6_tensor.dtype)
print(full_6_tensor.dim())
tensor([[6, 6, 6, 6, 6, 6],
        [6, 6, 6, 6, 6, 6],
        [6, 6, 6, 6, 6, 6],
        [6, 6, 6, 6, 6, 6],
        [6, 6, 6, 6, 6, 6],
        [6, 6, 6, 6, 6, 6]])
torch.int64
2
# torch.arange(start=0, end, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
arange_tensor = torch.arange(0, 10, 2)
print(arange_tensor)
print(arange_tensor.device)
tensor([0, 2, 4, 6, 8])
cpu
# torch.linspace(start, end, steps, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
linspace_tensor = torch.linspace(0, 10, 5).to('cuda')
print(linspace_tensor)
print(linspace_tensor.device)
tensor([ 0.0000,  2.5000,  5.0000,  7.5000, 10.0000], device='cuda:0')
cuda:0
1.2 Tensor Operations
- Indexing (same as in NumPy)
- Concatenation
- Splitting
- Adding and removing dimensions (see the sketch after this list)
- Math operations: element-wise and matrix operations (see the sketch after this list)
- Converting a single-element tensor to a Python value: .item()
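Adding/removing dimensions and the element-wise vs. matrix operations mentioned above are not demonstrated in the rest of this section, so here is a minimal sketch of both (values chosen arbitrarily):
# Adding/removing dimensions: unsqueeze inserts a size-1 dimension, squeeze removes size-1 dimensions
vec = torch.arange(4.)          # shape [4]
col = vec.unsqueeze(1)          # shape [4, 1]
print(col.squeeze().shape)      # back to torch.Size([4])
# Element-wise vs. matrix operations: * / torch.mul is element-wise, @ / torch.matmul is matrix multiplication
a = torch.ones(2, 3)
b = torch.ones(3, 2)
print((a * 2).shape)            # element-wise: torch.Size([2, 3])
print((a @ b).shape)            # matrix product: torch.Size([2, 2])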
Indexing
# Indexing works the same way as in NumPy
tensor = torch.ones(4, 4)
print('First row: ',tensor[0])
print('First column: ', tensor[:, 0])
print('Last column:', tensor[..., -1])
tensor[:,1] = 0
print(tensor)
First row:  tensor([1., 1., 1., 1.])
First column:  tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
Concatenation
# Concatenation: torch.cat(tensors, dim=0, *, out=None) → Tensor
print('tensor shape:', tensor.shape)
t1_dim_1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1_dim_1)
print('Joining tensor shape:', t1_dim_1.shape)
tensor shape: torch.Size([4, 4])
tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])
Joining tensor shape: torch.Size([4, 12])
join_tensor_a = torch.tensor([[1,2,3],[4,5,6]])
join_tensor_b = torch.tensor([[7,8,9],[10,11,12]])
cat_dim_0 = torch.cat([join_tensor_a, join_tensor_b], dim=0)
print(cat_dim_0)
print('Joining tensor shape:', cat_dim_0.shape)
tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])
Joining tensor shape: torch.Size([4, 3])
cat_dim_1 = torch.cat([join_tensor_a, join_tensor_b], dim=1)
print(cat_dim_1)
print('Joining tensor shape:', cat_dim_1.shape)
tensor([[ 1,  2,  3,  7,  8,  9],
        [ 4,  5,  6, 10, 11, 12]])
Joining tensor shape: torch.Size([2, 6])
# torch.hstack(tensors, *, out=None) → Tensor
# For tensors with at least 2 dimensions, hstack is equivalent to cat along dim=1; for 1-D tensors it concatenates along dim=0 (see the one-line example after this block)
hstack_tensor = torch.hstack((join_tensor_a, join_tensor_b))
print(hstack_tensor)
print(hstack_tensor.shape)
tensor([[ 1,  2,  3,  7,  8,  9],
        [ 4,  5,  6, 10, 11, 12]])
torch.Size([2, 6])
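A one-line check of the 1-D behaviour mentioned in the comment above (illustrative only):
print(torch.hstack((torch.tensor([1, 2]), torch.tensor([3, 4]))))  # 1-D inputs: tensor([1, 2, 3, 4])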
# torch.dstack(tensors, *, out=None) → Tensor
dstack_tensor = torch.dstack((join_tensor_a, join_tensor_b))
print(dstack_tensor)
print(dstack_tensor.shape)
tensor([[[ 1,  7],
         [ 2,  8],
         [ 3,  9]],
        [[ 4, 10],
         [ 5, 11],
         [ 6, 12]]])
torch.Size([2, 3, 2])
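A closely related joining function is torch.stack, which joins tensors along a new dimension rather than an existing one; a minimal sketch for comparison:
# torch.stack(tensors, dim=0, *, out=None) → Tensor
stack_tensor = torch.stack((join_tensor_a, join_tensor_b), dim=0)
print(stack_tensor.shape)  # torch.Size([2, 2, 3]): a new leading dimension, unlike cat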
Splitting
# torch.split(tensor, split_size_or_sections, dim=0)
orig_tensor = torch.arange(10).reshape(5,2)
print('orig_tensor',orig_tensor)
split_by_size = torch.split(orig_tensor, 2)           # chunks of 2 rows along dim 0 (the last chunk may be smaller)
print(split_by_size)
split_by_sections = torch.split(orig_tensor, [1, 4])  # sections of 1 and 4 rows along dim 0
orig_tensor tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])
(tensor([[0, 1],
        [2, 3]]), tensor([[4, 5],
        [6, 7]]), tensor([[8, 9]]))
Reshaping
Differences between reshape/view and resize:
- In common: all of them change a tensor's shape.
- Differences: .view() only works on contiguous tensors; a non-contiguous tensor (for example after transpose or permute) must call .contiguous() first, whereas .reshape() has no such restriction. .view() returns a tensor that shares the underlying data (the storage, not the same memory address) with the original tensor; .reshape() may or may not return a copy of the original, and you cannot rely on either behaviour.
Using .reshape, .view, and .resize
# torch.reshape(input, shape) → Tensor
print(dstack_tensor.is_contiguous())
reshape_tensor = dstack_tensor.reshape(2, 6)
print(reshape_tensor)
True
tensor([[ 1,  7,  2,  8,  3,  9],
        [ 4, 10,  5, 11,  6, 12]])
dstack_tensor = dstack_tensor.contiguous()  # a no-op here: the tensor is already contiguous
view_tensor = dstack_tensor.view(2,6)
print(view_tensor)
tensor([[ 1, 7, 2, 8, 3, 9], [ 4, 10, 5, 11, 6, 12]])
resize_tensor = dstack_tensor.resize(2, 6)  # non-inplace Tensor.resize is deprecated; prefer reshape/view (or resize_ for in-place)
print(resize_tensor)
tensor([[ 1, 7, 2, 8, 3, 9], [ 4, 10, 5, 11, 6, 12]])
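The dstack_tensor above is already contiguous, so the restriction described earlier never comes into play; here is a minimal sketch of the non-contiguous case and of the storage sharing of .view() (the commented results are what these calls are expected to produce):
t = torch.arange(6).reshape(2, 3)
t_t = t.t()                          # transpose: same storage, but no longer contiguous
print(t_t.is_contiguous())           # False
# t_t.view(6) would raise a RuntimeError here; .reshape() still works (and may copy)
print(t_t.reshape(6))                # tensor([0, 3, 1, 4, 2, 5])
print(t_t.contiguous().view(6))      # calling .contiguous() first makes .view() legal
# .view() shares the underlying storage with the tensor it was created from:
v = t.view(6)
v[0] = 100
print(t[0, 0])                       # tensor(100): modifying the view modified t as well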
Reductions
random_tensor = torch.randn(5, 5)
print(random_tensor)
tensor([[ 1.0391, -0.8959,  0.6222,  0.2725,  1.5005],
        [-1.5842, -0.5551,  1.8932,  0.0349,  1.5213],
        [-0.2699, -1.3874, -2.1950, -0.4526, -0.0115],
        [-0.4008,  0.5420,  1.0060, -1.0007, -1.1480],
        [ 2.0848, -2.3114, -1.1065, -0.4301,  1.4364]])
# Return the index of the maximum value (over the flattened tensor when dim is not given)
# torch.argmax(input, dim, keepdim=False) → LongTensor
max_tensor = torch.argmax(random_tensor)
print(max_tensor)
tensor(20)
# Return the index of the minimum value
random_tensor.argmin()
tensor(21)
# Index of the maximum along dim=1
max_tensor_dim1 = random_tensor.argmax(dim=1)
print(max_tensor_dim1)
tensor([4, 2, 4, 2, 0])
# Maximum values along a dimension: torch.amax(input, dim, keepdim=False, *, out=None) → Tensor
random_tensor.amax(dim=0)
tensor([2.0848, 0.5420, 1.8932, 0.2725, 1.5213])
random_tensor.max()
tensor(2.0848)
# torch.amin(input, dim, keepdim=False, *, out=None) → Tensor
random_tensor.amin(dim=1)
tensor([-0.8959, -1.5842, -2.1950, -1.1480, -2.3114])
random_tensor.min()
tensor(-2.3114)
# Mean: torch.mean(input, dim, keepdim=False, *, out=None) → Tensor
random_tensor.mean()
tensor(-0.0718)
random_tensor.mean(dim=1, keepdim=True)
tensor([[ 0.5077],
        [ 0.2620],
        [-0.8633],
        [-0.2003],
        [-0.0653]])
# Standard deviation: torch.std(input, dim, unbiased=True, keepdim=False, *, out=None) → Tensor
random_tensor.std()
tensor(1.2442)
# Variance: torch.var(input, dim, unbiased=True, keepdim=False, *, out=None) → Tensor
random_tensor.var()
tensor(1.5480)
# Unique values
random_tensor.unique()
tensor([-2.3114, -2.1950, -1.5842, -1.3874, -1.1480, -1.1065, -1.0007, -0.8959, -0.5551, -0.4526, -0.4301, -0.4008, -0.2699, -0.0115, 0.0349, 0.2725, 0.5420, 0.6222, 1.0060, 1.0391, 1.4364, 1.5005, 1.5213, 1.8932, 2.0848])
# Sum
a_tensor = torch.as_tensor([3.1415926])
agg = a_tensor.sum()
print(agg)
tensor(3.1416)
# Convert a single-element tensor to a Python number
agg_item = a_tensor.item()
print( agg_item, type(agg_item))
3.141592502593994 <class 'float'>
# torch.all: True only if every element is nonzero (truthy)
torch.all(random_tensor)
tensor(True)
# torch.any: True if any element is nonzero (truthy)
torch.any(random_tensor)
tensor(True)
Saving and Loading Tensors
import io
# torch.save(obj, f, pickle_module=pickle, pickle_protocol=2, _use_new_zipfile_serialization=True)
x = torch.tensor([0, 1, 2, 3, 4, 5])
torch.save(x, 'tensor.pt')
# Save to io.BytesIO buffer
buffer = io.BytesIO()
torch.save(x, buffer)
# torch.load(f, map_location=None, pickle_module=pickle, **pickle_load_args)
x = torch.load('tensor.pt')
# Load all tensors onto the CPU
x_cpu = torch.load('tensor.pt', map_location=torch.device('cpu'))
# Load all tensors onto the CPU, using a function
x_loc = torch.load('tensor.pt', map_location=lambda storage, loc: storage)
# Load all tensors onto GPU 1
x_gpu1 = torch.load('tensor.pt', map_location=lambda storage, loc: storage.cuda(1))
# Map tensors from GPU 1 to GPU 0
x_map = torch.load('tensor.pt', map_location={'cuda:1': 'cuda:0'})
# Load tensor from io.BytesIO object
with open('tensor.pt', 'rb') as f:
    buffer = io.BytesIO(f.read())
x_bytes = torch.load(buffer)
# Load a module with 'ascii' encoding for unpickling
x_module = torch.load('module.pt', encoding='ascii')
Toggling Gradient Computation
# Gradients are not needed when no backward pass will be run, e.g. during inference
x = torch.tensor([0.8], requires_grad=True)
with torch.no_grad():
    y = x * 2
y.requires_grad
False
@torch.no_grad()
def doubler(x):
    return x * 2
z = doubler(x)
z.requires_grad
False
x = torch.tensor([0.68], requires_grad=True)
with torch.no_grad():
    with torch.enable_grad():
        y = x * 2
y.requires_grad
True
y.backward()
x.grad
tensor([2.])
@torch.enable_grad()
def doubler(x):
    return x * 2
with torch.no_grad():
    z = doubler(x)
z.requires_grad
True
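Besides torch.no_grad() and torch.enable_grad(), torch.set_grad_enabled(mode) toggles gradient computation based on a boolean flag; a brief sketch:
x = torch.tensor([1.0], requires_grad=True)
is_train = False
with torch.set_grad_enabled(is_train):  # usable as a context manager (or called directly)
    y = x * 2
print(y.requires_grad)                  # False, because is_train is False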