1. Layers and Blocks
1.1 nn.Sequential: sequential structures
If the network we want to define is a simple sequential structure, we can just list the required modules from front to back inside an nn.Sequential.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: pytorch_block_test
# @Create time: 2021/11/30 20:06
# 1. Import the required libraries
import torch
from torch import nn
from d2l import torch as d2l
from torch.nn import functional as F
# 2. Define the network structure. For a simple sequential structure,
# we can just place the required modules, in order, inside nn.Sequential
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
# 3. Define the input x, sampled from a normal distribution with mean 0 and variance 1; shape: 2 rows, 20 columns
x = torch.randn(2, 20)
y = net(x)  # (2,20)->(20,256)->(256,10)
# ----> following the data flow, y.shape = torch.Size([2, 10])
# 4. Print x, y, and their shapes
print(f'x.shape={x.shape}')
print(f'x={x}')
print(f'y.shape={y.shape}')
print(f'y={y}')
- Result
x.shape=torch.Size([2, 20])
x=tensor([[0.0047, 0.5065, 0.5100, 0.3056, 0.0196, 0.9957, 0.0015, 0.5036, 0.5726,
0.5680, 0.1238, 0.6555, 0.4393, 0.6692, 0.8003, 0.1764, 0.7283, 0.3125,
0.5573, 0.4566],
[0.2454, 0.8227, 0.7368, 0.8229, 0.9186, 0.9963, 0.8094, 0.4029, 0.8710,
0.8929, 0.7324, 0.1416, 0.0166, 0.3761, 0.0756, 0.6547, 0.2994, 0.9885,
0.5024, 0.1209]])
y.shape=torch.Size([2, 10])
y=tensor([[-0.0582, 0.0382, -0.1652, -0.0069, 0.0254, 0.2029, 0.3390, 0.0706,
0.1821, 0.0664],
[-0.0840, 0.0417, -0.1278, 0.0178, 0.0898, 0.0510, 0.3621, -0.0420,
0.2540, 0.0263]], grad_fn=<AddmmBackward>)
1.2 Custom blocks
To define a custom block, we subclass nn.Module and override the parent's __init__ and forward methods. For the data flow we can use functions from torch.nn.functional such as F.relu(); F.relu is a plain function, which makes it well suited to calls inside forward.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: pytorch_block_test
# @Create time: 2021/11/30 20:06
# 1. Import the required libraries
import torch
from torch import nn
from d2l import torch as d2l
from torch.nn import functional as F
# 2. Define a custom network class; it usually inherits from nn.Module.
# We only need to override the parent's __init__ and forward methods
class MLP(nn.Module):
    def __init__(self):
        super().__init__()  # call the parent nn.Module initializer
        self.hidden = nn.Linear(20, 256)  # self.hidden: the first fully connected layer
        self.out = nn.Linear(256, 10)  # self.out: the second fully connected layer

    def forward(self, x):
        # data flow: self.hidden -> F.relu -> self.out
        return self.out(F.relu(self.hidden(x)))

# 3. Define the input x, sampled from a normal distribution with mean 0 and variance 1; shape: 2 rows, 20 columns
x = torch.randn(2, 20)
# 4. Instantiate the MLP class as net
net = MLP()
y = net(x)  # (2,20)->(20,256)->(256,10) ----> following the data flow, y.shape = torch.Size([2, 10])
# 5. Print x, y, and their shapes
print(f'x.shape={x.shape}')
print(f'x={x}')
print(f'y.shape={y.shape}')
print(f'y={y}')
- Result
x.shape=torch.Size([2, 20])
x=tensor([[-0.3527, 1.6644, 1.0310, 0.4432, 2.1372, -1.8731, 0.3239, 0.6662,
0.7836, -2.2122, -0.2200, 0.0370, 0.8928, -0.9010, 0.0980, -0.7804,
-1.4364, 0.3347, -0.0531, 0.3445],
[ 0.1153, -1.1421, 0.8288, -0.2353, -0.1409, -0.1211, 0.5926, -0.0063,
-0.5935, -0.4580, -0.4329, 0.6194, 0.3433, 0.6347, 0.3039, 1.3130,
-0.4770, 0.6959, 0.5533, -0.9702]])
y.shape=torch.Size([2, 10])
y=tensor([[ 0.2323, 0.2224, 0.1307, -0.2737, 0.0360, -0.5250, 0.2116, 0.5169,
0.4127, 0.0427],
[ 0.0192, 0.2411, 0.1426, -0.0705, -0.0751, 0.0207, -0.1251, -0.1801,
0.1788, 0.0235]], grad_fn=<AddmmBackward>)
1.3 MySequential: a custom sequential container
Our own MySequential class also inherits from nn.Module. It stores the submodules in nn.Module's self._modules, which is an OrderedDict(); forward then iterates over the dict's values and passes the input through each block in turn.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: MySequential
# @Create time: 2021/11/30 21:21
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
# 2. Define the network class
class MySequential(nn.Module):
    def __init__(self, *args):  # each positional argument is a module, e.g. nn.Linear(...)
        super().__init__()  # call the parent nn.Module initializer
        for idx, block in enumerate(args):  # iterate over the modules one by one
            # self._modules is an OrderedDict(); its keys must be strings
            self._modules[str(idx)] = block

    def forward(self, x):
        for block in self._modules.values():
            x = block(x)  # feed the input through each block in order
        return x

# 3. Instantiate a network, passing in the modules
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
# 4. Define the input x, sampled from a normal distribution with mean 0 and variance 1; shape: 2 rows, 20 columns
x = torch.randn(2, 20)
# 5. Feed x through the network to get y
y = net(x)
# 6. Print x, y, and their shapes
print(f'x.shape={x.shape}')
print(f'x={x}')
print(f'y.shape={y.shape}')
print(f'y={y}')
- Result
x.shape=torch.Size([2, 20])
x=tensor([[-0.9076, -0.0042, -1.4294, 0.5501, -0.7378, -0.2270, -0.0329, -0.1985,
-1.4995, -1.8934, -1.0175, -0.4681, -1.8310, -2.4647, 0.7275, 1.9416,
-0.4558, 0.7354, 0.2370, 0.6599],
[ 0.3453, 2.3085, -0.1061, 2.3007, -0.9068, -1.0150, -0.7913, 0.7521,
-0.6367, 0.6724, 0.5350, 0.5830, 1.2485, 0.5318, 0.7283, -1.1654,
-0.0523, -0.3722, -0.3253, 0.1035]])
y.shape=torch.Size([2, 10])
y=tensor([[ 0.0930, 0.0631, -0.2920, 0.3233, 0.1828, -0.3249, -0.2367, -0.2204,
-0.1493, 0.4884],
[ 0.1747, 0.2383, -0.1816, -0.0387, -0.0336, -0.8129, -0.2613, -0.3963,
0.1061, 0.2563]], grad_fn=<AddmmBackward>)
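Registering the blocks in self._modules (rather than in a plain Python list or dict) is what lets nn.Module discover them; a minimal sketch, continuing the code above:
print(net)  # MySequential prints its children because they are registered in self._modules
print(len(list(net.parameters())))  # 4: the weight and bias of each of the two Linear layers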
1.4 Blocks with constant parameters
Sometimes we do not want every parameter in a network to be trainable; a constant weight should keep its value and never be updated during training. We can achieve this by creating the tensor with requires_grad=False.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: Constant_class
# @Create time: 2021/11/30 21:57
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
# 2. Define the network class
class FixHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # constant weight: created with requires_grad=False, so it is never updated
        self.weight = torch.randn((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        x = self.linear(x)
        # fixed affine transform using the constant weight
        x = F.relu(torch.mm(x, self.weight) + 1)
        # reuse the same Linear layer: both calls share one set of parameters
        x = self.linear(x)
        # arbitrary Python control flow is allowed inside forward
        while x.abs().sum() > 1:
            x /= 2
        return x.sum()

# 3. Instantiate the network
net = FixHiddenMLP()
# 4. Define the input x, sampled from a normal distribution with mean 0 and variance 1; shape: 2 rows, 20 columns
x = torch.randn(2, 20)
# 5. Feed x through the network to get y
y = net(x)
# 6. Print x, y, and their shapes
print(f'x.shape={x.shape}')
print(f'x={x}')
print(f'y.shape={y.shape}')
print(f'y={y}')
- Result
x.shape=torch.Size([2, 20])
x=tensor([[-1.2292, 1.2531, 0.0570, 0.0789, 1.0228, 0.0171, -1.2316, 0.0291,
-0.6296, -1.5072, -1.6631, -0.2863, -0.6712, -0.2611, 0.4859, -0.4698,
0.2550, -0.1078, 0.0936, 0.3198],
[-1.2635, -1.0466, -0.9018, -0.7134, 0.5184, -0.9594, 1.2047, 0.0948,
1.2742, -0.6078, -1.4643, -0.6303, 2.0382, -0.4367, 1.0663, 0.9935,
1.0251, 0.3888, 0.2856, 0.5535]])
y.shape=torch.Size([])
y=-0.08202319592237473
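We can verify that the constant weight stays outside autograd while the Linear layer still receives gradients; a minimal sketch, continuing the code above (y is a scalar, so backward() needs no argument):
y.backward()
print(net.weight.grad)  # None: the constant weight receives no gradient
print(net.linear.weight.grad is not None)  # True: the shared Linear layer does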
1.5 Nesting blocks
Real projects often call for more complex networks, which we can build by nesting blocks inside one another.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: NestMLP
# @Create time: 2021/11/30 22:16
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define a nested network: one block nested inside the class
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()  # call the parent nn.Module initializer
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),  # an inner block
                                 nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)  # a fully connected layer

    def forward(self, x):
        return self.linear(self.net(x))  # nested flow: self.net -> self.linear

# 3. Instantiate the network and nest it again in an outer nn.Sequential
net = nn.Sequential(NestMLP(), nn.Linear(16, 10))
# 4. Define the input x, sampled from a normal distribution with mean 0 and variance 1; shape: 2 rows, 20 columns
x = torch.randn(2, 20)
# 5. Feed x through the network to get y
y = net(x)
# 6. Print x, y, and their shapes
print(f'x.shape={x.shape}')
print(f'x={x}')
print(f'y.shape={y.shape}')
print(f'y={y}')
- Result
x.shape=torch.Size([2, 20])
x=tensor([[ 1.4993, -0.9844, 0.0595, -0.4456, 0.9731, -2.2285, -0.6895, 1.2156,
0.0364, -0.4020, 0.4846, 0.6030, 1.3199, -0.0964, -1.4974, -1.4573,
-0.3989, 1.0101, 0.8199, 0.1939],
[ 0.0588, 0.2262, 0.3192, 0.0133, 0.9729, 0.1620, 0.4958, -0.3061,
-0.8012, 0.6337, -2.0838, -0.5348, 2.8014, -0.4037, -0.6896, -0.3641,
0.5970, 0.2919, 0.1152, -0.1861]])
y.shape=torch.Size([2, 10])
y=tensor([[ 0.0641, 0.1499, -0.0387, 0.1439, -0.0708, -0.0974, -0.2287, 0.1267,
0.2585, 0.1211],
[ 0.1187, 0.1858, 0.0008, 0.1155, -0.0687, -0.0281, -0.1360, 0.1136,
0.2844, 0.1466]], grad_fn=<AddmmBackward>)
1.6 Summary
- A custom block inherits from nn.Module and overrides the parent's __init__ and forward methods
- Values that should not be updated are created with requires_grad=False
- Blocks can be nested to build more complex networks
- When a layer carries learnable state we generally compose module classes (e.g. nn.ReLU()); when it does not, the functional form F.relu() is convenient inside a custom forward, as the sketch below shows.
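A minimal sketch contrasting the two styles; both compute the same function:
import torch
from torch import nn
from torch.nn import functional as F

x = torch.randn(2, 3)
relu_module = nn.ReLU()  # the module form can be listed inside nn.Sequential
print(torch.equal(relu_module(x), F.relu(x)))  # True: the functional form suits forward()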
2. Parameter Management
2.1 net[i].state_dict()
To inspect the weight and bias of a particular fully connected layer, we can call net[i].state_dict():
# 1. Print the weight & bias of fully connected layer 0
print(f'net[0].state_dict={net[0].state_dict()}')
To access all of net's parameters at once, we can use net.named_parameters():
print(*[(name, param.shape) for name, param in net.named_parameters()])
- Code:
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: MLP_state_dict
# @Create time: 2021/12/1 7:55
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define a simple MLP with nn.Sequential
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# 3. Initialize the input x and feed it through net to get y
x = torch.randn((2, 4))
y = net(x)
# 4. Print x and y
print(f'x.shape={x.shape}')
print(f'x={x}')
print(f'y.shape={y.shape}')
print(f'y={y}')
# 5. Print the weight & bias of fully connected layer 0
print(f'net[0].state_dict={net[0].state_dict()}')
# 6. Print the weight & bias of fully connected layer 2
print(f'net[2].state_dict={net[2].state_dict()}')
# 7. To access all of the network's parameters at once,
# we can use net.named_parameters()
print(f'net.named_parameters()={[(name, param.shape) for name, param in net.named_parameters()]}')
- Result
x.shape=torch.Size([2, 4])
x=tensor([[-6.1701e-01, -8.0752e-01, 1.3000e-03, -3.0338e-01],
[-9.5879e-02, 5.7886e-01, 1.0582e+00, 1.7231e+00]])
y.shape=torch.Size([2, 1])
y=tensor([[-0.1381],
[ 0.0655]], grad_fn=<AddmmBackward>)
net[0].state_dict=OrderedDict([('weight', tensor([[-0.1712, 0.3055, -0.2866, 0.4584],
[-0.3777, -0.4429, 0.2401, -0.1080],
[ 0.3246, -0.3972, 0.4398, 0.2152],
[ 0.2989, 0.0303, -0.4044, -0.1043],
[ 0.2464, -0.3810, 0.4455, -0.3012],
[ 0.0170, -0.0318, -0.1700, 0.2406],
[ 0.4608, 0.1037, -0.0280, 0.0904],
[-0.3881, -0.0400, 0.3549, -0.4435]])), ('bias', tensor([ 0.4972, -0.3790, 0.1939, -0.4700, -0.3414, 0.3996, 0.2762, 0.4395]))])
net[2].state_dict=OrderedDict([('weight', tensor([[ 0.1875, 0.3011, -0.2403, -0.1602, 0.0164, 0.0279, 0.1943, -0.1703]])), ('bias', tensor([-0.0577]))])
net.named_parameters()=[('0.weight', torch.Size([8, 4])), ('0.bias', torch.Size([8])), ('2.weight', torch.Size([1, 8])), ('2.bias', torch.Size([1]))]
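A single parameter can also be pulled out by its dotted name; a minimal sketch reusing net from the code above:
print(net.state_dict()['2.bias'].data)  # fetch layer 2's bias via its dotted name
print(net[2].bias.data)  # equivalent direct indexing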
2.2 net[i].bias & net[i].weight
We can also inspect a given fully connected layer's weight and bias directly. Before backpropagation the gradient is None; after a backward pass it holds the gradient values.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: net_weight
# @Create time: 2021/12/1 10:10
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define a simple MLP with nn.Sequential
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# 3. Initialize the input x and feed it through net to get y
x = torch.randn((2, 4))
y = net(x)
# 4. Access layer 0's weight parameter (value plus gradient)
print(f'net[0].weight={net[0].weight}')
# 5. Access layer 0's weight values
print(f'net[0].weight.data={net[0].weight.data}')
# 6. Access layer 0's weight gradient; before backpropagation it is None
print(f'before_backward:net[0].weight.grad={net[0].weight.grad}')
# 7. After backpropagation the weight gradient is populated
y.sum().backward()
print(f'after_backward:net[0].weight.grad={net[0].weight.grad}')
- Result
net[0].weight=Parameter containing:
tensor([[ 0.0069, -0.1325, -0.0200, -0.0368],
[ 0.3919, 0.3281, 0.4353, 0.0637],
[-0.1967, 0.1340, -0.2187, 0.3443],
[ 0.4322, -0.3234, -0.3824, -0.2407],
[-0.3195, 0.0724, -0.3629, -0.3278],
[-0.2065, -0.4287, 0.0216, 0.4784],
[-0.2902, -0.3653, -0.1361, -0.4783],
[ 0.3232, -0.1158, 0.0086, -0.0837]], requires_grad=True)
net[0].weight.data=tensor([[ 0.0069, -0.1325, -0.0200, -0.0368],
[ 0.3919, 0.3281, 0.4353, 0.0637],
[-0.1967, 0.1340, -0.2187, 0.3443],
[ 0.4322, -0.3234, -0.3824, -0.2407],
[-0.3195, 0.0724, -0.3629, -0.3278],
[-0.2065, -0.4287, 0.0216, 0.4784],
[-0.2902, -0.3653, -0.1361, -0.4783],
[ 0.3232, -0.1158, 0.0086, -0.0837]])
before_backward:net[0].weight.grad=None
after_backward:net[0].weight.grad=tensor([[ 0.1667, 0.2206, -0.0721, -0.0612],
[ 0.0430, 0.0353, -0.0211, -0.0311],
[ 0.1484, 0.0509, -0.0810, -0.1576],
[ 0.1497, 0.1230, -0.0734, -0.1083],
[-0.0790, -0.0649, 0.0387, 0.0571],
[ 0.0000, 0.0000, 0.0000, 0.0000],
[ 0.0000, 0.0000, 0.0000, 0.0000],
[ 0.1541, 0.1265, -0.0756, -0.1114]])
2.3 Built-in parameter initialization with apply
If the network contains fully connected layers, we can re-initialize their parameters. PyTorch already initializes nn.Linear layers by default (with a Kaiming-uniform scheme), and we can also define our own initialization:
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: apply_weight
# @Create time: 2021/12/1 10:26
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define a simple MLP with nn.Sequential
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# 3. Define an init function to be used with apply
def init_normal(m):
    if type(m) == nn.Linear:  # type check: only touch Linear layers
        nn.init.normal_(m.weight, mean=0, std=0.01)  # sample from a normal distribution (mean 0, std 0.01)
        nn.init.zeros_(m.bias)  # set the bias to the constant 0

# 4. Initialize the network: apply walks every submodule
net.apply(init_normal)
# 5. Print the network parameters
print(net.state_dict())
- Result
OrderedDict([('0.weight', tensor([[-0.0026, 0.0139, -0.0005, 0.0193],
[ 0.0085, 0.0012, -0.0056, -0.0005],
[-0.0112, -0.0009, -0.0118, -0.0049],
[-0.0037, -0.0022, -0.0015, -0.0019],
[-0.0093, 0.0094, 0.0089, 0.0290],
[ 0.0075, -0.0060, -0.0082, 0.0043],
[ 0.0028, 0.0052, 0.0049, 0.0130],
[-0.0065, 0.0129, 0.0159, -0.0050]])), ('0.bias', tensor([0., 0., 0., 0., 0., 0., 0., 0.])), ('2.weight', tensor([[-0.0023, 0.0126, 0.0011, -0.0028, -0.0008, -0.0042, -0.0139, 0.0098]])), ('2.bias', tensor([0.]))])
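apply can also be called on an individual sub-block, so different layers can receive different schemes; a minimal sketch reusing net and init_normal from the code above (init_constant is a helper added here for illustration):
def init_constant(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 1)  # fill the weight with the constant 1
        nn.init.zeros_(m.bias)

net[0].apply(init_normal)  # normal init for the first Linear layer only
net[2].apply(init_constant)  # constant init for the last Linear layer
print(net[0].weight.data[0])
print(net[2].weight.data)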
2.4 Common initializations
- Xavier initialization
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight)
        nn.init.zeros_(m.bias)
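As with init_normal in the previous section, this function is handed to net.apply, which walks every submodule; a minimal sketch:
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
net.apply(xavier)  # re-initializes every nn.Linear with Xavier-normal weights and zero bias
print(net[0].weight.data)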
2.5 Parameter sharing
When several positions in a network should share the same weights, we can create one shared module and reuse it:
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: shared_weight
# @Create time: 2021/12/1 13:44
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define the shared layer
shared = nn.Linear(8, 8)
# 3. Define the network
net = nn.Sequential(
nn.Linear(4, 8), nn.ReLU(),
shared, nn.ReLU(),
shared, nn.ReLU(),
nn.Linear(8, 1))
# 4. Initialize x and compute y
x = torch.randn((2, 4))
y = net(x)
# 5. Print the two shared positions and check that their values match
print(f'net[2]_shared_1={net[2].weight.data}')
print(f'net[4]_shared_1={net[4].weight.data}')
print(f'identical:={net[2].weight.data == net[4].weight.data}')
- Result
net[2]_shared_1=tensor([[-0.3470, 0.1637, 0.1820, -0.3244, 0.2915, -0.2954, -0.1513, 0.2161],
[ 0.0279, -0.0046, -0.3354, 0.3328, -0.2016, -0.1883, 0.1146, 0.2739],
[-0.3233, 0.0792, -0.1184, -0.2801, -0.3365, -0.2878, 0.1755, 0.2569],
[ 0.1951, -0.2067, -0.1324, 0.0980, -0.2810, 0.2466, -0.1117, 0.0034],
[-0.3462, 0.1150, 0.2141, 0.3004, 0.3027, 0.2214, -0.0965, -0.2800],
[-0.0846, -0.0802, -0.2262, -0.1897, 0.1528, 0.0146, -0.0337, 0.3307],
[ 0.2678, 0.2924, -0.1849, -0.2794, 0.2414, -0.2814, -0.2122, -0.2829],
[ 0.2998, -0.2362, 0.3262, 0.0263, -0.2136, -0.0747, 0.3332, -0.1522]])
net[4]_shared_1=tensor([[-0.3470, 0.1637, 0.1820, -0.3244, 0.2915, -0.2954, -0.1513, 0.2161],
[ 0.0279, -0.0046, -0.3354, 0.3328, -0.2016, -0.1883, 0.1146, 0.2739],
[-0.3233, 0.0792, -0.1184, -0.2801, -0.3365, -0.2878, 0.1755, 0.2569],
[ 0.1951, -0.2067, -0.1324, 0.0980, -0.2810, 0.2466, -0.1117, 0.0034],
[-0.3462, 0.1150, 0.2141, 0.3004, 0.3027, 0.2214, -0.0965, -0.2800],
[-0.0846, -0.0802, -0.2262, -0.1897, 0.1528, 0.0146, -0.0337, 0.3307],
[ 0.2678, 0.2924, -0.1849, -0.2794, 0.2414, -0.2814, -0.2122, -0.2829],
[ 0.2998, -0.2362, 0.3262, 0.0263, -0.2136, -0.0747, 0.3332, -0.1522]])
identical:=tensor([[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True]])
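Because both positions refer to one and the same nn.Linear object, training updates them together and their gradients accumulate into a single tensor; a minimal sketch, continuing the code above:
y.sum().backward()
print(net[2].weight.grad is net[4].weight.grad)  # True: one parameter, one gradient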
3. Designing Layers
3.1 Layers without parameters
We can design a layer without parameters by inheriting directly from nn.Module.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: pytorch_block_test
# @Create time: 2021/11/30 20:06
# 1. Import the required libraries
import torch
from torch import nn
from d2l import torch as d2l
from torch.nn import functional as F
# 2. A parameter-free layer that subtracts the mean (centering)
class CenterLayer(nn.Module):
    def __init__(self):
        super(CenterLayer, self).__init__()

    def forward(self, x):
        return x - x.mean()

# 3. Instantiate the parameter-free layer
layer = CenterLayer()
# 4. Initialize x and compute the output y
x = torch.FloatTensor([1, 2, 3, 4, 5])
y = layer(x)
print(f'x={x}')
print(f'y={y}')
- Result
x=tensor([1., 2., 3., 4., 5.])
y=tensor([-2., -1., 0., 1., 2.])
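Like a built-in module, the custom layer can be dropped into nn.Sequential; a minimal sketch: after centering, the output mean should be zero up to floating-point error.
net = nn.Sequential(nn.Linear(8, 128), CenterLayer())
y = net(torch.rand(4, 8))
print(y.mean())  # a value very close to 0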
3.2 Layers with parameters
We can also design a layer with parameters, again inheriting directly from nn.Module; parameters that need to be updated are wrapped in nn.Parameter:
self.weight = nn.Parameter(torch.randn(in_units, out_units))
self.bias = nn.Parameter(torch.rand(out_units, ))
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: weight_parameters
# @Create time: 2021/12/1 18:53
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define the layer; wrapping a tensor in nn.Parameter registers it as a trainable parameter
class MyLinear(nn.Module):
    def __init__(self, in_units, out_units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, out_units))
        self.bias = nn.Parameter(torch.rand(out_units, ))

    def forward(self, x):
        # use the parameters directly (not .data) so gradients can flow into them
        linear = torch.matmul(x, self.weight) + self.bias
        return F.relu(linear)

# 3. Instantiate the layer and print its parameters
linear = MyLinear(5, 3)
print(f'linear_weight={linear.weight}')
print(f'linear_bias={linear.bias}')
- Result
linear_weight=Parameter containing:
tensor([[-0.1125, -0.2875, -0.7970],
[ 0.3203, -0.2908, 0.5508],
[-1.1320, 0.5827, 1.0698],
[ 0.3099, 1.0385, 0.8846],
[ 0.3072, 0.7499, -2.3270]], requires_grad=True)
linear_bias=Parameter containing:
tensor([0.9743, 0.1375, 0.2827], requires_grad=True)
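The custom layer can now run forward computation and compose with other layers; a minimal sketch, continuing the code above:
x = torch.rand(2, 5)
print(linear(x))  # forward pass through the custom layer: output shape (2, 3)
net = nn.Sequential(MyLinear(5, 8), MyLinear(8, 1))
print(net(x))  # custom layers also stack inside nn.Sequential: output shape (2, 1)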
4. Saving and Loading Data
We can use torch.save to store data and torch.load to read it back; the code below walks through the details.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: ZhangChu
# @File name: pytorch_save
# @Create time: 2021/12/1 19:58
# 1. Import the required libraries
import torch
from torch import nn
from torch.nn import functional as F
# 2. Define a custom MLP network
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))

# 3. Instantiate the MLP
net = MLP()
# 4. Initialize x and compute y
x = torch.randn(size=(2, 20))
y = net(x)
print(f'x={x}')
print(f'y={y}')
# 5. Print the network parameters
print(f'net.state_dict={net.state_dict()}')
# 6. torch.save stores the network parameters under the file name mlp.params
torch.save(net.state_dict(), 'mlp.params')
# 7. Instantiate a fresh MLP
clone = MLP()
# 8. load_state_dict() loads the stored parameters into the new instance
clone.load_state_dict(torch.load('mlp.params'))
# 9. Put the model in evaluation mode
clone.eval()
# 10. Compare the original y with the reloaded model's y_clone;
# if they match, the parameters were loaded successfully.
y_clone = clone(x)
print(y_clone == y)
- Result
x=tensor([[ 0.2219, -0.4316, 0.8380, -0.0142, 0.0846, -0.8432, -0.0031, 0.4185,
1.3405, 0.4688, -1.2442, -0.4675, -1.3307, 0.5283, 2.7393, -0.5348,
0.1841, -1.8828, -2.0372, 1.2543],
[ 1.6050, -2.2175, -1.6813, 0.9196, 0.4500, -1.8302, -0.5355, -1.4505,
1.1476, -1.4600, -2.0398, 0.3679, 0.7460, -0.0891, -1.1213, -1.3636,
1.5735, -1.7510, -1.0200, -1.1744]])
y=tensor([[ 0.0196, -0.5592, -0.0448, -0.0885, -0.6438, 0.0195, -0.6327, 0.1794,
-0.1254, 0.0393],
[-0.0510, -0.9104, -0.2452, 0.2919, 0.4454, -0.1018, -0.4897, 0.3997,
-0.6953, 0.4925]], grad_fn=<AddmmBackward>)
net.state_dict=OrderedDict([('hidden.weight', tensor([[ 0.0893, -0.0949, -0.0169, ..., 0.1048, -0.2107, -0.0523],
[-0.2225, -0.1004, 0.1272, ..., -0.1075, 0.1671, -0.0635],
[ 0.0349, -0.0633, -0.0523, ..., 0.1080, -0.2124, 0.0052],
...,
[ 0.0355, -0.1667, -0.0085, ..., -0.0834, -0.1167, 0.0926],
[-0.0295, 0.1983, -0.0115, ..., -0.0344, -0.0866, -0.1100],
[ 0.0014, 0.1308, 0.1674, ..., -0.2143, 0.1006, 0.1351]])), ('hidden.bias', tensor([-0.0412, 0.0521, 0.2112, -0.0843, 0.0214, -0.0543, 0.0104, -0.1204,
0.0988, -0.0854, -0.0495, 0.1759, 0.1284, -0.1945, 0.1486, -0.1811,
0.1670, 0.0447, -0.2026, 0.0276, 0.2028, 0.1421, -0.0644, 0.0258,
-0.0405, -0.1990, 0.2107, 0.0753, 0.2155, -0.1536, 0.0498, -0.0690,
-0.0436, -0.1818, -0.1628, 0.1239, 0.0877, 0.1570, 0.0329, -0.0955,
-0.0630, -0.1081, -0.0330, 0.1257, -0.1073, 0.1594, 0.0419, 0.0942,
0.1430, -0.0911, -0.1201, 0.2155, 0.0835, 0.1279, 0.0628, 0.0936,
0.0066, 0.2094, -0.1091, -0.0361, -0.1836, 0.0618, 0.0501, -0.2219,
0.1368, 0.1058, 0.1054, 0.2224, -0.0251, -0.2060, 0.1794, 0.0909,
0.1151, -0.0428, 0.0887, -0.2022, -0.1684, 0.0813, 0.1940, -0.1228,
-0.0397, 0.1024, -0.0261, 0.1132, -0.1859, -0.0682, -0.1409, -0.1539,
0.0017, 0.1412, 0.1746, -0.0881, 0.2075, 0.1979, -0.1900, -0.0306,
-0.0684, -0.0165, 0.0440, 0.0633, 0.0764, 0.1557, -0.0516, 0.1629,
-0.1061, -0.1876, 0.1332, -0.0596, -0.0064, 0.0570, 0.1791, -0.1750,
-0.0538, 0.1088, -0.0514, 0.2085, -0.1038, 0.1966, -0.0321, -0.1150,
0.0321, 0.0199, -0.1594, 0.1867, 0.1977, 0.0304, 0.0207, -0.0796,
-0.1716, 0.0954, -0.1753, -0.0263, 0.1508, 0.0563, -0.1008, 0.0726,
0.1662, 0.0369, 0.0973, -0.2164, 0.1884, -0.1914, -0.0928, 0.1116,
0.1121, 0.0350, -0.1348, -0.0011, 0.0502, -0.0584, -0.1721, 0.1021,
-0.1445, -0.0180, -0.1096, -0.0876, -0.1290, 0.0798, 0.1320, 0.1919,
-0.1917, -0.2168, -0.2010, 0.1543, 0.0713, 0.1142, -0.0097, -0.0392,
-0.0597, 0.0076, 0.1953, -0.0114, 0.1054, 0.0745, 0.0921, -0.0921,
0.1676, -0.1987, -0.1054, -0.0082, -0.1123, 0.0654, 0.0151, 0.0572,
-0.0614, 0.1370, -0.1910, -0.1708, 0.0745, 0.0581, 0.0163, 0.0984,
-0.1471, 0.1034, 0.1413, 0.0510, -0.0888, 0.0397, 0.1010, -0.1986,
-0.0590, -0.1887, 0.0260, -0.1806, -0.1348, -0.1598, -0.0525, -0.1458,
0.0823, 0.1159, 0.2140, -0.2087, -0.0501, 0.1704, 0.0279, 0.1475,
0.0657, 0.1999, 0.1904, -0.1485, 0.1911, 0.1969, -0.0663, -0.0253,
-0.1808, 0.1746, 0.0751, -0.1437, 0.0960, -0.0187, -0.1109, -0.0273,
0.1566, 0.1358, -0.1199, -0.0487, -0.0404, 0.1591, 0.0827, 0.0545,
0.2038, -0.1562, -0.1663, 0.1159, -0.0316, -0.1951, -0.0977, 0.0733,
0.1210, -0.0011, -0.1925, -0.0920, -0.1524, 0.0619, 0.0447, -0.0910])), ('output.weight', tensor([[ 0.0296, -0.0379, 0.0391, ..., 0.0354, 0.0332, 0.0277],
[ 0.0428, 0.0379, -0.0257, ..., 0.0187, -0.0234, -0.0241],
[-0.0526, -0.0003, 0.0386, ..., -0.0325, -0.0237, 0.0578],
...,
[ 0.0162, 0.0202, 0.0056, ..., -0.0289, -0.0289, -0.0380],
[-0.0512, -0.0448, -0.0402, ..., 0.0190, -0.0407, 0.0374],
[ 0.0442, -0.0558, 0.0555, ..., 0.0314, 0.0578, 0.0332]])), ('output.bias', tensor([ 0.0061, -0.0057, 0.0319, -0.0361, -0.0500, 0.0301, -0.0196, 0.0411,
-0.0153, -0.0190]))])
tensor([[True, True, True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True, True, True]])
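The same torch.save / torch.load pair also handles single tensors, lists, and dicts of tensors directly; a minimal sketch (the file names are arbitrary):
x = torch.arange(4)
torch.save(x, 'x-file')  # store a single tensor
x2 = torch.load('x-file')
mydict = {'x': x, 'y': torch.zeros(4)}
torch.save(mydict, 'mydict')  # store a dict of tensors
mydict2 = torch.load('mydict')
print(torch.equal(x, x2), torch.equal(mydict['y'], mydict2['y']))  # True True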
5. GPU Settings
5.1 Querying GPU information
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: pytorch_gpu
# @Create time: 2021/12/1 21:33
# 1. Import the required libraries
import torch
from torch import nn
# 2. Check whether CUDA is available
print(f'torch.cuda.is_available()={torch.cuda.is_available()}')
# 3. Get the index of the current GPU
print(f'torch.cuda.current_device()={torch.cuda.current_device()}')
# 4. Get the number of GPUs
print(f'torch.cuda.device_count()={torch.cuda.device_count()}')
# 5. Get the GPU's name
print(f'torch.cuda.get_device_name()={torch.cuda.get_device_name(0)}')
# 6. Get the GPU's device properties
print(torch.cuda.get_device_properties(device='cuda:0'))
- Result
torch.cuda.is_available()=True
torch.cuda.current_device()=0
torch.cuda.device_count()=1
torch.cuda.get_device_name()=GeForce GTX xxxx
_CudaDeviceProperties(name='GeForce GTX xxx', major=6, minor=1, total_memory=xxxxxMB, multi_processor_count=10)
5.2 Tensors and models on the GPU
- Creating tensors on the GPU
PyTorch creates tensors on the CPU by default; to create one on the GPU, pass a device argument:
y = torch.tensor([1, 2, 3], device='cuda:0')
- Running a model on the GPU
net = net.to(device='cuda:0')
- Data on different devices cannot be combined
If we create x = torch.tensor([1, 2, 3]) on the CPU but net = nn.Sequential(nn.Linear(3, 1)) on the GPU, then feeding x into the model raises an error, because the two do not live on the same device.
- Code
# -*- coding: utf-8 -*-
# @Project: zc
# @Author: zc
# @File name: pytorch_block_test
# @Create time: 2021/11/30 20:06
# 1. Import the required libraries
import torch
from torch import nn
from d2l import torch as d2l
# 2. Reference the CPU device
print(torch.device('cpu'))
# 3. Reference the GPU
print(torch.cuda.device('cuda:0'))
# 4. Define a helper that returns the GPU if one exists, otherwise the CPU
def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    else:
        return torch.device('cpu')

# 5. Store the device in a variable for later use
device = try_gpu()
print(f'device={device}')
# 6. PyTorch creates the tensor x on the CPU by default
x = torch.tensor([1, 2, 3])
print(f'x_device={x.device}')
# 7. Create a tensor directly on the GPU
y = torch.tensor([1, 2, 3], device=device)
print(f'y_device={y.device}')
# 8. Create a network on the CPU
net = nn.Sequential(nn.Linear(3, 1))
print(f'before:next(net.parameters()).device={next(net.parameters()).device}')
# 9. Move net onto the GPU
net = net.to(device=device)
print(f'after:next(net.parameters()).device={next(net.parameters()).device}')
- Result
cpu
<torch.cuda.device object at 0x000002999D6AA400>
device=cuda:0
x_device=cpu
y_device=cuda:0
before:next(net.parameters()).device=cpu
after:next(net.parameters()).device=cuda:0
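As noted above, the CPU tensor x cannot be fed straight into the GPU model; moving it to the same device first resolves the mismatch (Linear also expects floating-point input, hence the .float()). A minimal sketch, continuing the code above:
x_gpu = x.float().to(device)  # match dtype and copy the CPU tensor onto the GPU
print(net(x_gpu))  # input and parameters now live on the same device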