1. Viewing / accessing the model's weights.
import torch
import torch.nn as nn
from torchvision import models

class MyModel(nn.Module):
    def __init__(self):  # the layer dims mimic the head of a ResNet
        super(MyModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.gap = nn.AdaptiveAvgPool2d(1)  # 2-D pooling for a [B, C, H, W] feature map
        self.fc = nn.Linear(2048, 512)      # assumes a 2048-dim pooled feature, as in ResNet-50

    def forward(self, input):
        # NOTE: conv1 outputs 64 channels while fc expects 2048, so this forward
        # is only a sketch -- the model exists to illustrate weight shapes.
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.gap(x)
        x = torch.flatten(x, 1)  # [B, C, 1, 1] -> [B, C]
        x = self.fc(x)
        return x
##############################
# prepare the model
model = MyModel()

blank = ' '
print('-----------------------------------------------')
print('|  weight name  |        weight shape         |')
print('-----------------------------------------------')
for index, (key, w_variable) in enumerate(model.named_parameters()):
    if len(key) <= 15:
        key = key + (15 - len(key)) * blank  # pad the name column to a fixed width
    w_variable_blank = ''
    if len(w_variable.shape) == 1:
        if w_variable.shape[0] >= 100:
            w_variable_blank = 8 * blank
        else:
            w_variable_blank = 9 * blank
    elif len(w_variable.shape) == 2:
        if w_variable.shape[0] >= 100:
            w_variable_blank = 2 * blank
        else:
            w_variable_blank = 3 * blank
    print('| {} | {}{} |'.format(key, w_variable.shape, w_variable_blank))
print('-----------------------------------------------')
[Result]
-----------------------------------------------
|  weight name  |        weight shape         |
-----------------------------------------------
| conv1.weight  | torch.Size([64, 3, 7, 7])   |
| bn1.weight    | torch.Size([64])            |
| bn1.bias      | torch.Size([64])            |
| fc.weight     | torch.Size([512, 2048])     |
| fc.bias       | torch.Size([512])           |
-----------------------------------------------
As the output shows, printing each layer's weight name and weight tensor shape gives a clear view of every weight's shape in the network, which makes further manipulation of the weights much easier.
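Beyond named_parameters(), it is also worth knowing model.state_dict(), which additionally exposes non-trainable buffers such as BatchNorm's running statistics. A minimal sketch, reusing the model defined above:

for name, tensor in model.state_dict().items():
    print(name, tuple(tensor.shape))
# bn1.running_mean, bn1.running_var and bn1.num_batches_tracked show up here
# but not in named_parameters(), because they are buffers, not Parameters.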
Worth noting:
1. A weight matrix stores output_dim first and input_dim second. For the fully connected layer above: [512 (out), 2048 (in)].
2. How the fully connected layer actually applies its weights (i.e. how the input is multiplied by the weight matrix), as verified in the snippet below. Reference: 矩阵-PyTorch-nn的形状.线性权重 - CocoaChina_一站式开发者成长社区
output = input.matmul(weight.t())
### Why: input has shape [B, inp_dim], while weight is stored as [out_dim, inp_dim].
### For the output to keep the [B, out_dim] layout, the weight must be transposed first.
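A quick, self-contained sanity check of this formula against nn.Linear itself (the layer size is arbitrary here):

import torch
import torch.nn as nn

fc = nn.Linear(2048, 512, bias=False)
x = torch.randn(4, 2048)              # [B, inp_dim]
manual = x.matmul(fc.weight.t())      # [B, out_dim], the formula above
assert torch.allclose(manual, fc(x))  # matches the layer's own forward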
2. Printing the model's parameter count
Adapted from: pytorch 计算网络参数个数_儒雅的小Z.的博客-CSDN博客
################
### model definition
# -------------
class MyModel(nn.Module):
    def __init__(self, feat_dim):  # dim of the ResNet output feature map
        super(MyModel, self).__init__()
        ...

    def forward(self, input):  # input is the 2048-dim feature
        ...
        return x

net = MyModel(feat_dim=2048)
######################################
type_size = 4  # bytes per parameter (float32)
params = list(net.parameters())
k = 0
for i in params:
    l = 1
    print("layer shape: " + str(list(i.size())))
    for j in i.size():
        l *= j
    print("params in this layer: " + str(l))
    k = k + l
print("total number of params: " + str(k))
print('Model {} : param size: {:.4f} MB'.format(net._get_name(), k * type_size / 1000 / 1000))
######################################
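For everyday use the same count fits in one line with Tensor.numel(); this is standard PyTorch, independent of the post above. Note that parameters() does not include buffers such as BatchNorm running statistics:

n_params = sum(p.numel() for p in net.parameters())
n_trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
print('total: {}, trainable: {}'.format(n_params, n_trainable))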
Further reading: 如何在Pytorch中精细化利用显存 | 再次浅谈Pytorch中的显存利用问题
3. Weight initialization. | official docs: torch.nn.init |
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.fc1 = nn.Linear(3, 3, bias=False)
        self.fc2 = nn.Linear(3, 3, bias=False)
        self.fc3 = nn.Linear(3, 3, bias=False)
        self.fc4 = nn.Linear(3, 3, bias=False)
        self.fc5 = nn.Linear(3, 3, bias=False)
        self.fc6 = nn.Linear(3, 3, bias=False)
        # one initializer per layer, purely for demonstration
        nn.init.uniform_(self.fc1.weight, a=0, b=2)
        nn.init.normal_(self.fc2.weight, mean=0.0, std=1.0)
        nn.init.constant_(self.fc3.weight, val=5)
        nn.init.eye_(self.fc4.weight)
        nn.init.orthogonal_(self.fc5.weight)
        nn.init.sparse_(self.fc6.weight, sparsity=0.3, std=0.01)

    def forward(self, x):
        # pass through the six layers in turn
        for fc in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6):
            x = fc(x)
        return x
net = MyModule(); print(net)
paras_fc1 = list(net.fc1.parameters()); print('\n--> uniform_:\n', paras_fc1)
paras_fc2 = list(net.fc2.parameters()); print('\n--> normal_:\n', paras_fc2)
paras_fc3 = list(net.fc3.parameters()); print('\n--> constant_:\n', paras_fc3)
paras_fc4 = list(net.fc4.parameters()); print('\n--> eye_:\n', paras_fc4)
paras_fc5 = list(net.fc5.parameters()); print('\n--> orthogonal_:\n', paras_fc5)
paras_fc6 = list(net.fc6.parameters()); print('\n--> sparse_:\n', paras_fc6)
[Result]:

--> uniform_:
[Parameter containing:
tensor([[ 1.0074,  1.1609,  0.9879],
        [ 1.1045,  1.6254,  1.8265],
        [ 0.2095,  0.9864,  0.0787]])]

--> normal_:
[Parameter containing:
tensor([[ 1.7744, -0.5984,  0.1673],
        [-0.1872,  0.2185, -0.4709],
        [ 0.5267, -0.2799,  0.4283]])]

--> constant_:
[Parameter containing:
tensor([[ 5.,  5.,  5.],
        [ 5.,  5.,  5.],
        [ 5.,  5.,  5.]])]

--> eye_:
[Parameter containing:
tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]])]

--> orthogonal_:
[Parameter containing:
tensor([[-0.0546,  0.9774, -0.2043],
        [-0.9091, -0.1332, -0.3946],
        [-0.4129,  0.1642,  0.8959]])]

--> sparse_:
[Parameter containing:
tensor(1.00000e-02 *
       [[-0.8230,  0.0000,  0.0000],
        [ 0.0000,  0.4319,  1.0220],
        [ 0.0275,  0.3808,  0.8031]])]
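In a real network one rarely initializes layers one by one; the usual idiom is a single init function pushed through every submodule with Module.apply(). A minimal sketch (the particular initializers chosen here are just examples):

def weights_init(m):
    # called once for every submodule, including the root module
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0.0, std=0.01)
        if m.bias is not None:
            nn.init.constant_(m.bias, val=0)

net = MyModule()
net.apply(weights_init)  # recursively applies weights_init to all submodules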
4. Custom trainable parameters.
Reference: Pytorch自定义参数_Peter的脱发日记-CSDN博客_pytorch 自定义参数
class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        A = torch.randn((2, 3), requires_grad=True)
        self.weight_matrix = torch.nn.Parameter(A)  # registers A as a trainable parameter
        print(self.weight_matrix)
        # equivalent explicit form:
        # self.register_parameter("weight_matrix", torch.nn.Parameter(A))
        # weight init
        nn.init.normal_(self.weight_matrix, 0, 100)
        print(self.weight_matrix)

    def forward(self, x):
        return x

net = MyModule()
[Result]
Parameter containing:
tensor([[ 1.3814, -0.8291, -1.4620],
        [ 0.2066, -1.0515,  0.4364]], requires_grad=True)
Parameter containing:
tensor([[  21.0214,  -27.3125,  -86.9586],
        [ -33.6872, -116.0802,    2.9250]], requires_grad=True)
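Because weight_matrix was created as an nn.Parameter, it shows up in net.parameters() and will be picked up by any optimizer; a plain tensor attribute would not be. A quick check:

for name, p in net.named_parameters():
    print(name, tuple(p.shape), p.requires_grad)  # -> weight_matrix (2, 3) True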
Note: never forget to initialize such parameters! torch.Tensor(...) allocates uninitialized memory, so its initial contents are arbitrary.
import torch
import torch.nn as nn

C_out, C_in, k = 64, 64, 3  # conv weight dims: [out_channels, in_channels, k, k]

class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.conv_weight = nn.Parameter(torch.Tensor(C_out, C_in, k, k))  # uninitialized!
        # weight init
        nn.init.kaiming_normal_(self.conv_weight, mode='fan_out', nonlinearity='relu')
        print(self.conv_weight)

    def forward(self, x):
        return x

net = MyModule()
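To sketch how such a hand-built weight is actually consumed, the functional API takes the weight tensor explicitly. The ManualConv module below is my illustration (the name and dims are assumptions, not from the original):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ManualConv(nn.Module):
    def __init__(self, C_out, C_in, k):
        super(ManualConv, self).__init__()
        self.k = k
        self.conv_weight = nn.Parameter(torch.empty(C_out, C_in, k, k))
        nn.init.kaiming_normal_(self.conv_weight, mode='fan_out', nonlinearity='relu')

    def forward(self, x):  # x: [B, C_in, H, W]
        # F.conv2d takes the weight tensor explicitly; padding=k//2 keeps the spatial size
        return F.conv2d(x, self.conv_weight, bias=None, stride=1, padding=self.k // 2)

out = ManualConv(64, 64, 3)(torch.rand(2, 64, 32, 32))
print(out.shape)  # torch.Size([2, 64, 32, 32])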
5. Changing parameters by hand during training.
To change parameters mid-training, the key thing to understand is how backpropagation updates them.
Suppose a layer's weight is W. If we overwrite W with W' before the forward pass, the forward pass actually runs through W', and therefore the backward pass and the optimizer step also act on W'.
import torch
import torch.nn as nn

class modelfunc(nn.Module):
    def __init__(self, class_num):
        super(modelfunc, self).__init__()
        self.fc = nn.Linear(3, 2)
        self.classifier = nn.Linear(2, class_num)
        nn.init.normal_(self.fc.weight, mean=0.0, std=1.0)
        # self.fc.weight.data = torch.FloatTensor([[1, 2], [2, 4], [3, 4]]).t()
        # print(self.fc.weight)

    def forward(self, x):
        print('\n Before Forward')
        print(self.fc.weight.data)
        ### change the weight in place, bypassing autograd ###
        self.fc.weight.data = torch.FloatTensor([[1, 2], [2, 4], [3, 4]]).t()  # -> [2, 3]
        x = self.fc(x)
        x = self.classifier(x)
        print('\n After Forward')
        print(self.fc.weight.data)
        return x

inputs = torch.rand(10, 3)                                 # [B, dim]
target = torch.LongTensor([1, 0, 0, 0, 1, 1, 1, 1, 0, 1])  # [B]
criterion = nn.CrossEntropyLoss()
net = modelfunc(2)
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

### train
for iii in range(2):
    out = net(inputs)
    optimizer.zero_grad()
    loss = criterion(out, target)
    loss.backward()
    optimizer.step()

print(net.fc.weight)
Result:

Before Forward                 # iteration 1: the randomly initialized W
tensor([[0.7627, 0.6372, 0.9846],
        [1.5721, 0.2233, 0.3378]])

After Forward                  # W has been overwritten with W'
tensor([[1., 2., 3.],
        [2., 4., 4.]])

Before Forward                 # iteration 2: W' after one Adam step (each entry moved by about lr = 0.1)
tensor([[0.9000, 1.9000, 2.9000],
        [1.9000, 3.9000, 3.9000]])

After Forward                  # overwritten with W' again
tensor([[1., 2., 3.],
        [2., 4., 4.]])

Parameter containing:          # final print(net.fc.weight): W' after the second step
tensor([[0.9260, 1.9277, 2.9258],
        [1.9479, 3.9440, 3.9484]], requires_grad=True)
6. Extracting weights from high-level wrapped models via _modules.
In high-level model definitions, the same module class is often instantiated many times under one name (the classic example being the Bottleneck block in ResNet-50). In that case the model's _modules dict (together with .module for DataParallel) lets us drill down to the weights of one specific instance.
### models are usually wrapped in DataParallel
model = nn.DataParallel(model).cuda()

### use .module to unwrap DataParallel, then index into the inner modules
print(model.module._modules['layer1'][0].conv2.weight.shape)              # [64, 64, 3, 3]
# or, equivalently:
print(model.module._modules['layer1'][0]._modules['conv2'].weight.shape)  # [64, 64, 3, 3]

### explanation
# ['layer1'] is the layer1 stage of resnet50. layer1 contains 3 Bottlenecks,
# so ['layer1'] is followed by [0], which selects the first Bottleneck;
# from the Bottleneck we can then reach its submodules (conv2 here).

### assigning new weights works through the same path
new_weight = torch.FloatTensor(torch.rand([64, 64, 3, 3])).cuda()  # [out, input, k, k]
model.module._modules['layer1'][0].conv2.weight.data = new_weight
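For reference, the same weight is also reachable through plain attribute access, and named_modules() gives a flat way to search the whole tree by name (both are standard nn.Module APIs):

# attribute access, equivalent to the _modules indexing above
print(model.module.layer1[0].conv2.weight.shape)  # [64, 64, 3, 3]

# or enumerate every (name, module) pair and filter by the dotted name
for name, m in model.module.named_modules():
    if name == 'layer1.0.conv2':
        print(name, m.weight.shape)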