Pytorch学习(十七)--- 模型load各种问题解决

25 篇文章 27 订阅

简单的模型load

一般来说,保存模型是把参数全部用model.cpu().state_dict(), 然后加载模型时一般用 model.load_state_dict(torch.load(model_path))。 值得注意的是:torch.load 返回的是一个 OrderedDict.

import torch
import torch.nn as nn

class Net_old(nn.Module):
    """Three 3x3 convolutions with in-place ReLUs between them, kept in one ``nn.Sequential``.

    The layers live inside the ``nets`` container, so the state-dict keys are
    index based: ``nets.0.*``, ``nets.2.*`` and ``nets.4.*``.
    """

    def __init__(self):
        super(Net_old, self).__init__()
        layers = [
            nn.Conv2d(1, 2, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(2, 1, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(1, 1, kernel_size=3),
        ]
        self.nets = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        out = self.nets(x)
        return out

class Net_new(nn.Module):
    """Same layer stack as the sequential network, with every layer as its own attribute.

    The state-dict keys are therefore ``conv1.*``, ``conv2.*`` and ``conv3.*``.
    """

    def __init__(self):
        # super() must name this class; naming another class raises
        # TypeError because ``self`` is not an instance of it.
        super(Net_new, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 2, 3)
        self.r1 = torch.nn.ReLU(True)
        self.conv2 = torch.nn.Conv2d(2, 1, 3)
        self.r2 = torch.nn.ReLU(True)
        self.conv3 = torch.nn.Conv2d(1, 1, 3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 and return the output."""
        x = self.conv1(x)
        x = self.r1(x)
        x = self.conv2(x)
        x = self.r2(x)
        x = self.conv3(x)
        return x

# Save the sequential model's parameters to disk, then read the checkpoint back.
network = Net_old()
torch.save(network.cpu().state_dict(), 't.pth')

# torch.load gives back the saved state dict.
pretrained_net = torch.load('t.pth')
print(pretrained_net)

# Iterating a dict yields its keys, so enumerate produces (position, key)
# pairs. The formatted print works on both Python 2 and Python 3 and
# produces the same "position key" line as the old print statement.
for idx, key in enumerate(pretrained_net):
    print('%s %s' % (idx, key))

可以看到

OrderedDict([('nets.0.weight',
(0 ,0 ,.,.) =
 -0.2436  0.2523  0.3097
 -0.0315 -0.1307  0.0759
  0.0750  0.1894 -0.0761

(1 ,0 ,.,.) =
  0.0280 -0.2178  0.0914
  0.3227 -0.0121 -0.0016
 -0.0654 -0.0584 -0.1655
[torch.FloatTensor of size 2x1x3x3]
), ('nets.0.bias',
-0.0507
-0.2836
[torch.FloatTensor of size 2]
), ('nets.2.weight',
(0 ,0 ,.,.) =
 -0.2233  0.0279 -0.0511
 -0.0242 -0.1240 -0.0511
  0.2266  0.1385 -0.1070

(0 ,1 ,.,.) =
 -0.0943 -0.1403  0.0979
 -0.2163  0.1906 -0.2269
 -0.1984  0.0843 -0.0719
[torch.FloatTensor of size 1x2x3x3]
), ('nets.2.bias',
-0.1420
[torch.FloatTensor of size 1]
), ('nets.4.weight',
(0 ,0 ,.,.) =
  0.1981 -0.0250  0.2429
  0.3012  0.2428 -0.0114
  0.2878 -0.2134  0.1173
[torch.FloatTensor of size 1x1x3x3]
), ('nets.4.bias',
1.00000e-02 *
 -5.8426
[torch.FloatTensor of size 1]
)])
0 nets.0.weight
1 nets.0.bias
2 nets.2.weight
3 nets.2.bias
4 nets.4.weight
5 nets.4.bias

说明.state_dict()只是把所有模型的参数都以OrderedDict的形式存下来。通过

# Iterating the state dict yields its keys, so this prints "position key".
for idx, key in enumerate(pretrained_net):
    print('%s %s' % (idx, key))

得知这些参数的顺序!当然,如果要看具体的值,可以遍历 items():

# Print every parameter name followed by its tensor value.
for key, v in pretrained_net.items():
    print('%s %s' % (key, v))
nets.0.weight
(0 ,0 ,.,.) =
 -0.2444 -0.3148  0.1626
  0.2531 -0.0859 -0.0236
  0.1635  0.1113 -0.1110

(1 ,0 ,.,.) =
  0.2374 -0.2931 -0.1806
 -0.1456  0.2264 -0.0114
  0.1813  0.1134 -0.2095
[torch.FloatTensor of size 2x1x3x3]

nets.0.bias
-0.3087
-0.2407
[torch.FloatTensor of size 2]

nets.2.weight
(0 ,0 ,.,.) =
 -0.2206 -0.1151 -0.0783
  0.0723 -0.2008  0.0568
 -0.0964 -0.1505 -0.1203

(0 ,1 ,.,.) =
  0.0131  0.1329 -0.1763
  0.1276 -0.2025 -0.0075
 -0.1167 -0.1833  0.1103
[torch.FloatTensor of size 1x2x3x3]

nets.2.bias
-0.1858
[torch.FloatTensor of size 1]

nets.4.weight
(0 ,0 ,.,.) =
 -0.1019  0.0534  0.2018
 -0.0600 -0.1389 -0.0275
  0.0696  0.0360  0.1560
[torch.FloatTensor of size 1x1x3x3]

nets.4.bias
1.00000e-03 *
 -5.6003
[torch.FloatTensor of size 1]

如果哪一天我们需要重新写这个网络的,比如使用Net_new,这个网络是将每一层都作为类的一个属性。如果直接load

import torch
import torch.nn as nn

class Net_old(nn.Module):
    """Conv -> ReLU -> Conv -> ReLU -> Conv, wrapped in a single ``nn.Sequential``.

    Parameters are exposed under index-based keys such as ``nets.0.weight``.
    """

    def __init__(self):
        super(Net_old, self).__init__()
        stack = (
            nn.Conv2d(1, 2, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(2, 1, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(1, 1, kernel_size=3),
        )
        self.nets = nn.Sequential(*stack)

    def forward(self, x):
        """Return the output of the sequential stack for input ``x``."""
        result = self.nets(x)
        return result

class Net_new(nn.Module):
    """The same five layers as the sequential network, one attribute per layer.

    Parameters are exposed under attribute-based keys such as ``conv1.weight``.
    """

    def __init__(self):
        super(Net_new, self).__init__()
        # Layers are created in the order forward() applies them.
        self.conv1 = nn.Conv2d(1, 2, kernel_size=3)
        self.r1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(2, 1, kernel_size=3)
        self.r2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(1, 1, kernel_size=3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 and return the output."""
        for layer in (self.conv1, self.r1, self.conv2, self.r2, self.conv3):
            x = layer(x)
        return x

# Save the sequential model's parameters to disk, then read the checkpoint back.
network = Net_old()
torch.save(network.cpu().state_dict(), 't.pth')

pretrained_net = torch.load('t.pth')

# Show keys of pretrained model.
# print(key) with a single argument behaves the same on Python 2 and 3.
for key in pretrained_net:
    print(key)

# Define new network, and directly load the state_dict.
# The checkpoint keys ("nets.*") do not exist in Net_new, so this call fails.
new_network = Net_new()
new_network.load_state_dict(pretrained_net)

会出现unexpected key

nets.0.weight
nets.0.bias
nets.2.weight
nets.2.bias
nets.4.weight
nets.4.bias
Traceback (most recent call last):
  File "Blog.py", line 44, in <module>
    new_network.load_state_dict(pretrained_net)
  File "/home/vis/xxx/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 522, in load_state_dict
    .format(name))
KeyError: 'unexpected key "nets.0.weight" in state_dict'

这是因为,我们新的网络,都是“属性形式的”,查看新网络的state_dict

conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias

strict=False加载模型的正确解读

你可能会决定直接使用 strict=False 来加载:

import torch
import torch.nn as nn

class Net_old(nn.Module):
    """Three 3x3 convolutions with in-place ReLUs between them, kept in one ``nn.Sequential``.

    The layers live inside the ``nets`` container, so the state-dict keys are
    index based: ``nets.0.*``, ``nets.2.*`` and ``nets.4.*``.
    """

    def __init__(self):
        super(Net_old, self).__init__()
        layers = [
            nn.Conv2d(1, 2, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(2, 1, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(1, 1, kernel_size=3),
        ]
        self.nets = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        out = self.nets(x)
        return out

class Net_new(nn.Module):
    """The same five layers as the sequential network, one attribute per layer.

    Parameters are exposed under attribute-based keys such as ``conv1.weight``.
    """

    def __init__(self):
        super(Net_new, self).__init__()
        # Layers are created in the order forward() applies them.
        self.conv1 = nn.Conv2d(1, 2, kernel_size=3)
        self.r1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(2, 1, kernel_size=3)
        self.r2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(1, 1, kernel_size=3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 and return the output."""
        hidden = self.r1(self.conv1(x))
        hidden = self.r2(self.conv2(hidden))
        return self.conv3(hidden)

# Save the sequential model's parameters to disk, then read the checkpoint back.
old_network = Net_old()
torch.save(old_network.cpu().state_dict(), 't.pth')

pretrained_net = torch.load('t.pth')

# Show keys of pretrained model.
# print(key) with a single argument behaves the same on Python 2 and 3.
for key in pretrained_net:
    print(key)
print('****Before loading********')
new_network = Net_new()
print(torch.sum(old_network.nets[0].weight.data))
print(torch.sum(new_network.conv1.weight.data))
for key in new_network.state_dict():
    print(key)
print('-----After loading------')
new_network.load_state_dict(pretrained_net, strict=False)
# One might expect the two sums below to match now. They do not: none of the
# checkpoint keys ("nets.*") exist in Net_new, so nothing was assigned.
print(torch.sum(old_network.nets[0].weight.data))
print(torch.sum(new_network.conv1.weight.data))
for key in new_network.state_dict():
    print(key)

输出

nets.0.weight
nets.0.bias
nets.2.weight
nets.2.bias
nets.4.weight
nets.4.bias
****Before loading********
-0.882688805461
0.34207585454
conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias
-----After loading------
-0.882688805461
0.34207585454
conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias

数值一点变化都没有,说明“strict=False”没有那么智能!它直接忽略那些不匹配的key:有相同的key就赋值,没有就直接放弃赋值!下面在Net_new中也加入一个同名的'nets'属性来验证这一点:

import torch
import torch.nn as nn

class Net_old(nn.Module):
    """Conv -> ReLU -> Conv -> ReLU -> Conv, wrapped in a single ``nn.Sequential``.

    Parameters are exposed under index-based keys such as ``nets.0.weight``.
    """

    def __init__(self):
        super(Net_old, self).__init__()
        stack = (
            nn.Conv2d(1, 2, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(2, 1, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(1, 1, kernel_size=3),
        )
        self.nets = nn.Sequential(*stack)

    def forward(self, x):
        """Return the output of the sequential stack for input ``x``."""
        result = self.nets(x)
        return result

class Net_new(nn.Module):
    """Attribute-style network that additionally owns a ``nets`` Sequential.

    Besides ``conv1``/``conv2``/``conv3`` it holds a one-layer ``nets``
    container, so its state dict contains ``nets.0.weight`` and
    ``nets.0.bias`` next to the ``conv*`` keys.
    """

    def __init__(self):
        super(Net_new, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 2, 3)
        self.r1 = torch.nn.ReLU(True)
        self.conv2 = torch.nn.Conv2d(2, 1, 3)
        self.r2 = torch.nn.ReLU(True)
        # forward() applies conv3, so it has to be created here; without it
        # forward() fails with AttributeError.
        self.conv3 = torch.nn.Conv2d(1, 1, 3)
        # Net_new also gets a 'nets' attribute, named like the container in
        # the old network.
        self.nets = nn.Sequential(
            torch.nn.Conv2d(1, 2, 3)
        )

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 -> nets and return the output."""
        x = self.conv1(x)
        x = self.r1(x)
        x = self.conv2(x)
        x = self.r2(x)
        x = self.conv3(x)
        x = self.nets(x)
        return x

# Save the sequential model's parameters to disk, then read the checkpoint back.
old_network = Net_old()
torch.save(old_network.cpu().state_dict(), 't.pth')

pretrained_net = torch.load('t.pth')

# Show keys of pretrained model.
# print(key) with a single argument behaves the same on Python 2 and 3.
for key in pretrained_net:
    print(key)
print('****Before loading********')
new_network = Net_new()
print(torch.sum(old_network.nets[0].weight.data))
print(torch.sum(new_network.conv1.weight.data))
print(torch.sum(new_network.nets[0].weight.data))
for key in new_network.state_dict():
    print(key)
print('-----After loading------')
new_network.load_state_dict(pretrained_net, strict=False)
print(torch.sum(old_network.nets[0].weight.data))
print(torch.sum(new_network.conv1.weight.data))
# This value is expected to equal the sum of 'old_network.nets[0].weight',
# because the key 'nets.0.weight' exists in both networks.
print(torch.sum(new_network.nets[0].weight.data))
for key in new_network.state_dict():
    print(key)

结果:

nets.0.weight
nets.0.bias
nets.2.weight
nets.2.bias
nets.4.weight
nets.4.bias
****Before loading********
-0.197643771768
0.862508803606
1.21658478677
conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias
nets.0.weight
nets.0.bias
-----After loading------
-0.197643771768
0.862508803606
-0.197643771768
conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias
nets.0.weight
nets.0.bias

发现After loading之后,预期的两个值一致。
总结:用strict=False进行加载模型,则“能塞则塞,不能塞则丢”。load一般是依据key来加载的,一旦有key不匹配则出错。如果设置strict=False,则直接忽略不匹配的key,对于匹配的key则进行正常的赋值。

Strict=False的用途

所以说,当你一个模型训练好之后,你想往里面加几层,那么strict=False可以很容易地加载预训练的参数(注意检查key是否匹配)。只要key能够匹配,就可以进行正确的赋值。

出现unexpected key module.xxx.weight问题

有时候保存模型时,模型被 nn.DataParallel 包裹,就会发现state_dict中所有的key都带有 module. 前缀。
这时候加载含有 module. 前缀的模型时,可能会出错。其实你只要移除这些前缀即可:

  from collections import OrderedDict

  # NOTE(review): Net_OLD and save_path are not defined in this snippet;
  # presumably they stand for the original model class and the checkpoint path.
  prefix = 'module.'
  pretrained_net = Net_OLD()
  pretrained_net_dict = torch.load(save_path)
  new_state_dict = OrderedDict()
  for k, v in pretrained_net_dict.items():
      # Remove `module.` only when the key really starts with it; slicing
      # unconditionally would corrupt keys that carry no prefix.
      name = k[len(prefix):] if k.startswith(prefix) else k
      new_state_dict[name] = v
  # load params
  pretrained_net.load_state_dict(new_state_dict)

总结

  • 保存的Dict是按照net.属性.weight来存储的。如果这个属性是一个Sequential,我们可以类似这样net.seqConvs.0.weight来获得。
    当然在定义的类中,拿到Sequential的某一层用[], 比如self.seqConvs[0].weight.
  • strict=False是没有那么智能,遵循有相同的key则赋值,否则直接丢弃。

附加

由于第一段的问题还没解决,即如何将Sequential定义的网络的模型参数,加载到用“属性一层层”定义的网络中?
下面是一种比较ugly的方法:

import torch
import torch.nn as nn

class Net_old(nn.Module):
    """Three 3x3 convolutions with in-place ReLUs between them, kept in one ``nn.Sequential``.

    The layers live inside the ``nets`` container, so the state-dict keys are
    index based: ``nets.0.*``, ``nets.2.*`` and ``nets.4.*``.
    """

    def __init__(self):
        super(Net_old, self).__init__()
        layers = [
            nn.Conv2d(1, 2, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(2, 1, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.Conv2d(1, 1, kernel_size=3),
        ]
        self.nets = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        out = self.nets(x)
        return out

class Net_new(nn.Module):
    """Attribute-style network that can copy its conv weights from a saved ``Net_old``.

    The layers are the same as in the sequential network, but each one is its
    own attribute, so the state-dict keys are ``conv1.*``, ``conv2.*`` and
    ``conv3.*``.
    """

    def __init__(self):
        super(Net_new, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 2, 3)
        self.r1 = torch.nn.ReLU(True)
        self.conv2 = torch.nn.Conv2d(2, 1, 3)
        self.r2 = torch.nn.ReLU(True)
        self.conv3 = torch.nn.Conv2d(1, 1, 3)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 and return the output."""
        x = self.conv1(x)
        x = self.r1(x)
        x = self.conv2(x)
        x = self.r2(x)
        x = self.conv3(x)
        # This class has no ``nets`` attribute, so nothing else is applied.
        return x

    def _initialize_weights_from_net(self):
        """Copy conv weights and biases from the ``Net_old`` checkpoint 't.pth'.

        The checkpoint is first loaded into a fresh ``Net_old``. Its Conv2d
        layers are then matched, in order, with the layers returned by
        ``get_convs()`` and their parameters are copied over.

        Raises:
            IndexError: if the checkpointed network has more Conv2d layers
                than ``get_convs()`` returns.
            RuntimeError: if a pair of matched layers differs in parameter
                shape (raised by ``copy_``).
        """
        save_path = 't.pth'
        # First load the net.
        pretrained_net = Net_old()
        pretrained_net_dict = torch.load(save_path)
        # load params
        pretrained_net.load_state_dict(pretrained_net_dict)
        # Report success only once the checkpoint has actually been loaded.
        print('Successfully load model '+save_path)

        new_convs = self.get_convs()

        # nn.Sequential is iterable over its layers; keep only the convolutions.
        pretrained_convs = [layer for layer in pretrained_net.nets
                            if isinstance(layer, torch.nn.Conv2d)]
        for idx, src in enumerate(pretrained_convs):
            dst = new_convs[idx]
            print('Assign weight of pretrained model layer : ', src, ' to layer: ', dst)
            # copy_ writes into the existing tensors, so a shape mismatch is
            # reported instead of being accepted silently.
            dst.weight.data.copy_(src.weight.data)
            dst.bias.data.copy_(src.bias.data)

    def get_convs(self):
        """Return the conv layers in the order they are applied in forward()."""
        return [self.conv1, self.conv2, self.conv3]

# Save the sequential model's parameters to disk, then read the checkpoint back.
old_network = Net_old()
torch.save(old_network.cpu().state_dict(), 't.pth')


pretrained_net = torch.load('t.pth')

# Show keys of pretrained model.
# print(key) with a single argument behaves the same on Python 2 and 3.
for key in pretrained_net:
    print(key)
print('****Before loading********')
new_network = Net_new()
print(torch.sum(old_network.nets[0].weight.data))
print(torch.sum(new_network.conv1.weight.data))
print('-----New loading method------')
# Copy the conv parameters layer by layer instead of matching state-dict keys.
new_network._initialize_weights_from_net()
print(torch.sum(old_network.nets[0].weight.data))
print(torch.sum(new_network.conv1.weight.data))

输出:

nets.0.weight
nets.0.bias
nets.2.weight
nets.2.bias
nets.4.weight
nets.4.bias
****Before loading********
0.510313585401
0.198701560497
-----New loading method------
Successfully load model t.pth
('Assign weight of pretrained model layer : ', Conv2d(1, 2, kernel_size=(3, 3), stride=(1, 1)), ' to layer: ', Conv2d(1, 2, kernel_size=(3, 3), stride=(1, 1)))
('Assign weight of pretrained model layer : ', Conv2d(2, 1, kernel_size=(3, 3), stride=(1, 1)), ' to layer: ', Conv2d(2, 1, kernel_size=(3, 3), stride=(1, 1)))
('Assign weight of pretrained model layer : ', Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1)), ' to layer: ', Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1)))
0.510313585401
0.510313585401

搞定!

  • 69
    点赞
  • 225
    收藏
    觉得还不错? 一键收藏
  • 18
    评论
以下是基于VGG16模型的Grad-CAM PyTorch实现代码: ```python import torch import torch.nn as nn import torch.nn.functional as F from torchvision import models from torchvision import transforms from PIL import Image class VGG16(nn.Module): def __init__(self): super(VGG16, self).__init__() self.features = models.vgg16(pretrained=True).features self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Sequential( nn.Linear(512 * 7 * 7, 4096), nn.ReLU(inplace=True), nn.Dropout(), nn.Linear(4096, 4096), nn.ReLU(inplace=True), nn.Dropout(), nn.Linear(4096, 1000), ) def forward(self, x): x = self.features(x) x = self.avgpool(x) x = torch.flatten(x, 1) x = self.classifier(x) return x class GradCAM: def __init__(self, model): self.model = model.eval() self.feature_maps = [] self.gradient_maps = [] # Registering hooks for feature maps and gradient maps self.model.features.register_forward_hook(self.save_feature_maps) self.model.features.register_backward_hook(self.save_gradient_maps) def save_feature_maps(self, module, input, output): # Save feature maps during forward pass self.feature_maps.append(output) def save_gradient_maps(self, module, grad_input, grad_output): # Save gradient maps during backward pass self.gradient_maps.append(grad_output[0]) def forward(self, x): return self.model(x) def backward(self, idx): # Calculate gradients of the output with respect to feature maps self.model.zero_grad() grad_output = torch.zeros_like(self.gradient_maps[-1]) grad_output[0][idx] = 1 self.gradient_maps[-1].backward(gradient=grad_output) def generate(self, x, idx): # Forward pass to get the predicted class self.forward(x) # Backward pass to get the gradients self.backward(idx) # Pool the gradients over the feature maps and normalize pooled_gradients = torch.mean(self.gradient_maps[-1], dim=[2, 3]) feature_maps = self.feature_maps[-1] for i in range(feature_maps.shape[1]): feature_maps[:, i, :, :] *= pooled_gradients[i] heatmap = torch.mean(feature_maps, dim=1).squeeze().detach().numpy() 
heatmap = np.maximum(heatmap, 0) heatmap /= np.max(heatmap) # Resize the heatmap to match the input image size heatmap = cv2.resize(heatmap, (x.shape[3], x.shape[2])) # Convert heatmap to RGB heatmap = np.uint8(255 * heatmap) heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) # Superimpose the heatmap on the input image superimposed_img = np.uint8(0.5 * x[0].permute(1, 2, 0).detach().numpy() + 0.5 * heatmap) return superimposed_img # Load the pre-trained VGG16 model model = VGG16() # Create GradCAM object gradcam = GradCAM(model) # Load the input image img = Image.open('input.jpg').convert('RGB') # Preprocess the input image transform = transforms.Compose([ transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) input_tensor = transform(img).unsqueeze(0) # Get the predicted class index output = gradcam.forward(input_tensor) predicted_idx = torch.argmax(output).item() # Generate the Grad-CAM heatmap cam = gradcam.generate(input_tensor, predicted_idx) # Save the output image output_img = Image.fromarray(cam) output_img.save('output.jpg') ``` 这段代码包括了VGG16模型的定义、Grad-CAM的实现、输入图像的预处理以及结果图像的保存。你只需将`input.jpg`替换为你自己的输入图像,运行代码即可得到Grad-CAM可视化结果图像`output.jpg`。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 18
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值