pytorch2维模型转换为3维模型——以resnext为例
最近在整理视频理解相关领域的算法,看了I3D的文章,觉得使用2D模型的预训练初始化3D网络的模型参数是一个非常重要的步骤(提点),因此写一篇博客进行记录,希望对大家有帮助。
2D网络结构转换到3D网络结构
3D网络相比于2D网络,主要区别是输入数据多了一个时间维度:由 B×C×H×W 变为 B×C×T×H×W
因此将2D网络中的2D结构转换为3D结构,注意要设计好3D的卷积核、stride、padding等参数,主要有:
nn.BatchNorm2d -> nn.BatchNorm3d
nn.Conv2d -> nn.Conv3d
nn.AdaptiveAvgPool2d -> nn.AdaptiveAvgPool3d
Inflate 初始化3D网络的参数
在之前的博客基础上进行了修改(https://blog.csdn.net/Abo_luo/article/details/117304751?spm=1001.2014.3001.5502)
def transfer_model(pretrained_file, model):
    """Load the weights stored in `pretrained_file` into `model`.

    Shape mismatches are resolved by `transfer_state_dict` (which inflates
    2D kernels to 3D); the merged state dict is then loaded back.
    """
    source_state = torch.load(pretrained_file)  # pretrained (2D) weights
    target_state = model.state_dict()           # the model's own parameters
    # Remap/inflate the pretrained entries so every one fits the target model.
    mapped_state = transfer_state_dict(source_state, target_state)
    target_state.update(mapped_state)
    model.load_state_dict(target_state)
    return model
def transfer_state_dict(pretrained_dict, model_dict):
    """Copy parameters from `pretrained_dict` into `model_dict` by position.

    Entries whose shapes already match are copied verbatim.  Mismatched
    entries are assumed to be 2D conv kernels (O, I, H, W) that must become
    3D kernels (O, I, T, H, W): the kernel is replicated T times along the
    new temporal axis and divided by T so activation magnitudes are kept
    (the I3D "inflation" trick).
    """
    loaded = 0
    pairs = zip(pretrained_dict.items(), model_dict.items())
    for (src_key, src_val), (dst_key, dst_val) in pairs:
        if src_val.shape == dst_val.shape:
            model_dict[dst_key] = pretrained_dict[src_key]
        else:
            # Inflate the 2D kernel along a new temporal dimension.
            t = dst_val.shape[2]  # temporal extent of the 3D kernel
            inflated = src_val.unsqueeze(2).repeat(1, 1, t, 1, 1)
            model_dict[dst_key] = inflated / t
        loaded += 1
    print('模型总结构数%d个,成功加载参数%d个' % (len(model_dict), loaded))
    return model_dict
完整结构主要分为2步骤
1、找到2d网络的代码,并修改为3d的代码
2、找到对应的2d网络权重(下面代码中用的是timm库提供好的预训练权重,但是通常要模型适配,可以修改上面的transfer_state_dict函数),并迁移到3d网络中
2维resnext网络结构
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
# ResNeXt 32x4d hyper-parameters ("CxD" notation from the ResNeXt paper).
CARDINALITY = 32  # number of groups in the grouped 3x3 convolution
DEPTH = 4  # base number of channels per group
BASEWIDTH = 64  # reference width used to scale DEPTH with each stage's width
def transfer_model(pretrained_file, model):
    """Initialise `model` with the weights stored in `pretrained_file`.

    Only parameters whose shapes match are transferred (see
    `transfer_state_dict`); everything else keeps its random init.
    """
    source_state = torch.load(pretrained_file)
    target_state = model.state_dict()
    # Keep only the pretrained entries that actually fit the target model.
    usable = transfer_state_dict(source_state, target_state)
    print('成功加载参数%d个' % len(usable))
    target_state.update(usable)
    model.load_state_dict(target_state)
    return model
def transfer_state_dict(pretrained_dict, model_dict):
    """Positionally match pretrained parameters against `model_dict`.

    Returns a dict keyed by the model's own parameter names that contains
    only the pretrained tensors whose shapes match; mismatching entries
    are reported and skipped.
    """
    matched = {}
    pairs = zip(pretrained_dict.items(), model_dict.items())
    for (src_key, src_val), (dst_key, dst_val) in pairs:
        if src_val.shape != dst_val.shape:
            print('%s形状不匹配' % dst_key)
            continue
        matched[dst_key] = pretrained_dict[src_key]
    return matched
class ResNextBottleNeckC(nn.Module):
    """2D ResNeXt bottleneck block ("aggregated transforms" form C).

    1x1 reduce -> 3x3 grouped conv (CARDINALITY groups) -> 1x1 expand (x4),
    plus a projection shortcut whenever the residual branch changes the
    spatial size or the channel count.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        groups = CARDINALITY
        # Per-group width, scaled with this block's output width.
        group_width = int(DEPTH * out_channels / BASEWIDTH)
        inner = groups * group_width
        expanded = out_channels * 4
        self.split_transforms = nn.Sequential(
            nn.Conv2d(in_channels, inner, kernel_size=1, groups=1, bias=False),
            nn.BatchNorm2d(inner),
            nn.ReLU(inplace=True),
            nn.Conv2d(inner, inner, kernel_size=3, stride=stride,
                      groups=groups, padding=1, bias=False),
            nn.BatchNorm2d(inner),
            nn.ReLU(inplace=True),
            nn.Conv2d(inner, expanded, kernel_size=1, bias=False),
            nn.BatchNorm2d(expanded),
        )
        # Identity shortcut unless the residual branch changes shape.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != expanded:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, expanded, stride=stride,
                          kernel_size=1, bias=False),
                nn.BatchNorm2d(expanded),
            )

    def forward(self, x):
        residual = self.split_transforms(x)
        return F.relu(residual + self.shortcut(x))
class ResNext(nn.Module):
    """2D ResNeXt backbone: stem conv + four residual stages + linear head.

    Args:
        block: residual block class, called as block(in_ch, out_ch, stride).
        num_blocks: per-stage block counts (length 4).
        class_names: number of output classes.
    """

    def __init__(self, block, num_blocks, class_names=100):
        super().__init__()
        self.in_channels = 64
        # Stem: 7x7 conv (stride 1 here, unlike ImageNet ResNets) + BN + ReLU.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 7, stride=1, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        # Four stages; every stage after the first halves the resolution.
        self.conv2 = self._make_layer(block, num_blocks[0], 64, 1)
        self.conv3 = self._make_layer(block, num_blocks[1], 128, 2)
        self.conv4 = self._make_layer(block, num_blocks[2], 256, 2)
        self.conv5 = self._make_layer(block, num_blocks[3], 512, 2)
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, class_names)

    def forward(self, x):
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = self.avg(x)
        flat = x.view(x.size(0), -1)
        return self.fc(flat)

    def _make_layer(self, block, num_block, out_channels, stride):
        """Stack `num_block` blocks; only the first one uses `stride`."""
        layers = []
        for s in [stride] + [1] * (num_block - 1):
            layers.append(block(self.in_channels, out_channels, s))
            self.in_channels = out_channels * 4
        return nn.Sequential(*layers)
def resnext50(num_class):
    """Build a 2D ResNeXt-50 (32x4d) with `num_class` output classes."""
    stage_sizes = [3, 4, 6, 3]
    return ResNext(ResNextBottleNeckC, stage_sizes, class_names=num_class)
def resnext101(num_class=100):
    """Build a 2D ResNeXt-101 (32x4d) network.

    Args:
        num_class: number of output classes.  Defaults to 100, which is the
            ResNext default previously hard-coded here, so existing
            zero-argument calls behave identically.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 23, 3], class_names=num_class)
def resnext152(num_class=100):
    """Build a 2D ResNeXt-152 (32x4d) network.

    (The original docstring mistakenly said resnext101.)

    Args:
        num_class: number of output classes.  Defaults to 100, the ResNext
            default previously hard-coded here, so existing zero-argument
            calls behave identically.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 36, 3], class_names=num_class)
if __name__ == '__main__':
    # Use timm's pretrained ResNeXt weights as the 2D source checkpoint.
    model_names = timm.list_models(pretrained=True)
    # The message promises a count ("数量") but the original printed the whole
    # list — print the length instead.
    print("支持的预训练模型数量:%s" % len(model_names))
    model = timm.create_model('resnext50d_32x4d', pretrained=True)
    torch.save(model.state_dict(), './resnext50.pth')
    # Build our own 2D ResNeXt and load the timm weights into it.
    model = resnext50(num_class=2)
    model = transfer_model("./resnext50.pth", model)
    # Save the remapped weights so the 3D script can load them without doing
    # any key matching of its own.
    torch.save(model.state_dict(), './resnext50_after_timm.pth')
    # NOTE: renamed from `input` to avoid shadowing the builtin.
    sample = torch.randn(2, 3, 224, 224)
    output = model(sample)
    print(output.shape)
3维resnext网络结构
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
# ResNeXt 32x4d hyper-parameters ("CxD" notation from the ResNeXt paper).
CARDINALITY = 32  # number of groups in the grouped 3x3x3 convolution
DEPTH = 4  # base number of channels per group
BASEWIDTH = 64  # reference width used to scale DEPTH with each stage's width
def transfer_model(pretrained_file, model):
    """Load 2D weights from `pretrained_file` into the 3D `model`.

    `transfer_state_dict` inflates every 2D kernel to 3D, so the merged
    state dict always fits the 3D architecture.
    """
    source_state = torch.load(pretrained_file)  # the 2D checkpoint
    target_state = model.state_dict()           # the 3D model's parameters
    # Inflate/remap the 2D entries so they match the 3D parameter shapes.
    mapped_state = transfer_state_dict(source_state, target_state)
    target_state.update(mapped_state)
    model.load_state_dict(target_state)
    return model
def transfer_state_dict(pretrained_dict, model_dict):
    """Transfer 2D weights into a 3D state dict, pairing entries by order.

    Matching shapes are copied directly.  Mismatches are treated as 2D conv
    kernels (O, I, H, W) to inflate into (O, I, T, H, W): replicate along a
    new temporal axis of size T and divide by T so the layer's output scale
    is preserved (I3D-style inflation).
    """
    loaded = 0
    pairs = zip(pretrained_dict.items(), model_dict.items())
    for (src_key, src_val), (dst_key, dst_val) in pairs:
        if src_val.shape == dst_val.shape:
            model_dict[dst_key] = pretrained_dict[src_key]
        else:
            t = dst_val.shape[2]  # temporal extent of the 3D kernel
            inflated = src_val.unsqueeze(2).repeat(1, 1, t, 1, 1)
            model_dict[dst_key] = inflated / t
        loaded += 1
    print('模型总结构数%d个,成功加载参数%d个' % (len(model_dict), loaded))
    return model_dict
class ResNextBottleNeckC(nn.Module):
    """3D ResNeXt bottleneck block.

    Same structure as the 2D block, but with Conv3d/BatchNorm3d and a
    stride of (1, s, s): the temporal dimension is never strided inside a
    stage, only the spatial dimensions are.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        groups = CARDINALITY
        # Per-group width, scaled with this block's output width.
        group_width = int(DEPTH * out_channels / BASEWIDTH)
        inner = groups * group_width
        expanded = out_channels * 4
        spatial_stride = (1, stride, stride)  # keep full temporal resolution
        self.split_transforms = nn.Sequential(
            nn.Conv3d(in_channels, inner, kernel_size=1, groups=1, bias=False),
            nn.BatchNorm3d(inner),
            nn.ReLU(inplace=True),
            nn.Conv3d(inner, inner, kernel_size=3, stride=spatial_stride,
                      groups=groups, padding=1, bias=False),
            nn.BatchNorm3d(inner),
            nn.ReLU(inplace=True),
            nn.Conv3d(inner, expanded, kernel_size=1, bias=False),
            nn.BatchNorm3d(expanded),
        )
        # Identity shortcut unless the residual branch changes shape.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != expanded:
            self.shortcut = nn.Sequential(
                nn.Conv3d(in_channels, expanded, stride=spatial_stride,
                          kernel_size=1, bias=False),
                nn.BatchNorm3d(expanded),
            )

    def forward(self, x):
        residual = self.split_transforms(x)
        return F.relu(residual + self.shortcut(x))
class ResNext(nn.Module):
    """3D ResNeXt backbone for clips shaped (B, C, T, H, W).

    Args:
        block: residual block class, called as block(in_ch, out_ch, stride).
        num_blocks: per-stage block counts (length 4).
        class_names: number of output classes.
    """

    def __init__(self, block, num_blocks, class_names=100):
        super().__init__()
        self.in_channels = 64
        # Stem: 7x7x7 conv with stride (2,2,2) — halves T, H and W.
        self.conv1 = nn.Sequential(
            nn.Conv3d(3, 64, 7, stride=(2, 2, 2), padding=3, bias=False),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
        )
        # Four stages; every stage after the first halves H and W.
        self.conv2 = self._make_layer(block, num_blocks[0], 64, 1)
        self.conv3 = self._make_layer(block, num_blocks[1], 128, 2)
        self.conv4 = self._make_layer(block, num_blocks[2], 256, 2)
        self.conv5 = self._make_layer(block, num_blocks[3], 512, 2)
        self.avg = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = nn.Linear(512 * 4, class_names)

    def forward(self, x):
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = self.avg(x)
        flat = x.view(x.size(0), -1)
        return self.fc(flat)

    def _make_layer(self, block, num_block, out_channels, stride):
        """Stack `num_block` blocks; only the first one uses `stride`."""
        layers = []
        for s in [stride] + [1] * (num_block - 1):
            layers.append(block(self.in_channels, out_channels, s))
            self.in_channels = out_channels * 4
        return nn.Sequential(*layers)
def resnext50(num_class):
    """Build a 3D ResNeXt-50 (32x4d) with `num_class` output classes."""
    stage_sizes = [3, 4, 6, 3]
    return ResNext(ResNextBottleNeckC, stage_sizes, class_names=num_class)
def resnext101(num_class=100):
    """Build a 3D ResNeXt-101 (32x4d) network.

    Args:
        num_class: number of output classes.  Defaults to 100, which is the
            ResNext default previously hard-coded here, so existing
            zero-argument calls behave identically.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 23, 3], class_names=num_class)
def resnext152(num_class=100):
    """Build a 3D ResNeXt-152 (32x4d) network.

    (The original docstring mistakenly said resnext101.)

    Args:
        num_class: number of output classes.  Defaults to 100, the ResNext
            default previously hard-coded here, so existing zero-argument
            calls behave identically.
    """
    return ResNext(ResNextBottleNeckC, [3, 4, 36, 3], class_names=num_class)
if __name__ == '__main__':
    # Fall back to CPU so the script also runs on machines without CUDA
    # (the original hard-coded .cuda() and crashed on CPU-only hosts).
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = resnext50(2).to(device)
    # Inflate the 2D weights saved by the previous (2D) script.
    model = transfer_model("./resnext50_after_timm.pth", model)
    # A dummy clip: batch 2, 3 channels, 10 frames, 224x224 spatial.
    clip = torch.randn(2, 3, 10, 224, 224, device=device)
    output = model(clip)
    print(output.shape)
总结
上述第一段程序中主要需要修改的是transfer_state_dict函数。第二段程序(3D)和第一段程序(2D)模型的num_class需要相等:因为3D网络的参数形状本来就与2D权重不完全一致,代码没有按key名称做匹配判断,而是直接按key的顺序进行迁移。模型权重链接:https://pan.baidu.com/s/1AH1yJP5DWrEL7hqQByVStQ?pwd=x9l2
提取码:x9l2