Annotated ResNet18 source code (model diagram still to be added)
I'm an incoming ("year zero") grad student; my advisor asked me to start reading multimodal papers and their code, so this is very much a beginner's walkthrough.
This ResNet18 is the one used in the paper VISUALVOICE: Audio-Visual Speech Separation with Cross-Modal Consistency.
The annotations combine the theory and network-building videos of the Bilibili uploader 霹雳吧啦Wz (30 minutes of video did more for me than a whole morning of reading on my own).
Source code: https://github.com/facebookresearch/VisualVoice
Code with annotations
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under: https://github.com/mpc001/Lipreading_using_Temporal_Convolutional_Networks/blob/master/LICENSE
# Ack: Code taken from Pingchuan Ma: https://github.com/mpc001/Lipreading_using_Temporal_Convolutional_Networks
import math
import torch.nn as nn
import pdb
# 3x3 convolution with no bias: every conv here is followed by a BatchNorm layer,
# whose learnable shift makes a separate conv bias redundant.
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)
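
# Output size of this conv: floor((H + 2*1 - 3) / stride) + 1, so stride=1 keeps the
# spatial size and stride=2 roughly halves it (e.g. 22x22 -> 11x11).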

# BatchNorm note: BN removes absolute scale differences and keeps relative ones, which
# suits classification. Steps: compute the batch mean and variance, normalize, then apply
# the learnable scale (gamma) and shift (beta).
# Projection shortcut: 1x1 conv (carrying the stride) + BN, to match the channel count and
# spatial size of the main branch.
def downsample_basic_block( inplanes, outplanes, stride ):
    return nn.Sequential(
        nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(outplanes),
    )


# Variant: average-pool for the spatial downsampling, then a stride-1 1x1 conv + BN.
def downsample_basic_block_v2( inplanes, outplanes, stride ):
    return nn.Sequential(
        nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False),
        nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, bias=False),
        nn.BatchNorm2d(outplanes),
    )
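
# Shape check (derived from the definitions above): with inplanes=64, outplanes=128,
# stride=2 and an input of (N, 64, 22, 22), both helpers return (N, 128, 11, 11).
# The difference is how the downsampling happens: the plain version uses a stride-2 1x1
# conv, which only reads every other position, while the v2 version average-pools first,
# so every input position contributes before the 1x1 conv mixes channels.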

# Residual block
class BasicBlock(nn.Module):
    # In ResNet18 the channel count does not change inside a block, so expansion = 1
    expansion = 1

    # inplanes: depth of the input feature map; planes: depth of the output feature map
    # (number of kernels); downsample: projection used when the shortcut must be resized
    # to match the main branch
    def __init__(self, inplanes, planes, stride=1, downsample=None, relu_type = 'relu' ):
        super(BasicBlock, self).__init__()
        assert relu_type in ['relu','prelu']

        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)

        # ReLU keeps positive values and zeroes out negative ones; inplace=True overwrites
        # the input tensor instead of allocating a new one (saves memory, but the original
        # activation is no longer available afterwards)
        # type of ReLU is an input option
        if relu_type == 'relu':
            self.relu1 = nn.ReLU(inplace=True)
            self.relu2 = nn.ReLU(inplace=True)
        elif relu_type == 'prelu':
            self.relu1 = nn.PReLU(num_parameters=planes)
            self.relu2 = nn.PReLU(num_parameters=planes)
        else:
            raise Exception('relu type not implemented')
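        # PReLU differs from ReLU only on the negative side: instead of hard-zeroing,
        # it multiplies negatives by a learnable slope, one parameter per channel here
        # (num_parameters=planes).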
        # --------

        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        # None unless the shortcut needs a projection
        self.downsample = downsample
        self.stride = stride
    # Forward pass
    def forward(self, x):
        residual = x  # keep the identity branch for the residual connection
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Used in the first block of layer2/3/4, where the shortcut has to be projected
        # to the new channel count and spatial size
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu2(out)

        return out
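
# Shape example: BasicBlock(64, 128, stride=2, downsample=downsample_basic_block(64, 128, 2))
# maps (N, 64, 22, 22) -> (N, 128, 11, 11); the main branch gets there through the stride-2
# conv1, the shortcut through the 1x1 projection, and the two are summed element-wise.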

class ResNet(nn.Module):
    # block -> BasicBlock (for ResNet18); layers -> number of residual blocks per stage;
    # num_classes: number of classes in the training set (unused here: this trunk has no
    # fully connected classifier)
    def __init__(self, block, layers, num_classes=1000, relu_type = 'relu', gamma_zero = False, avg_pool_downsample = False):
        self.inplanes = 64  # channel depth expected at layer1; the 64-channel input is produced by a front-end defined outside this file
        self.relu_type = relu_type
        self.gamma_zero = gamma_zero
        # downsample_basic_block: 1x1 conv + BN; the v2 variant average-pools first
        self.downsample_block = downsample_basic_block_v2 if avg_pool_downsample else downsample_basic_block

        super(ResNet, self).__init__()
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # default init (He/Kaiming-style for convs, gamma=1 and beta=0 for BN)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                #nn.init.ones_(m.weight)
                #nn.init.zeros_(m.bias)
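
        # Zero-init the gamma of the last BN in every residual block, so each block starts
        # out as (roughly) an identity mapping; a common trick for easing optimization.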
        if self.gamma_zero:
            for m in self.modules():
                if isinstance(m, BasicBlock ):
                    m.bn2.weight.data.zero_()
    # block = BasicBlock; blocks is the number of residual blocks in this stage
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        # A projection shortcut is needed when the first block of a stage changes the
        # spatial size (stride != 1) or the channel count; in this ResNet18 that is the
        # case for layer2, layer3 and layer4
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = self.downsample_block( inplanes = self.inplanes,
                                                outplanes = planes * block.expansion,
                                                stride = stride )

        layers = []
        # first block of the stage (may carry the stride and the projection shortcut)
        layers.append(block(self.inplanes, planes, stride, downsample, relu_type = self.relu_type))
        self.inplanes = planes * block.expansion
        # remaining blocks of the stage (stride 1, identity shortcuts)
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, relu_type = self.relu_type))

        # wrap everything in an nn.Sequential and return it
        return nn.Sequential(*layers)
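
    # Example: for ResNet18, _make_layer(BasicBlock, 128, 2, stride=2) builds
    # [BasicBlock(64 -> 128, stride 2, projection shortcut), BasicBlock(128 -> 128)],
    # i.e. layer2 above.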
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # flatten the (N, 512, 1, 1) output of the global average pooling to (N, 512)
        x = x.view(x.size(0), -1)
        return x
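
Quick sanity check (my own snippet, not part of the repo): build the 18-layer trunk with two BasicBlocks per stage and push a dummy batch through it. Because forward() starts at layer1, the input must already have 64 channels; the batch size, the 22x22 spatial size, and the choice of relu_type='prelu' below are just assumed for the example.

import torch

net = ResNet(BasicBlock, [2, 2, 2, 2], relu_type='prelu')  # ResNet18 layout: 2 blocks per stage
dummy = torch.randn(4, 64, 22, 22)   # (batch, channels, H, W); 64 channels expected by layer1
feat = net(dummy)
print(feat.shape)                    # torch.Size([4, 512]) after global average pooling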