Annotated ResNet18 source code (model diagram still to be added)
I'm an incoming ("year zero") grad student; my advisor asked me to start reading multimodal papers and their code, so this is very much a beginner's walkthrough.
This ResNet18 is the one used in the paper VISUALVOICE: Audio-Visual Speech Separation with Cross-Modal Consistency.
The annotations combine the theory and network-building videos of the Bilibili uploader 霹雳吧啦Wz (30 minutes of video did more for me than a whole morning of reading on my own).
Source code: https://github.com/facebookresearch/VisualVoice
Code with annotations
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under: https://github.com/mpc001/Lipreading_using_Temporal_Convolutional_Networks/blob/master/LICENSE
# Ack: Code taken from Pingchuan Ma: https://github.com/mpc001/Lipreading_using_Temporal_Convolutional_Networks
import math
import torch.nn as nn
import pdb
# 3x3 convolution with no bias: every conv here is followed by a BatchNorm layer,
# whose learnable shift makes a separate conv bias redundant.
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)
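
# Output size of this conv: floor((H + 2*1 - 3) / stride) + 1, so stride=1 keeps the
# spatial size and stride=2 roughly halves it (e.g. 22x22 -> 11x11).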

# BatchNorm note: BN removes absolute scale differences and keeps relative ones, which
# suits classification. Steps: compute the batch mean and variance, normalize, then apply
# the learnable scale (gamma) and shift (beta).
# Projection shortcut: 1x1 conv (carrying the stride) + BN, to match the channel count and
# spatial size of the main branch.
def downsample_basic_block( inplanes, outplanes, stride ):
    return nn.Sequential(
        nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(outplanes),
    )


# Variant: average-pool for the spatial downsampling, then a stride-1 1x1 conv + BN.
def downsample_basic_block_v2( inplanes, outplanes, stride ):
    return nn.Sequential(
        nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False),
        nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, bias=False),
        nn.BatchNorm2d(outplanes),
    )
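
# Shape check (derived from the definitions above): with inplanes=64, outplanes=128,
# stride=2 and an input of (N, 64, 22, 22), both helpers return (N, 128, 11, 11).
# The difference is how the downsampling happens: the plain version uses a stride-2 1x1
# conv, which only reads every other position, while the v2 version average-pools first,
# so every input position contributes before the 1x1 conv mixes channels.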

# Residual block
class BasicBlock(nn.Module):
    # In ResNet18 the channel count does not change inside a block, so expansion = 1
    expansion = 1

    # inplanes: depth of the input feature map; planes: depth of the output feature map
    # (number of kernels); downsample: projection used when the shortcut must be resized
    # to match the main branch
    def __init__(self, inplanes, planes, stride=1, downsample=None, relu_type = 'relu' ):
        super(BasicBlock, self).__init__()
        assert relu_type in ['relu','prelu']

        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)

        # ReLU keeps positive values and zeroes out negative ones; inplace=True overwrites
        # the input tensor instead of allocating a new one (saves memory, but the original
        # activation is no longer available afterwards)
        # type of ReLU is an input option
        if relu_type == 'relu':
            self.relu1 = nn.ReLU(inplace=True)
            self.relu2 = nn.ReLU(inplace=True)
        elif relu_type == 'prelu':
            self.relu1 = nn.PReLU(num_parameters=planes)
            self.relu2 = nn.PReLU(num_parameters=planes)
        else:
            raise Exception('relu type not implemented')
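        # PReLU differs from ReLU only on the negative side: instead of hard-zeroing,
        # it multiplies negatives by a learnable slope, one parameter per channel here
        # (num_parameters=planes).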
        # --------

        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        # None unless the shortcut needs a projection
        self.downsample = downsample
        self.stride = stride
    # Forward pass
    def forward(self, x):
        residual = x  # keep the identity branch for the residual connection
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Used in the first block of layer2/3/4, where the shortcut has to be projected
        # to the new channel count and spatial size
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu2(out)

        return out
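
# Shape example: BasicBlock(64, 128, stride=2, downsample=downsample_basic_block(64, 128, 2))
# maps (N, 64, 22, 22) -> (N, 128, 11, 11); the main branch gets there through the stride-2
# conv1, the shortcut through the 1x1 projection, and the two are summed element-wise.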

class ResNet(nn.Module):
    # block -> BasicBlock (for ResNet18); layers -> number of residual blocks per stage;
    # num_classes: number of classes in the training set (unused here: this trunk has no
    # fully connected classifier)
    def __init__(self, block, layers, num_classes=1000, relu_type = 'relu', gamma_zero = False, avg_pool_downsample = False):
        self.inplanes = 64  # channel depth expected at layer1; the 64-channel input is produced by a front-end defined outside this file
        self.relu_type = relu_type
        self.gamma_zero = gamma_zero
        # downsample_basic_block: 1x1 conv + BN; the v2 variant average-pools first
        self.downsample_block = downsample_basic_block_v2 if avg_pool_downsample else downsample_basic_block

        super(ResNet, self).__init__()
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # default init (He/Kaiming-style for convs, gamma=1 and beta=0 for BN)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                #nn.init.ones_(m.weight)
                #nn.init.zeros_(m.bias)
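
        # Zero-init the gamma of the last BN in every residual block, so each block starts
        # out as (roughly) an identity mapping; a common trick for easing optimization.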
        if self.gamma_zero:
            for m in self.modules():
                if isinstance(m, BasicBlock ):
                    m.bn2.weight.data.zero_()
    # block = BasicBlock; blocks is the number of residual blocks in this stage
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        # A projection shortcut is needed when the first block of a stage changes the
        # spatial size (stride != 1) or the channel count; in this ResNet18 that is the
        # case for layer2, layer3 and layer4
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = self.downsample_block( inplanes = self.inplanes,
                                                outplanes = planes * block.expansion,
                                                stride = stride )

        layers = []
        # first block of the stage (may carry the stride and the projection shortcut)
        layers.append(block(self.inplanes, planes, stride, downsample, relu_type = self.relu_type))
        self.inplanes = planes * block.expansion
        # remaining blocks of the stage (stride 1, identity shortcuts)
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, relu_type = self.relu_type))

        # wrap everything in an nn.Sequential and return it
        return nn.Sequential(*layers)
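
    # Example: for ResNet18, _make_layer(BasicBlock, 128, 2, stride=2) builds
    # [BasicBlock(64 -> 128, stride 2, projection shortcut), BasicBlock(128 -> 128)],
    # i.e. layer2 above.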
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # flatten the (N, 512, 1, 1) output of the global average pooling to (N, 512)
        x = x.view(x.size(0), -1)
        return x
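
Quick sanity check (my own snippet, not part of the repo): build the 18-layer trunk with two BasicBlocks per stage and push a dummy batch through it. Because forward() starts at layer1, the input must already have 64 channels; the batch size, the 22x22 spatial size, and the choice of relu_type='prelu' below are just assumed for the example.

import torch

net = ResNet(BasicBlock, [2, 2, 2, 2], relu_type='prelu')  # ResNet18 layout: 2 blocks per stage
dummy = torch.randn(4, 64, 22, 22)   # (batch, channels, H, W); 64 channels expected by layer1
feat = net(dummy)
print(feat.shape)                    # torch.Size([4, 512]) after global average pooling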