读 DeepFake detection with multi-scale convolution and vision transformer 第一期：MBConv

CON_DUCT

已于 2023-05-13 15:21:18 修改

阅读量837

点赞数

分类专栏： deepfake 文章标签：深度学习 python

于 2023-05-11 22:48:03 首次发布

本文链接：https://blog.csdn.net/weixin_50281113/article/details/130632294

版权

deepfake 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

MBConv

MBConv的结构如下图所示。MBConv6指第一层1×1卷积层的卷积核个数为输入通道数的6倍（即扩展倍率为6）；若是MBConv1（扩展倍率为1），则第一层1×1卷积可以省略。
在这里插入图片描述
以下是MBConv网络的详细代码及注释，主要是根据上图的流程搭建。

class InvertedResidualConfig:
    """Hyper-parameters describing a single MBConv (inverted residual) block.

    Positional order of the constructor arguments: kernel, input_c, out_c,
    expanded_ratio, stride, use_se, drop_rate, index, width_coefficient.

    Args:
        kernel: Kernel size for the block (3 or 5 according to the original
            author's notes).
        input_c: Base input channel count, before width scaling.
        out_c: Base output channel count, before width scaling.
        expanded_ratio: Multiplier applied to the *scaled* input channels to
            obtain ``expanded_c`` (1 or 6 according to the original notes).
        stride: Stride for the block (1 or 2 according to the original notes).
        use_se: Whether a squeeze-and-excitation module is used (the original
            notes say this is always True).
        drop_rate: Drop rate, stored unchanged.
        index: String label for the block, stored unchanged.
        width_coefficient: Factor both channel counts are multiplied by
            before being handed to ``_make_divisible``.
    """

    def __init__(self,
                 kernel: int,
                 input_c: int,
                 out_c: int,
                 expanded_ratio: int,
                 stride: int,
                 use_se: bool,
                 drop_rate: float,
                 index: str,
                 width_coefficient: float):
        scaled_in = self.adjust_channels(input_c, width_coefficient)
        scaled_out = self.adjust_channels(out_c, width_coefficient)

        # Channel geometry: the expanded width is derived from the already
        # width-scaled input channels, not from the raw ``input_c``.
        self.input_c = scaled_in
        self.expanded_c = scaled_in * expanded_ratio
        self.out_c = scaled_out

        # Remaining settings are kept exactly as supplied.
        self.kernel = kernel
        self.stride = stride
        self.use_se = use_se
        self.drop_rate = drop_rate
        self.index = index

    @staticmethod
    def adjust_channels(channels: int, width_coefficient: float):
        """Scale ``channels`` by ``width_coefficient`` and return the result of
        ``_make_divisible`` on that product with a divisor of 8."""
        scaled = channels * width_coefficient
        return _make_divisible(scaled, 8)


class InvertedResidual(nn.Module):
    """MBConv block built from an ``InvertedResidualConfig``.

    The layers are assembled, in order, into ``self.block``:

    1. ``expand_conv``  - 1x1 ConvBNActivation, only when ``expanded_c``
       differs from ``input_c`` (i.e. the expansion ratio is not 1).
    2. ``dwconv``       - depthwise ConvBNActivation (``groups == expanded_c``)
       using the configured kernel size and stride.
    3. ``se``           - SqueezeExcitation, only when ``cnf.use_se`` is set.
    4. ``project_conv`` - 1x1 ConvBNActivation with ``nn.Identity`` in place
       of an activation.

    Args:
        cnf: Configuration of this block.
        norm_layer: Callable returning the normalization module; it is
            forwarded to every ConvBNActivation.

    Raises:
        ValueError: If ``cnf.stride`` is neither 1 nor 2.
    """

    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super().__init__()

        if cnf.stride not in (1, 2):
            raise ValueError("illegal stride value.")

        # A shortcut is only possible when input and output have the same
        # shape: equal channel counts and no spatial down-sampling.
        self.use_res_connect = cnf.input_c == cnf.out_c and cnf.stride == 1

        # SiLU is the "Swish" activation of the MBConv diagram.
        act = nn.SiLU

        # Named stages, so the sub-modules get readable names in the Sequential.
        stages = OrderedDict()

        # Expansion: skipped when the expansion ratio is 1.
        if cnf.expanded_c != cnf.input_c:
            stages["expand_conv"] = ConvBNActivation(cnf.input_c,
                                                     cnf.expanded_c,
                                                     kernel_size=1,
                                                     norm_layer=norm_layer,
                                                     activation_layer=act)

        # Depthwise convolution: one group per channel.
        stages["dwconv"] = ConvBNActivation(cnf.expanded_c,
                                            cnf.expanded_c,
                                            kernel_size=cnf.kernel,
                                            stride=cnf.stride,
                                            groups=cnf.expanded_c,
                                            norm_layer=norm_layer,
                                            activation_layer=act)

        # Channel attention.
        if cnf.use_se:
            stages["se"] = SqueezeExcitation(cnf.input_c, cnf.expanded_c)

        # Projection back to ``out_c``; no activation after this layer.
        stages["project_conv"] = ConvBNActivation(cnf.expanded_c,
                                                  cnf.out_c,
                                                  kernel_size=1,
                                                  norm_layer=norm_layer,
                                                  activation_layer=nn.Identity)

        self.block = nn.Sequential(stages)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

        # DropPath is only installed when the shortcut is in use and the
        # configured rate is positive; otherwise a no-op stands in.
        drop_active = self.use_res_connect and cnf.drop_rate > 0
        self.dropout = DropPath(cnf.drop_rate) if drop_active else nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        """Run the block on ``x`` and add ``x`` back when the shortcut is enabled."""
        out = self.dropout(self.block(x))
        if self.use_res_connect:
            out += x
        return out

Conv，BN，Swish

# 这里将Conv和BN，Swish结合起来
class ConvBNActivation(nn.Sequential):
    """Conv2d -> normalization -> activation, bundled as one ``nn.Sequential``.

    Because the class derives from ``nn.Sequential`` rather than ``nn.Module``,
    no ``forward`` is written here: the inherited one applies the three layers
    in the order they are passed to the parent constructor.

    Args:
        in_planes: Input channels of the convolution.
        out_planes: Output channels of the convolution; also passed to
            ``norm_layer``.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        groups: Convolution groups; set equal to the channel count for a
            depthwise convolution.
        norm_layer: Callable building the normalization layer from
            ``out_planes``. ``None`` selects ``nn.BatchNorm2d``.
        activation_layer: Zero-argument callable building the activation.
            ``None`` selects ``nn.SiLU``.
    """

    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        norm_factory = nn.BatchNorm2d if norm_layer is None else norm_layer
        act_factory = nn.SiLU if activation_layer is None else activation_layer

        conv = nn.Conv2d(in_channels=in_planes,
                         out_channels=out_planes,
                         kernel_size=kernel_size,
                         stride=stride,
                         # Padding is derived from the kernel size.
                         padding=(kernel_size - 1) // 2,
                         groups=groups,
                         # No conv bias: per the original author's note, BN's
                         # mean subtraction cancels its effect.
                         bias=False)

        super().__init__(conv, norm_factory(out_planes), act_factory())

注意力机制SE

在这里插入图片描述

class SqueezeExcitation(nn.Module):
    """Squeeze-and-excitation channel attention.

    The input is average-pooled to 1x1, passed through two 1x1 convolutions
    (standing in for fully connected layers) with SiLU and Sigmoid
    activations, and the resulting per-channel gate multiplies the input.

    Args:
        input_c: Channel count used to size the bottleneck; the squeezed
            width is ``input_c // squeeze_factor``.
        expand_c: Channel count of the tensor this module receives and
            returns.
        squeeze_factor: Divisor applied to ``input_c`` for the bottleneck.
    """

    def __init__(self,
                 input_c: int,
                 expand_c: int,
                 squeeze_factor: int = 4):
        super().__init__()
        # Bottleneck width comes from ``input_c``, not from ``expand_c``.
        reduced_c = input_c // squeeze_factor

        # Reduce: expand_c -> reduced_c.
        self.fc1 = nn.Conv2d(expand_c, reduced_c, 1)
        self.ac1 = nn.SiLU()
        # Restore: reduced_c -> expand_c, then squash to (0, 1).
        self.fc2 = nn.Conv2d(reduced_c, expand_c, 1)
        self.ac2 = nn.Sigmoid()

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` multiplied by its per-channel attention gate."""
        pooled = F.adaptive_avg_pool2d(x, output_size=(1, 1))
        gate = self.ac2(self.fc2(self.ac1(self.fc1(pooled))))
        return gate * x

CON_DUCT

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
读 DeepFake detection with multi-scale convolution and vision transformer 第一期：MBConv

MBConv的结构如下图所示。MBConv6指第一层1×1卷积层的卷积核个数为输入通道数的6倍（即扩展倍率为6）；若是MBConv1（扩展倍率为1），则第一层1×1卷积可以省略。以下是MBConv网络的详细代码及注释，主要是根据上图的流程搭建。
复制链接

扫一扫