ConvMixer implementation (PyTorch)

import torch
import torch.nn as nn
from torchsummary import summary

# Run on GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class ConvMixerLayer(nn.Module):
    """One ConvMixer block: a residual depthwise convolution followed by a
    pointwise (1x1) convolution, each wrapped with GELU + BatchNorm.

    Args:
        dim: number of channels (kept constant through the block).
        kernel_size: spatial extent of the depthwise convolution.
    """

    def __init__(self, dim, kernel_size=9):
        super().__init__()
        # Depthwise conv (groups=dim => one filter per channel);
        # padding='same' preserves the spatial resolution.
        depthwise = nn.Conv2d(
            dim, dim, kernel_size=kernel_size, groups=dim, padding='same'
        )
        self.Resnet = nn.Sequential(depthwise, nn.GELU(), nn.BatchNorm2d(dim))
        # Pointwise conv mixes information across channels.
        pointwise = nn.Conv2d(dim, dim, kernel_size=1)
        self.Conv_1x1 = nn.Sequential(pointwise, nn.GELU(), nn.BatchNorm2d(dim))

    def forward(self, x):
        # Residual connection around the depthwise branch only.
        residual = self.Resnet(x) + x
        return self.Conv_1x1(residual)


class ConvMixer(nn.Module):
    def __init__(self, dim=512, depth=5, kernel_size=5, patch_size=7, n_classes=1000):
        super().__init__()
        self.conv2d1 = nn.Sequential(
            nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size),
            nn.GELU(),
            nn.BatchNorm2d(dim)
        )
        self.ConvMixer_blocks = nn.ModuleList([])

        for _ in range(depth):
            self.ConvMixer_blocks.append(ConvMixerLayer(dim=dim, kernel_size=kernel_size))

        self.head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(dim, n_classes)
        )

    def forward(self, x):
        x = self.conv2d1(x)

        for ConvMixer_block in self.ConvMixer_blocks:
            x = ConvMixer_block(x)

        x = self.head(x)

        return x

# Quick sanity check: build a shallow model, print its module tree, and
# show a torchsummary layer/parameter report for a 224x224 RGB input.
if __name__ == '__main__':
    model = ConvMixer(dim=512, depth=2).to(device)
    print(model)
    summary(model, (3, 224, 224))

Run output:


ConvMixer(
  (conv2d1): Sequential(
    (0): Conv2d(3, 512, kernel_size=(7, 7), stride=(7, 7))
    (1): GELU()
    (2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (ConvMixer_blocks): ModuleList(
    (0): ConvMixerLayer(
      (Resnet): Sequential(
        (0): Conv2d(512, 512, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=512)
        (1): GELU()
        (2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (Conv_1x1): Sequential(
        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
        (1): GELU()
        (2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ConvMixerLayer(
      (Resnet): Sequential(
        (0): Conv2d(512, 512, kernel_size=(5, 5), stride=(1, 1), padding=same, groups=512)
        (1): GELU()
        (2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (Conv_1x1): Sequential(
        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
        (1): GELU()
        (2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (head): Sequential(
    (0): AdaptiveAvgPool2d(output_size=(1, 1))
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_features=512, out_features=1000, bias=True)
  )
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1          [-1, 512, 32, 32]          75,776
              GELU-2          [-1, 512, 32, 32]               0
       BatchNorm2d-3          [-1, 512, 32, 32]           1,024
            Conv2d-4          [-1, 512, 32, 32]          13,312
              GELU-5          [-1, 512, 32, 32]               0
       BatchNorm2d-6          [-1, 512, 32, 32]           1,024
            Conv2d-7          [-1, 512, 32, 32]         262,656
              GELU-8          [-1, 512, 32, 32]               0
       BatchNorm2d-9          [-1, 512, 32, 32]           1,024
   ConvMixerLayer-10          [-1, 512, 32, 32]               0
           Conv2d-11          [-1, 512, 32, 32]          13,312
             GELU-12          [-1, 512, 32, 32]               0
      BatchNorm2d-13          [-1, 512, 32, 32]           1,024
           Conv2d-14          [-1, 512, 32, 32]         262,656
             GELU-15          [-1, 512, 32, 32]               0
      BatchNorm2d-16          [-1, 512, 32, 32]           1,024
   ConvMixerLayer-17          [-1, 512, 32, 32]               0
AdaptiveAvgPool2d-18            [-1, 512, 1, 1]               0
          Flatten-19                  [-1, 512]               0
           Linear-20                 [-1, 1000]         513,000
================================================================
Total params: 1,145,832
Trainable params: 1,145,832
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 68.02
Params size (MB): 4.37
Estimated Total Size (MB): 72.96
----------------------------------------------------------------



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

龙庭花雨落

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值