[Releases · MzeroMiko/VMamba · GitHub](https://github.com/MzeroMiko/VMamba/releases)
安装torchvision等:
尝试版本 1:Windows 上 python=3.10, torch=1.13.1(成功)
pip install torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117
尝试版本 2:Windows 上 python=3.10, torch=2.1.0
pip install torchvision==0.16.0+cu118 torchaudio==2.1.0 --extra-index-url https://download.pytorch.org/whl/cu118
模型 pipeline:
熟悉模型结构:
import os
from functools import partial
from typing import Callable
import torch
from torch import nn
from torch.utils import checkpoint
from mmengine.model import BaseModule
from mmdet.registry import MODELS as MODELS_MMDET
from mmseg.registry import MODELS as MODELS_MMSEG
def import_abspy(name="models", path="classification/"):
    """Import module ``name`` from the directory ``path`` and return it.

    ``path`` is made absolute and placed at the front of ``sys.path`` only for
    the duration of the import; the entry is removed again afterwards, whether
    or not the import succeeded.

    Args:
        name: Module name handed to ``importlib.import_module``.
        path: Directory to import from. Relative paths are resolved by
            ``os.path.abspath``.

    Returns:
        The module object returned by ``importlib.import_module``.

    Raises:
        AssertionError: If ``path`` is not an existing directory.
        ImportError: Propagated from ``importlib.import_module`` when the
            module cannot be imported.
    """
    import importlib
    import sys

    path = os.path.abspath(path)
    assert os.path.isdir(path), "not a directory: {}".format(path)
    sys.path.insert(0, path)
    try:
        module = importlib.import_module(name)
    finally:
        # Undo the temporary entry even when the import raises. Remove it by
        # value rather than by position, because the imported code may itself
        # have prepended entries to sys.path.
        try:
            sys.path.remove(path)
        except ValueError:
            pass
    # Report which module was loaded.
    print("detection imported module: {}".format(module.__name__))
    return module
def main():
    """Build an ``MM_VSSM`` backbone with default arguments and print it.

    Loads the ``models`` package from the ``../classification/`` directory
    next to this file, wraps its ``vmamba.Backbone_VSSM`` in a class that also
    derives from ``BaseModule``, instantiates it and prints the class name,
    the module repr and the instance ``__dict__``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    classification_dir = os.path.join(script_dir, "../classification/")
    models_pkg = import_abspy("models", classification_dir)
    Backbone_VSSM = models_pkg.vmamba.Backbone_VSSM

    class MM_VSSM(BaseModule, Backbone_VSSM):
        def __init__(self, *args, **kwargs):
            # Both bases are initialised explicitly: BaseModule with no
            # arguments, the backbone with everything the caller passed.
            BaseModule.__init__(self)
            Backbone_VSSM.__init__(self, *args, **kwargs)

    # Smoke test: instantiate MM_VSSM with its default configuration.
    model = MM_VSSM()
    # Print the class name to confirm the import and construction worked.
    print("Model class:", type(model).__name__)
    print("Model:", model)
    print(vars(model))


# Run the smoke test only when this file is executed as a script.
if __name__ == "__main__":
    main()
print("Model:", model)
Model: MM_VSSM(
(patch_embed): Sequential(
(0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
(1): Permute()
(2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
)
(layers): ModuleList(
(0): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=96, out_features=384, bias=False)
(act): SiLU()
(conv2d): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192)
(out_act): Identity()
(out_proj): Linear(in_features=192, out_features=96, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0)
(norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=384, out_features=96, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=96, out_features=384, bias=False)
(act): SiLU()
(conv2d): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192)
(out_act): Identity()
(out_proj): Linear(in_features=192, out_features=96, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0071428571827709675)
(norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=384, out_features=96, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
)
(1): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=192, out_features=768, bias=False)
(act): SiLU()
(conv2d): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384)
(out_act): Identity()
(out_proj): Linear(in_features=384, out_features=192, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.014285714365541935)
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=192, out_features=768, bias=False)
(act): SiLU()
(conv2d): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384)
(out_act): Identity()
(out_proj): Linear(in_features=384, out_features=192, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.02142857201397419)
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
)
(2): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.02857142873108387)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0357142873108387)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(2): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.04285714402794838)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(3): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.05000000074505806)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(4): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.05714285746216774)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(5): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.06428571045398712)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(6): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0714285746216774)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(7): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.07857143133878708)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(8): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.08571428805589676)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
)
(3): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=768, out_features=3072, bias=False)
(act): SiLU()
(conv2d): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536)
(out_act): Identity()
(out_proj): Linear(in_features=1536, out_features=768, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.09285714477300644)
(norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=768, out_features=3072, bias=False)
(act): SiLU()
(conv2d): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536)
(out_act): Identity()
(out_proj): Linear(in_features=1536, out_features=768, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.10000000149011612)
(norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Identity()
)
)
(outnorm0): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(outnorm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(outnorm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(outnorm3): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
模型参数
print(model.__dict__)
`__dict__` 是一个字典(`dict`),它包含了对象的所有属性名(作为键)和对应的值。
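对于 `MM_VSSM` 的实例,`__init__` 过程中会在实例上设置许多属性(例如 `training`、`_parameters`、`_buffers`、`_modules`,以及 `dims`、`num_classes`、`out_indices` 等),所以 `model.__dict__` 会把它们全部列出,其中 `_modules` 里保存了所有子模块。注意 `__module__`、`__doc__` 属于类属性,不会出现在实例的 `__dict__` 中。实际输出如下: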
{'training': True, '_parameters': OrderedDict(), '_buffers': OrderedDict(), '_non_persistent_buffers_set': set(), '_backward_pre_hooks': OrderedDict(), '_backward_hooks': OrderedDict(), '_is_full_backward_hook': None, '_forward_hooks': OrderedDict(), '_forward_hooks_with_kwargs': OrderedDict(), '_forward_hooks_always_called': OrderedDict(), '_forward_pre_hooks': OrderedDict(), '_forward_pre_hooks_with_kwargs': OrderedDict(), '_state_dict_hooks': OrderedDict(), '_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_pre_hooks': OrderedDict(), '_load_state_dict_post_hooks': OrderedDict(), '_modules': OrderedDict([('patch_embed', Sequential(
(0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
(1): Permute()
(2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
)), ('layers', ModuleList(
(0): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=96, out_features=384, bias=False)
(act): SiLU()
(conv2d): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192)
(out_act): Identity()
(out_proj): Linear(in_features=192, out_features=96, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0)
(norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=384, out_features=96, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=96, out_features=384, bias=False)
(act): SiLU()
(conv2d): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192)
(out_act): Identity()
(out_proj): Linear(in_features=192, out_features=96, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0071428571827709675)
(norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=384, out_features=96, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
)
(1): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=192, out_features=768, bias=False)
(act): SiLU()
(conv2d): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384)
(out_act): Identity()
(out_proj): Linear(in_features=384, out_features=192, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.014285714365541935)
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=192, out_features=768, bias=False)
(act): SiLU()
(conv2d): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384)
(out_act): Identity()
(out_proj): Linear(in_features=384, out_features=192, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.02142857201397419)
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
)
(2): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.02857142873108387)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0357142873108387)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(2): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.04285714402794838)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(3): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.05000000074505806)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(4): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.05714285746216774)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(5): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.06428571045398712)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(6): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.0714285746216774)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(7): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.07857143133878708)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(8): VSSBlock(
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=384, out_features=1536, bias=False)
(act): SiLU()
(conv2d): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768)
(out_act): Identity()
(out_proj): Linear(in_features=768, out_features=384, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.08571428805589676)
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Sequential(
(0): Permute()
(1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
(2): Permute()
(3): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
)
(3): Sequential(
(blocks): Sequential(
(0): VSSBlock(
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=768, out_features=3072, bias=False)
(act): SiLU()
(conv2d): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536)
(out_act): Identity()
(out_proj): Linear(in_features=1536, out_features=768, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.09285714477300644)
(norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): VSSBlock(
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(op): SS2D(
(out_norm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
(in_proj): Linear(in_features=768, out_features=3072, bias=False)
(act): SiLU()
(conv2d): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536)
(out_act): Identity()
(out_proj): Linear(in_features=1536, out_features=768, bias=False)
(dropout): Identity()
)
(drop_path): timm.DropPath(0.10000000149011612)
(norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=768, out_features=3072, bias=True)
(act): GELU(approximate='none')
(fc2): Linear(in_features=3072, out_features=768, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(downsample): Identity()
)
)), ('outnorm0', LayerNorm((96,), eps=1e-05, elementwise_affine=True)), ('outnorm1', LayerNorm((192,), eps=1e-05, elementwise_affine=True)), ('outnorm2', LayerNorm((384,), eps=1e-05, elementwise_affine=True)), ('outnorm3', LayerNorm((768,), eps=1e-05, elementwise_affine=True))]), 'channel_first': False, 'num_classes': 1000, 'num_layers': 4, 'num_features': 768, 'dims': [96, 192, 384, 768], 'pos_embed': None, 'out_indices': (0, 1, 2, 3), '_is_init': False, 'init_cfg': None}