Using Swin Transformer with timm

1. Installation

```bash
pip install timm
```
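A quick check that the install worked (the printed version will differ on your machine):

```python
import timm

# The model names listed later in this post depend on the installed timm version.
print(timm.__version__)
```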

2. How many pretrained models does timm ship?

```python
import timm

# How many pretrained models does timm provide?
model_pretrain_list = timm.list_models(pretrained=True)
print(len(model_pretrain_list), model_pretrain_list[:3])
```
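`list_models` also accepts a wildcard filter, which is a quick way to pull out just the swin variants (the exact result depends on your timm version):

```python
import timm

# Filter the model registry by name pattern.
swin_models = timm.list_models('swin*', pretrained=True)
print(len(swin_models), swin_models[:3])
```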


3. Loading a swin model usually throws an error

```python
model_ft = timm.create_model('swin_base_patch4_window7_224', pretrained=True, drop_path_rate=0.2)
```

The error output looks like this:

Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22kto1k.pth" to /root/.cache/torch/hub/checkpoints/swin_base_patch4_window7_224_22kto1k.pth

The fix: download the matching swin_base_patch4_window7_224.pth from the Swin Transformer official repo (I keep copies of all the models in my own Baidu Netdisk), and rename it to swin_base_patch4_window7_224_22kto1k.pth as the prompt indicates.
Then move the file into /root/.cache/torch/hub/checkpoints/.
After that, timm loads the model without any trouble.
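If you prefer to script the rename-and-copy step, here is a minimal sketch; it assumes the downloaded file sits in the current working directory, so adjust the source path to wherever you actually saved it:

```python
import os
import shutil

import timm

# Directory where torch.hub caches checkpoints (taken from the download message above).
ckpt_dir = os.path.expanduser("~/.cache/torch/hub/checkpoints")
os.makedirs(ckpt_dir, exist_ok=True)

# Copy the manually downloaded weights under the filename timm expects.
# "swin_base_patch4_window7_224.pth" is an assumed local path; change it to your download location.
shutil.copy(
    "swin_base_patch4_window7_224.pth",
    os.path.join(ckpt_dir, "swin_base_patch4_window7_224_22kto1k.pth"),
)

# create_model() now finds the cached file and skips the download.
model_ft = timm.create_model('swin_base_patch4_window7_224', pretrained=True, drop_path_rate=0.2)
```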

4. Where to download the pretrained models

  • Official repo: https://github.com/microsoft/Swin-Transformer (the repo also provides Baidu Netdisk download links)
  • HRNet: https://github.com/HRNet/HRNet-Image-Classification

5. A note on ConvNeXt

  • The ConvNeXt backbone's output dimension must match the input dimension of the classification head (see the sketch below).

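A minimal sketch of what that means in practice when you attach your own head; `convnext_base` and the class count of 10 are just placeholders:

```python
import timm
import torch.nn as nn

# num_classes=0 strips timm's built-in classifier, so the model outputs pooled features.
backbone = timm.create_model('convnext_base', pretrained=False, num_classes=0)

# The new head's in_features must equal the backbone's feature dimension.
head = nn.Linear(backbone.num_features, 10)
```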

6. Swin (and ViT) models available in timm

```python
# Available swin models
swin_transformer = ['swin_base_patch4_window7_224',
 'swin_base_patch4_window7_224_in22k',
 'swin_base_patch4_window12_384',
 'swin_base_patch4_window12_384_in22k',
 'swin_large_patch4_window7_224',
 'swin_large_patch4_window7_224_in22k',
 'swin_large_patch4_window12_384',
 'swin_large_patch4_window12_384_in22k',
 'swin_s3_base_224',
 'swin_s3_small_224',
 'swin_s3_tiny_224',
 'swin_small_patch4_window7_224',
 'swin_tiny_patch4_window7_224',
 'swinv2_base_window8_256',
 'swinv2_base_window12_192_22k',
 'swinv2_base_window12to16_192to256_22kft1k',
 'swinv2_base_window12to24_192to384_22kft1k',
 'swinv2_base_window16_256',
 'swinv2_cr_small_224',
 'swinv2_cr_small_ns_224',
 'swinv2_cr_tiny_ns_224',
 'swinv2_large_window12_192_22k',
 'swinv2_large_window12to16_192to256_22kft1k',
 'swinv2_large_window12to24_192to384_22kft1k',
 'swinv2_small_window8_256',
 'swinv2_small_window16_256',
 'swinv2_tiny_window8_256',
 'swinv2_tiny_window16_256',]
# Available ViT models
vision_tranformer = ['visformer_small',
 'vit_base_patch8_224',
 'vit_base_patch8_224_dino',
 'vit_base_patch8_224_in21k',
 'vit_base_patch16_224',
 'vit_base_patch16_224_dino',
 'vit_base_patch16_224_in21k',
 'vit_base_patch16_224_miil',
 'vit_base_patch16_224_miil_in21k',
 'vit_base_patch16_224_sam',
 'vit_base_patch16_384',
 'vit_base_patch16_rpn_224',
 'vit_base_patch32_224',
 'vit_base_patch32_224_clip_laion2b',
 'vit_base_patch32_224_in21k',
 'vit_base_patch32_224_sam',
 'vit_base_patch32_384',
 'vit_base_r50_s16_224_in21k',
 'vit_base_r50_s16_384',
 'vit_giant_patch14_224_clip_laion2b',
 'vit_huge_patch14_224_clip_laion2b',
 'vit_huge_patch14_224_in21k',
 'vit_large_patch14_224_clip_laion2b',
 'vit_large_patch16_224',
 'vit_large_patch16_224_in21k',
 'vit_large_patch16_384',
 'vit_large_patch32_224_in21k',
 'vit_large_patch32_384',
 'vit_large_r50_s32_224',
 'vit_large_r50_s32_224_in21k',
 'vit_large_r50_s32_384',
 'vit_relpos_base_patch16_224',
 'vit_relpos_base_patch16_clsgap_224',
 'vit_relpos_base_patch32_plus_rpn_256',
 'vit_relpos_medium_patch16_224',
 'vit_relpos_medium_patch16_cls_224',
 'vit_relpos_medium_patch16_rpn_224',
 'vit_relpos_small_patch16_224',
 'vit_small_patch8_224_dino',
 'vit_small_patch16_224',
 'vit_small_patch16_224_dino',
 'vit_small_patch16_224_in21k',
 'vit_small_patch16_384',
 'vit_small_patch32_224',
 'vit_small_patch32_224_in21k',
 'vit_small_patch32_384',
 'vit_small_r26_s32_224',
 'vit_small_r26_s32_224_in21k',
 'vit_small_r26_s32_384',
 'vit_srelpos_medium_patch16_224',
 'vit_srelpos_small_patch16_224',
 'vit_tiny_patch16_224',
 'vit_tiny_patch16_224_in21k',
 'vit_tiny_patch16_384',
 'vit_tiny_r_s16_p8_224',
 'vit_tiny_r_s16_p8_224_in21k',
 'vit_tiny_r_s16_p8_384',]
```
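As a quick sanity check, here is a small sketch that builds one of the swin models above and runs a dummy forward pass; `num_classes=5` is just a placeholder, and `pretrained=False` sidesteps the download issue from section 3:

```python
import torch
import timm

# Build a small swin variant with a custom classification head.
model = timm.create_model('swin_tiny_patch4_window7_224', pretrained=False, num_classes=5)

x = torch.randn(1, 3, 224, 224)  # swin_*_224 models expect 224x224 inputs
print(model(x).shape)            # torch.Size([1, 5])
```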



References
[Vision Transformer](https://zhuanlan.zhihu.com/p/350837279)
[Swin Transformer](https://zhuanlan.zhihu.com/p/485716110)


