# swin-transformer详解及代码复现

7 篇文章 0 订阅
3 篇文章 2 订阅
2 篇文章 0 订阅

## 2. Patch Partition & Patch Embedding

import paddle
import paddle.nn as nn
class PatchEmbedding(nn.Layer):
    """Split an image into non-overlapping patches and embed every patch.

    A convolution whose kernel size and stride both equal ``patch_size``
    performs the patch partition and the linear projection in one step.

    Args:
        patch_size: Side length of a square patch; used as both kernel size
            and stride of the projection.
        embed_dim: Number of channels produced for every patch token.
        in_channels: Number of channels of the input image.
    """

    def __init__(self, patch_size=4, embed_dim=96, in_channels=3):
        super().__init__()
        # Build the projection from the constructor arguments. Hard-coding
        # 96/4/4 here would silently ignore them and break the LayerNorm
        # below for any embed_dim other than 96.
        self.patch_embed = nn.Conv2D(in_channels,
                                     out_channels=embed_dim,
                                     kernel_size=patch_size,
                                     stride=patch_size)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Embed a batch of images into a sequence of patch tokens.

        Args:
            x: Image batch accepted by the Conv2D projection.

        Returns:
            Tensor of shape [B, h*w, embed_dim].
        """
        x = self.patch_embed(x)     # [B, embed_dim, h, w]
        x = x.flatten(2)            # [B, embed_dim, h*w]
        x = x.transpose([0, 2, 1])  # [B, h*w, embed_dim]
        x = self.norm(x)
        return x



## 3. Patch Merging

class PatchMerging(nn.Layer):
    """Downsample a token map by 2x in each spatial direction.

    Every 2x2 neighbourhood of tokens is concatenated along the channel axis
    (c -> 4c), normalised, and linearly reduced to 2c channels.

    Args:
        resolution: ``(h, w)`` of the incoming token map; both are expected
            to be even so that the four strided slices have equal shapes.
        dim: Channel count ``c`` of the incoming tokens.
    """

    def __init__(self, resolution, dim):
        super().__init__()
        self.resolution = resolution
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim)
        self.norm = nn.LayerNorm(4 * dim)

    def forward(self, x):
        """Merge 2x2 token neighbourhoods.

        Args:
            x: Tensor of shape [b, h*w, c].

        Returns:
            Tensor of shape [b, (h/2)*(w/2), 2*c].
        """
        h, w = self.resolution
        b, _, c = x.shape
        x = x.reshape([b, h, w, c])
        # The four corners of every 2x2 neighbourhood, each [b, h/2, w/2, c].
        x0 = x[:, 0::2, 0::2, :]
        x1 = x[:, 0::2, 1::2, :]
        x2 = x[:, 1::2, 0::2, :]
        x3 = x[:, 1::2, 1::2, :]
        # Stack the corners on the channel axis. Without this step the reshape
        # below would merely group four consecutive tokens of the same row.
        x = paddle.concat([x0, x1, x2, x3], axis=-1)  # [b, h/2, w/2, 4c]
        x = x.reshape([b, -1, 4 * c])
        x = self.norm(x)
        x = self.reduction(x)
        return x


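PS：演示一下 x[:,0::2,0::2,:] 等切片的作用：`0::2` 表示从下标 0 开始每隔一个取一个元素，`1::2` 表示从下标 1 开始每隔一个取一个元素。因此 x0、x1、x2、x3 分别取出每个 2×2 邻域中（偶数行, 偶数列）、（偶数行, 奇数列）、（奇数行, 偶数列）、（奇数行, 奇数列）位置的元素，四者的高和宽都是原特征图的一半。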

# Split a feature map into non-overlapping windows; attention is later
# computed independently inside every window.
def windows_partition(x, window_size):
    """Cut a [B, H, W, C] feature map into square windows.

    Args:
        x: Tensor of shape [B, H, W, C]; H and W must be multiples of
            ``window_size`` for the reshape to succeed.
        window_size: Side length of every window.

    Returns:
        Tensor of shape
        [B * (H // window_size) * (W // window_size), window_size, window_size, C],
        with windows ordered row-major inside every image.
    """
    B, H, W, C = x.shape
    x = x.reshape([B, H // window_size, window_size, W // window_size, window_size, C])
    # transpose/reshape return new tensors, so the results must be assigned.
    # -> [B, H//window_size, W//window_size, window_size, window_size, C]
    x = x.transpose([0, 1, 3, 2, 4, 5])
    # -> [B * H//window_size * W//window_size, window_size, window_size, C]
    x = x.reshape([-1, window_size, window_size, C])
    return x


# Stitch a batch of windows back together into full feature maps.
def window_reverse(window, window_size, H, W):
    """Reassemble windows into a [B, H, W, C] feature map.

    Args:
        window: Tensor whose leading axis enumerates windows, laid out as
            [num_windows_total, window_size, window_size, C].
        window_size: Side length of every window.
        H: Height of the feature map to rebuild.
        W: Width of the feature map to rebuild.

    Returns:
        Tensor of shape [B, H, W, C], where B is the window count divided by
        the number of windows per image.
    """
    rows = H // window_size
    cols = W // window_size
    batch = window.shape[0] // (rows * cols)
    grid = window.reshape([batch, rows, cols, window_size, window_size, -1])
    # Interleave the window-grid axes with the in-window axes, then fold them.
    return grid.transpose([0, 1, 3, 2, 4, 5]).reshape([batch, H, W, -1])


# Attention layer applied to the tokens of one window.
# NOTE(review): this listing is incomplete. The `def __init__(...)` header, the
# header of the reshape/transpose helper, the `def forward(...)` header, the
# unpacking of q/k/v, the attention-score computation and the `if` that pairs
# with the `else:` below are all absent, and the body indentation is lost.
# Restore them from the original source before use. The comments below only
# describe the statements that are actually present.
class window_attention(nn.Layer):
# --- constructor body (header missing; `dim` is presumably a parameter) ---
super().__init__()
self.dim = dim
# Softmax over the last axis.
self.softmax = nn.Softmax(-1)
# A single projection with 3*dim outputs; split into three chunks below.
self.qkv = nn.Linear(dim,int(dim*3))
# Output projection, dim -> dim.
self.proj = nn.Linear(dim,dim)

# --- tail of a helper (header and the computation of `new_shape` missing) ---
# Presumably splits the channel axis into heads — TODO confirm.
x = x.reshape(new_shape)
# Swap axes 1 and 2.
x = x.transpose([0,2,1,3])
return x
# --- forward body (header missing) ---
B,N,C = x.shape
# Split the projected tensor into three equal parts along the last axis.
qkv = self.qkv(x).chunk(3,-1)
# NOTE(review): `q` and `self.scale` are not defined anywhere in the visible code.
q = q*self.scale

# attn = self.softmax(attn)
# NOTE(review): `attn` is never assigned before this point in the visible code.
attn = self.softmax(attn)
# NOTE(review): the matching `if` (presumably a mask check) is missing.
else:
attn = self.softmax(attn)
# Swap axes 1 and 2 back, then fold to the [B, N, C] layout unpacked above.
attn = attn.transpose([0,2,1,3])
attn = attn.reshape([B,N,C])
out = self.proj(attn)
return out


        # NOTE(review): orphaned fragment. It belongs to no visible definition
        # and repeats the region-labelling statements found in SwinBlock.
        # `img_mask` is not defined in the visible code and the trailing
        # `else:` has no body.
        if self.shift_size > 0:
H, W = self.resolution
# Each axis is cut into three bands: [0, -window_size),
# [-window_size, -shift_size) and [-shift_size, end).
h_slices = (slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None))
w_slices = (slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None))
# Write a distinct integer id into every (row band, column band) region.
cnt = 0
for h in h_slices:
for w in w_slices:
img_mask[:, h, w, :] = cnt
cnt += 1
else:



class Identity(nn.Layer):
    """Pass-through layer: the output is the input, unchanged.

    Useful as a placeholder wherever an optional sub-layer is disabled.
    """

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        """Return ``x`` as is."""
        return x

class Mlp(nn.Layer):
    """Two-layer feed-forward network: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        embed_dim: Input and output feature size.
        mlp_ratio: The hidden size is ``int(embed_dim * mlp_ratio)``.
        dropout: Drop probability applied after each linear layer.
    """

    def __init__(self, embed_dim, mlp_ratio=4.0, dropout=0.):
        super().__init__()
        hidden_dim = int(embed_dim * mlp_ratio)
        w_att_1, b_att_1 = self.init_weight()
        w_att_2, b_att_2 = self.init_weight()
        self.fc1 = nn.Linear(embed_dim, hidden_dim, weight_attr=w_att_1, bias_attr=b_att_1)
        self.fc2 = nn.Linear(hidden_dim, embed_dim, weight_attr=w_att_2, bias_attr=b_att_2)
        self.dropout = nn.Dropout(dropout)
        self.act = nn.GELU()

    def init_weight(self):
        """Return fresh ``(weight_attr, bias_attr)`` for one linear layer.

        The attributes were previously returned without ever being created,
        which raised NameError on construction.
        NOTE(review): truncated-normal weights (std 0.02) and zero bias follow
        the reference Swin initialisation; confirm against the original code.
        """
        weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.TruncatedNormal(std=0.02))
        bias_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(0.0))
        return weight_attr, bias_attr

    def forward(self, x):
        """Apply the feed-forward network to ``x`` (last axis = ``embed_dim``)."""
        x = self.fc1(x)
        x = self.act(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        return x


## 4. swin block

# One Swin block: a normalised attention branch and a normalised MLP branch,
# each added back to its input through a residual connection.
# NOTE(review): this listing is incomplete and its indentation is lost. The
# `def __init__(...)` header, the construction of the attention layer, the
# allocation of `img_mask`, the body of the mask `else:` branch, the bodies of
# both `if self.shift_size > 0:` branches in forward and the statement that
# assigns `attn_windows` are all missing. Restore them from the original
# source before use.
class SwinBlock(nn.Layer):
# --- constructor body (header missing; dim, input_resolution, window_size and
# shift_size are presumably its parameters) ---
super().__init__()
self.dim = dim
self.resolution = input_resolution
self.window_size = window_size
self.att_norm = nn.LayerNorm(dim)
self.mlp = Mlp(dim)
self.shift_size = shift_size
self.mlp_norm = nn.LayerNorm(dim)
# For shifted blocks, label the regions of the feature map with integer ids.
# NOTE(review): `img_mask` is not defined in the visible code.
if self.shift_size > 0:
H, W = self.resolution
# Each axis is cut into three bands: [0, -window_size),
# [-window_size, -shift_size) and [-shift_size, end).
h_slices = (slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None))
w_slices = (slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None))
cnt = 0
for h in h_slices:
for w in w_slices:
img_mask[:, h, w, :] = cnt
cnt += 1
# NOTE(review): the body of this branch is missing.
else:

def forward(self,x):

H,W = self.resolution
B,N,C = x.shape
# Keep the input for the first residual connection.
h = x
x = self.att_norm(x)
# Tokens -> spatial map so the windows can be cut out.
x = x.reshape([B,H,W,C])
# NOTE(review): the body of this branch is missing (presumably a cyclic shift
# that produces `shift_x` — TODO confirm).
if self.shift_size >0 :
else:
shift_x = x
x_windows = windows_partition(shift_x,self.window_size)
# Flatten each window into a sequence of window_size*window_size tokens.
x_windows = x_windows.reshape([-1,self.window_size*self.window_size,C])
# NOTE(review): `attn_windows` is never assigned in the visible code; the call
# to the attention layer on `x_windows` is missing.
attn_windows = attn_windows.reshape([-1,self.window_size,self.window_size,C])
shifted_x = window_reverse(attn_windows,self.window_size,H,W)
# NOTE(review): the body of this branch is missing (presumably the reverse
# shift that produces `x` — TODO confirm).
if self.shift_size>0:
else:
x = shifted_x
# Spatial map -> tokens, then the first residual connection.
x = x.reshape([B,-1,C])
x = h+x
# Second residual branch: LayerNorm -> MLP.
h = x
x = self.mlp_norm(x)
x = self.mlp(x)
x = h+x
return x


## 5. 接下来我们将所有的模块串联起来生成一个stage

每个 stage 由若干个 Swin Transformer Block 和一个 Patch Merging 组成（最后一个 stage 不做 Patch Merging，用 Identity 代替）。

# One stage: a list of blocks applied in order, followed by a patch-merging
# layer (or Identity when none is supplied).
# NOTE(review): this listing is incomplete and its indentation is lost. The
# `def __init__(...)` header and the start of the statement that builds each
# block are missing; only the trailing `shift_size=...))` argument survives,
# leaving unbalanced parentheses. dim, input_resolution, depth, window_size
# and patch_merging are presumably constructor parameters — TODO confirm.
class SwinTransformerStage(nn.Layer):
super().__init__()
self.blocks = nn.LayerList()
for i in range(depth):
# print(i)
# Even-indexed blocks use shift 0; odd-indexed blocks shift by half a window.
shift_size=0 if (i % 2 == 0) else window_size//2))
# `patch_merging` is a layer class (or None); it is instantiated here.
if patch_merging is None:
self.patch_merging = Identity()
else:
self.patch_merging = patch_merging(input_resolution,dim)
def forward(self,x):
# Run every block in order, then the merging layer.
for block in self.blocks:
x = block(x)
x = self.patch_merging(x)
return x

# NOTE(review): verbatim repeat of the SwinTransformerStage class defined
# earlier in this file; a second definition simply rebinds the name, so one
# of the two copies is redundant.
# NOTE(review): as in the earlier copy, the `def __init__(...)` header and the
# start of the statement that builds each block are missing, the surviving
# `shift_size=...))` argument leaves unbalanced parentheses, and the
# indentation is lost.
class SwinTransformerStage(nn.Layer):
super().__init__()
self.blocks = nn.LayerList()
for i in range(depth):
# print(i)
# Even-indexed blocks use shift 0; odd-indexed blocks shift by half a window.
shift_size=0 if (i % 2 == 0) else window_size//2))
# `patch_merging` is a layer class (or None); it is instantiated here.
if patch_merging is None:
self.patch_merging = Identity()
else:
self.patch_merging = patch_merging(input_resolution,dim)
def forward(self,x):
# Run every block in order, then the merging layer.
for block in self.blocks:
x = block(x)
x = self.patch_merging(x)
return x

class Swin(nn.Layer):
    """Swin Transformer image classifier.

    Pipeline: patch embedding -> stages -> LayerNorm -> average pooling over
    tokens -> linear classification head. Every stage except the last halves
    the spatial resolution and doubles the channel count via PatchMerging.

    Args:
        image_size: Side length of the (square) input image.
        patch_size: Side length of a patch.
        in_channels: Channel count of the input image. It is accepted for
            interface compatibility but not forwarded to the patch embedding.
        embed_dim: Channel count after patch embedding.
        window_size: Attention window side length.
        depths: Number of blocks in each stage; its length sets the number
            of stages.
        num_classes: Size of the classification output.
    """

    # NOTE(review): the default `depths` gives three stages with 62 blocks in
    # the last one; the common Swin-T layout is [2, 2, 6, 2]. Confirm that
    # this default is intended.
    def __init__(self,
                 image_size=224,
                 patch_size=4,
                 in_channels=3,
                 embed_dim=96,
                 window_size=7,
                 depths = [2,2,62],
                 num_classes=1000):
        super().__init__()
        self.num_classes = num_classes
        # Copy so the instance never aliases the shared default list.
        self.depths = list(depths)
        self.embed_dim = embed_dim
        self.num_stages = len(self.depths)
        # Channels double at every merge, i.e. (num_stages - 1) times.
        self.num_features = int(self.embed_dim * 2 ** (self.num_stages - 1))
        self.patch_resolution = [image_size // patch_size, image_size // patch_size]
        self.patch_embedding = PatchEmbedding(patch_size=patch_size, embed_dim=embed_dim)
        self.stages = nn.LayerList()

        # Build one stage per entry of `depths`; this loop supplies the
        # `idx` and `depth` names used below.
        for idx, depth in enumerate(self.depths):
            stage = SwinTransformerStage(
                dim=int(self.embed_dim * 2 ** idx),
                input_resolution=(self.patch_resolution[0] // (2 ** idx),
                                  self.patch_resolution[1] // (2 ** idx)),
                depth=depth,
                window_size=window_size,
                # The last stage keeps its resolution.
                patch_merging=PatchMerging if (idx < self.num_stages - 1) else None)
            self.stages.append(stage)
        self.norm = nn.LayerNorm(self.num_features)
        # forward() pools over the token axis; the layer must exist.
        self.avgpool = nn.AdaptiveAvgPool1D(1)
        self.fc = nn.Linear(self.num_features, self.num_classes)

    def forward(self, x):
        """Classify a batch of images.

        Args:
            x: Image batch accepted by the patch embedding.

        Returns:
            Logits of shape [B, num_classes].
        """
        x = self.patch_embedding(x)
        for stage in self.stages:
            x = stage(x)
        x = self.norm(x)
        x = x.transpose([0, 2, 1])  # [B, num_features, tokens]
        x = self.avgpool(x)         # [B, num_features, 1]
        x = x.flatten(1)            # [B, num_features]
        x = self.fc(x)
        return x


## 6. 输出网络

    model = Swin()
print(model)
out = model(t)
print(out.shape)

Swin(
(patch_embedding): PatchEmbedding(
(patch_embed): Conv2D(3, 96, kernel_size=[4, 4], stride=[4, 4], data_format=NCHW)
(norm): LayerNorm(normalized_shape=[96], epsilon=1e-05)
)
(stages): LayerList(
(0): SwinTransformerStage(
(blocks): LayerList(
(0): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[96], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=96, out_features=288, dtype=float32)
(proj): Linear(in_features=96, out_features=96, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, dtype=float32)
(fc2): Linear(in_features=384, out_features=96, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[96], epsilon=1e-05)
)
(1): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[96], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=96, out_features=288, dtype=float32)
(proj): Linear(in_features=96, out_features=96, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=96, out_features=384, dtype=float32)
(fc2): Linear(in_features=384, out_features=96, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[96], epsilon=1e-05)
)
)
(patch_merging): PatchMerging(
(reduction): Linear(in_features=384, out_features=192, dtype=float32)
(norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
)
(1): SwinTransformerStage(
(blocks): LayerList(
(0): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[192], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=192, out_features=576, dtype=float32)
(proj): Linear(in_features=192, out_features=192, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, dtype=float32)
(fc2): Linear(in_features=768, out_features=192, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[192], epsilon=1e-05)
)
(1): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[192], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=192, out_features=576, dtype=float32)
(proj): Linear(in_features=192, out_features=192, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, dtype=float32)
(fc2): Linear(in_features=768, out_features=192, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[192], epsilon=1e-05)
)
)
(patch_merging): PatchMerging(
(reduction): Linear(in_features=768, out_features=384, dtype=float32)
(norm): LayerNorm(normalized_shape=[768], epsilon=1e-05)
)
)
(2): SwinTransformerStage(
(blocks): LayerList(
(0): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(1): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(2): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(3): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(4): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(5): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(6): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(7): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(8): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(9): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(10): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(11): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(12): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(13): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(14): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(15): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(16): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(17): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(18): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(19): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(20): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(21): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(22): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(23): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(24): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(25): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(26): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(27): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(28): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(29): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(30): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(31): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(32): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(33): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(34): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(35): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(36): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(37): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(38): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(39): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(40): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(41): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(42): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(43): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(44): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(45): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(46): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(47): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(48): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(49): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(50): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(51): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(52): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(53): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(54): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(55): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(56): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(57): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(58): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(59): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(60): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
(61): SwinBlock(
(att_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(attn): window_attention(
(softmax): Softmax(axis=-1)
(qkv): Linear(in_features=384, out_features=1152, dtype=float32)
(proj): Linear(in_features=384, out_features=384, dtype=float32)
)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, dtype=float32)
(fc2): Linear(in_features=1536, out_features=384, dtype=float32)
(dropout): Dropout(p=0.0, axis=None, mode=upscale_in_train)
(act): GELU(approximate=False)
)
(mlp_norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
)
)
(patch_merging): Identity()
)
)
(norm): LayerNorm(normalized_shape=[384], epsilon=1e-05)
(fc): Linear(in_features=384, out_features=1000, dtype=float32)
)

---------------------------------------------------------------------------

NameError                                 Traceback (most recent call last)

/tmp/ipykernel_790/2976751405.py in <module>
1 model = Swin()
2 print(model)
----> 3 out = model(t)
4 print(out.shape)

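NameError: name 't' is not defined

注：上面的 NameError 并不是模型本身的问题，而是因为在调用 `model(t)` 之前没有定义输入张量 `t`。在运行该单元前先构造一个输入即可，例如 `t = paddle.randn([4, 3, 224, 224])`（通道数为 3，与 PatchEmbedding 中的 `nn.Conv2D(3, ...)` 对应；batch 大小与 224×224 的分辨率仅为示例，需与模型配置的输入分辨率保持一致）。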

## 8. 参考

• 8
点赞
• 108
收藏
觉得还不错? 一键收藏
• 打赏
• 9
评论
04-15 445
04-06 1万+
07-13 1622
05-24 2490
06-11 3346

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

apodxxx

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。