from functools import partial
import torch
from timm.models.vision_transformer import VisionTransformer,_cfg#type func
from timm.models.registry import register_model
from timm.models.layers import trunc_normal_
# print(type(VisionTransformer),type(_cfg))
class Visiontransformer_gap(VisionTransformer):
    """Vision Transformer variant for weakly-supervised crowd counting
    (TransCrowd-style).

    The cls token is discarded after the transformer blocks; every patch
    token is average-pooled down to ``POOLED_DIM`` channels, flattened, and
    fed to a small MLP regression head that predicts a single count value.
    """

    # Channel size each patch token is pooled down to before the head.
    POOLED_DIM = 48

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        num_patches = self.patch_embed.num_patches
        # Re-create the positional embedding (num_patches + 1 slots, the
        # extra one for the cls token) so it matches this model's patch
        # grid, then re-initialize it with a truncated normal.
        self.pos_embed = torch.nn.Parameter(
            torch.zeros(1, num_patches + 1, self.embed_dim))
        # NOTE(review): timm's ViT initializes pos_embed with std=.02;
        # std=0.2 here may be a typo — kept as-is to preserve behavior.
        trunc_normal_(self.pos_embed, std=0.2)
        # Regression head. Input size is num_patches * POOLED_DIM — for
        # img_size=384 / patch_size=16 that is 576 * 48 = 27648, i.e. the
        # original hard-coded 6912*4. Computing it from num_patches lets
        # the model work with other image/patch sizes as well.
        self.output1 = torch.nn.Sequential(
            torch.nn.ReLU(),
            torch.nn.Linear(num_patches * self.POOLED_DIM, 128),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(128, 1),
        )
        self.output1.apply(self._init_weights)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return patch-token features of shape (B, num_patches, embed_dim).

        Unlike the stock ViT, the cls token is dropped at the end — the
        counting head consumes all patch tokens instead.
        """
        B = x.shape[0]
        x = self.patch_embed(x)                       # (B, num_patches, embed_dim)
        cls_tokens = self.cls_token.expand(B, -1, -1)  # (B, 1, embed_dim)
        x = torch.cat((cls_tokens, x), dim=1)          # prepend cls token
        x = x + self.pos_embed                         # add positional embedding
        x = self.pos_drop(x)
        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)
        # Drop the cls token; keep only the patch tokens.
        return x[:, 1:]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict a scalar crowd count per image; returns shape (B, 1)."""
        x = self.forward_features(x)                   # (B, num_patches, embed_dim)
        # Pool each token's embed_dim channels down to POOLED_DIM.
        x = torch.nn.functional.adaptive_avg_pool1d(x, self.POOLED_DIM)
        x = x.view(x.shape[0], -1)                     # (B, num_patches * POOLED_DIM)
        return self.output1(x)
@register_model
def patch_16_384_gap(pretrained=False, **kwargs):
    """Build a ViT-Base/16 @ 384 crowd-counting model (GAP head).

    Args:
        pretrained: if True, load DeiT-Base/384 weights from the local
            checkpoint ``deit_base_patch16_384-8de9b5d1.pth``; keys that
            don't match (e.g. the new counting head) are skipped via
            ``strict=False``.
        **kwargs: extra keyword arguments forwarded to the model
            constructor.

    Returns:
        A ``Visiontransformer_gap`` instance with ``default_cfg`` set.
    """
    model = Visiontransformer_gap(
        img_size=384, patch_size=16, embed_dim=768, depth=12, num_heads=12,
        mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
        # BUG FIX: **kwargs was previously unpacked inside partial(...)
        # (misplaced closing paren), so callers' keyword arguments were
        # forwarded to LayerNorm instead of the model constructor.
        **kwargs,
    )
    model.default_cfg = _cfg()
    if pretrained:
        # map_location='cpu' so the checkpoint loads without a GPU.
        chkp = torch.load('deit_base_patch16_384-8de9b5d1.pth',
                          map_location='cpu')
        model.load_state_dict(chkp['model'], strict=False)
    return model
# TransCrowd: weakly-supervised crowd counting with transformers
# (blog-scrape residue, kept as a comment so the file stays valid Python;
#  original metadata: latest recommended article published 2024-09-14 21:36:12)