TransCrowd: Weakly-Supervised Crowd Counting with Transformers

from functools import partial
import torch
from timm.models.vision_transformer import VisionTransformer, _cfg
from timm.models.registry import register_model
from timm.models.layers import trunc_normal_

# print(type(VisionTransformer),type(_cfg))

class Visiontransformer_gap(VisionTransformer):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        num_patches = self.patch_embed.num_patches
        # 576 patches for a 384x384 input with patch_size=16 (24 x 24); 196 for a 224x224 input
        self.pos_embed = torch.nn.Parameter(torch.zeros(1, num_patches + 1, self.embed_dim))
        # learnable position embedding of shape (1, num_patches + 1, embed_dim); the +1 is for the class token
        trunc_normal_(self.pos_embed, std=0.2)
        # initialize the position embedding from a truncated normal distribution
        # (note: the upstream ViT/DeiT default is std=0.02)
        self.output1 = torch.nn.Sequential(
            torch.nn.ReLU(),
            torch.nn.Linear(6912 * 4, 128),  # 6912*4 = 27648 = 576 tokens x 48 channels after pooling in forward()
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(128, 1)  # regression head: a single predicted count
        )
        self.output1.apply(self._init_weights)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        B = x.shape[0]
        x = self.patch_embed(x)  # patch embedding: (B, 576, 768) for a 384x384 input

        cls_tokens = self.cls_token.expand(B, -1, -1)  # (B, 1, 768)

        x = torch.cat((cls_tokens, x), dim=1)  # prepend the class token: (B, 577, 768)

        x = x + self.pos_embed  # add the position embedding
        x = self.pos_drop(x)

        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)

        x = x[:, 1:]  # drop the class token, keep only the patch tokens: (B, 576, 768)

        return x

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (B, 3, 384, 384)
        x = self.forward_features(x)
        # (B, 576, 768)
        x = torch.nn.functional.adaptive_avg_pool1d(x, 48)
        # average-pool the channel dimension 768 -> 48: (B, 576, 48)
        # (for a 224x224 input the shapes would be (B, 196, 768) -> (B, 196, 48))

        x = x.view(x.shape[0], -1)
        # flatten: (B, 576 * 48) = (B, 27648)

        y = self.output1(x)  # regression head -> predicted count, shape (B, 1)

        return y

@register_model
def patch_16_384_gap(pretrained=False, **kwargs):
    model = Visiontransformer_gap(
        img_size=384, patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), **kwargs
    )
    model.default_cfg = _cfg()
    if pretrained:
        # load DeiT-Base/16 384 weights; keys that do not match (e.g. the classification head) are skipped
        chkp = torch.load('deit_base_patch16_384-8de9b5d1.pth', map_location='cpu')
        model.load_state_dict(chkp['model'], strict=False)

    return model
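
A minimal usage sketch for the model above, assuming the file is importable as-is and no pretrained checkpoint is available locally (so pretrained=False); the shape comments follow the printouts in the code.

import torch

# build the GAP variant without pretrained weights
model = patch_16_384_gap(pretrained=False)
model.eval()

# one dummy 384x384 RGB image
img = torch.randn(1, 3, 384, 384)

with torch.no_grad():
    count = model(img)

# 384 / 16 = 24 patches per side -> 24 * 24 = 576 tokens
# adaptive_avg_pool1d reduces 768 -> 48 channels, so the flattened feature is 576 * 48 = 27648 = 6912 * 4
print(count.shape)  # torch.Size([1, 1]): one predicted crowd count per image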