from functools import partial
import torch
from timm.models.vision_transformer import VisionTransformer,_cfg#type func
from timm.models.registry import register_model
from timm.models.layers import trunc_normal_
# print(type(VisionTransformer),type(_cfg))
class Visiontransformer_gap(VisionTransformer):
    """Vision Transformer variant for weakly-supervised crowd counting
    (TransCrowd-style).

    The cls token is discarded after the transformer blocks; every patch
    token is average-pooled down to ``POOLED_DIM`` channels, flattened, and
    fed to a small MLP regression head that predicts a single count value.
    """

    # Channel size each patch token is pooled down to before the head.
    POOLED_DIM = 48

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        num_patches = self.patch_embed.num_patches
        # Re-create the positional embedding (num_patches + 1 slots, the
        # extra one for the cls token) so it matches this model's patch
        # grid, then re-initialize it with a truncated normal.
        self.pos_embed = torch.nn.Parameter(
            torch.zeros(1, num_patches + 1, self.embed_dim))
        # NOTE(review): timm's ViT initializes pos_embed with std=.02;
        # std=0.2 here may be a typo — kept as-is to preserve behavior.
        trunc_normal_(self.pos_embed, std=0.2)
        # Regression head. Input size is num_patches * POOLED_DIM — for
        # img_size=384 / patch_size=16 that is 576 * 48 = 27648, i.e. the
        # original hard-coded 6912*4. Computing it from num_patches lets
        # the model work with other image/patch sizes as well.
        self.output1 = torch.nn.Sequential(
            torch.nn.ReLU(),
            torch.nn.Linear(num_patches * self.POOLED_DIM, 128),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(128, 1),
        )
        self.output1.apply(self._init_weights)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return patch-token features of shape (B, num_patches, embed_dim).

        Unlike the stock ViT, the cls token is dropped at the end — the
        counting head consumes all patch tokens instead.
        """
        B = x.shape[0]
        x = self.patch_embed(x)                       # (B, num_patches, embed_dim)
        cls_tokens = self.cls_token.expand(B, -1, -1)  # (B, 1, embed_dim)
        x = torch.cat((cls_tokens, x), dim=1)          # prepend cls token
        x = x + self.pos_embed                         # add positional embedding
        x = self.pos_drop(x)
        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)
        # Drop the cls token; keep only the patch tokens.
        return x[:, 1:]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict a scalar crowd count per image; returns shape (B, 1)."""
        x = self.forward_features(x)                   # (B, num_patches, embed_dim)
        # Pool each token's embed_dim channels down to POOLED_DIM.
        x = torch.nn.functional.adaptive_avg_pool1d(x, self.POOLED_DIM)
        x = x.view(x.shape[0], -1)                     # (B, num_patches * POOLED_DIM)
        return self.output1(x)
@register_model
def patch_16_384_gap(pretrained=False, **kwargs):
    """Build a ViT-Base/16 @ 384 crowd-counting model (GAP head).

    Args:
        pretrained: if True, load DeiT-Base/384 weights from the local
            checkpoint ``deit_base_patch16_384-8de9b5d1.pth``; keys that
            don't match (e.g. the new counting head) are skipped via
            ``strict=False``.
        **kwargs: extra keyword arguments forwarded to the model
            constructor.

    Returns:
        A ``Visiontransformer_gap`` instance with ``default_cfg`` set.
    """
    model = Visiontransformer_gap(
        img_size=384, patch_size=16, embed_dim=768, depth=12, num_heads=12,
        mlp_ratio=4, qkv_bias=True,
        norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
        # BUG FIX: **kwargs was previously unpacked inside partial(...)
        # (misplaced closing paren), so callers' keyword arguments were
        # forwarded to LayerNorm instead of the model constructor.
        **kwargs,
    )
    model.default_cfg = _cfg()
    if pretrained:
        # map_location='cpu' so the checkpoint loads without a GPU.
        chkp = torch.load('deit_base_patch16_384-8de9b5d1.pth',
                          map_location='cpu')
        model.load_state_dict(chkp['model'], strict=False)
    return model
# TransCrowd: weakly-supervised crowd counting with transformers
# (blog-scrape residue, kept as a comment so the file stays valid Python;
#  original metadata: latest recommended article published 2024-09-14 21:36:12)