[Code Walkthrough] Tracing the data flow through the LOCA forward function

    def forward(self, x, bboxes):
        num_objects = bboxes.size(1) if not self.zero_shot else self.num_objects
        # backbone
        backbone_features = self.backbone(x)
        # backbone_features.shape: [4, 3584, 64, 64]
        backbone_features = self.ccff(backbone_features)
        # backbone_features.shape after ccff: [4, 3584, 64, 64]
        # prepare the encoder input
        src = self.input_proj(backbone_features)
        # src.shape: [4, 256, 64, 64]
        bs, c, h, w = src.size()
        pos_emb = self.pos_emb(bs, h, w, src.device).flatten(2).permute(2, 0, 1)
        src = src.flatten(2).permute(2, 0, 1)

        # push through the encoder
        if self.num_encoder_layers > 0:
            image_features = self.encoder(src, pos_emb, src_key_padding_mask=None, src_mask=None)
        else:
            image_features = src

        # prepare OPE input
        f_e = image_features.permute(1, 2, 0).reshape(-1, self.emb_dim, h, w)

        all_prototypes = self.ope(f_e, pos_emb, bboxes)

        outputs = list()
        # Question: what exactly is all_prototypes here?
        for i in range(all_prototypes.size(0)):
            prototypes = all_prototypes[i, ...].permute(1, 0, 2).reshape(
                bs, num_objects, self.kernel_dim, self.kernel_dim, -1
            ).permute(0, 1, 4, 2, 3).flatten(0, 2)[:, None, ...]

            response_maps = F.conv2d(
                torch.cat([f_e for _ in range(num_objects)], dim=1).flatten(0, 1).unsqueeze(0),
                prototypes,
                bias=None,
                padding=self.kernel_dim // 2,
                groups=prototypes.size(0)
            ).view(
                bs, num_objects, self.emb_dim, h, w
            ).max(dim=1)[0]

            # send through regression heads
            if i == all_prototypes.size(0) - 1:
                predicted_dmaps = self.regression_head(response_maps)
            else:
                predicted_dmaps = self.aux_heads[i](response_maps)
            outputs.append(predicted_dmaps)

        return outputs[-1], outputs[:-1]
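
The flatten(2).permute(2, 0, 1) calls above convert the projected feature map from [bs, c, h, w] into the [h*w, bs, c] token sequence that the transformer encoder expects, and permute(1, 2, 0).reshape(...) undoes the conversion before the OPE module. Below is a minimal standalone sketch of this round trip, with assumed dummy sizes matching the traced shapes:

    import torch

    bs, c, h, w = 4, 256, 64, 64        # assumed dummy sizes, matching the traced shapes
    src = torch.randn(bs, c, h, w)      # stand-in for the projected backbone features

    # [bs, c, h, w] -> [bs, c, h*w] -> [h*w, bs, c]: one token per spatial position
    tokens = src.flatten(2).permute(2, 0, 1)
    print(tokens.shape)                 # torch.Size([4096, 4, 256])

    # after the encoder, the inverse permute/reshape restores the spatial feature map
    f_e = tokens.permute(1, 2, 0).reshape(bs, c, h, w)
    print(f_e.shape)                    # torch.Size([4, 256, 64, 64])

The same reshaping appears again in the annotated version of the forward pass below.
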
    def forward(self, x, bboxes):
        # determine the number of objects: outside the zero-shot setting it is taken from the number of bboxes
        num_objects = bboxes.size(1) if not self.zero_shot else self.num_objects
        # backbone
        # extract features with the backbone network
        backbone_features = self.backbone(x)
        # prepare the encoder input
        src = self.input_proj(backbone_features)
        # get the size of the feature map
        bs, c, h, w = src.size()
        # generate the positional embedding and reshape it to match the encoder input
        pos_emb = self.pos_emb(bs, h, w, src.device).flatten(2).permute(2, 0, 1)
        # reshape src in the same way
        src = src.flatten(2).permute(2, 0, 1)

        # push through the encoder
        if self.num_encoder_layers > 0:
            image_features = self.encoder(src, pos_emb, src_key_padding_mask=None, src_mask=None)
        else:
            image_features = src

        # prepare the input to the OPE (object prototype extraction) module
        f_e = image_features.permute(1, 2, 0).reshape(-1, self.emb_dim, h, w)

        # call the OPE module to generate all prototypes
        # Question: what exactly is a prototype?
        all_prototypes = self.ope(f_e, pos_emb, bboxes)

        # initialize the output list
        outputs = list()

        # iterate over all prototype sets
        # Question: what exactly is all_prototypes here?
        for i in range(all_prototypes.size(0)):
            # reshape this iteration's prototypes into depthwise conv kernels
            prototypes = all_prototypes[i, ...].permute(1, 0, 2).reshape(
                bs, num_objects, self.kernel_dim, self.kernel_dim, -1
            ).permute(0, 1, 4, 2, 3).flatten(0, 2)[:, None, ...]

            # correlate the prototypes with the query features to produce response maps
            # prototypes: the conv kernels built above
            # query features: [f_e for _ in range(num_objects)]
            response_maps = F.conv2d(
                torch.cat([f_e for _ in range(num_objects)], dim=1).flatten(0, 1).unsqueeze(0),
                prototypes,
                bias=None,
                padding=self.kernel_dim // 2,
                groups=prototypes.size(0)
            ).view(
                bs, num_objects, self.emb_dim, h, w
            ).max(dim=1)[0]

            # send through regression heads
            if i == all_prototypes.size(0) - 1:
                predicted_dmaps = self.regression_head(response_maps)
            else:
                predicted_dmaps = self.aux_heads[i](response_maps)
            # append the predicted density map to the output list
            outputs.append(predicted_dmaps)

        # return the final predicted density map and the intermediate auxiliary outputs
        return outputs[-1], outputs[:-1]
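
The questions left in the comments (what exactly is a prototype / all_prototypes?) can be answered from the shapes alone: judging from the reshapes and the auxiliary heads, each slice all_prototypes[i] corresponds to one OPE iteration and has shape [num_objects * kernel_dim * kernel_dim, bs, emb_dim], i.e. per image it carries num_objects prototypes of size kernel_dim x kernel_dim x emb_dim. The loop turns them into depthwise convolution kernels and slides them over the image features. Below is a minimal standalone sketch of that correlation step with assumed dummy sizes (bs=2, num_objects=3, emb_dim=256, kernel_dim=3, h=w=64); the names prototypes_i and queries are illustrative, not from the original code:

    import torch
    import torch.nn.functional as F

    # assumed dummy sizes, not the real config values
    bs, num_objects, emb_dim, kernel_dim, h, w = 2, 3, 256, 3, 64, 64

    # stand-in for the encoder output reshaped back into a feature map
    f_e = torch.randn(bs, emb_dim, h, w)

    # stand-in for one OPE iteration: [num_objects * k * k, bs, emb_dim]
    prototypes_i = torch.randn(num_objects * kernel_dim * kernel_dim, bs, emb_dim)

    # reshape the prototypes into depthwise conv kernels: [bs * num_objects * emb_dim, 1, k, k]
    prototypes = prototypes_i.permute(1, 0, 2).reshape(
        bs, num_objects, kernel_dim, kernel_dim, -1
    ).permute(0, 1, 4, 2, 3).flatten(0, 2)[:, None, ...]
    print(prototypes.shape)  # torch.Size([1536, 1, 3, 3])

    # tile the feature map once per object, then fold (batch, object, channel) into the group axis
    queries = torch.cat([f_e for _ in range(num_objects)], dim=1).flatten(0, 1).unsqueeze(0)
    print(queries.shape)     # torch.Size([1, 1536, 64, 64])

    # grouped (depthwise) correlation: each k x k kernel only sees its own
    # (batch, object, channel) slice of the tiled feature map
    response_maps = F.conv2d(
        queries, prototypes, bias=None,
        padding=kernel_dim // 2,
        groups=prototypes.size(0),
    ).view(bs, num_objects, emb_dim, h, w).max(dim=1)[0]
    print(response_maps.shape)  # torch.Size([2, 256, 64, 64])

The max over the object dimension keeps, at every channel and spatial location, the strongest response across the exemplar prototypes; the regression heads then turn this [bs, emb_dim, h, w] tensor into a density map (the last iteration goes through regression_head, earlier ones through the aux_heads used for auxiliary supervision).
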
