MixFormer Tracking Code Walkthrough

tracker/mixformer_online.py (the per-frame track method)

H, W, _ = image.shape  # e.g. H = 400, W = 720
self.frame_id += 1

x_patch_arr, resize_factor, x_amask_arr = sample_target(image, self.state, self.params.search_factor,
                                                        output_sz=self.params.search_size)  # self.state is (x1, y1, w, h)
# x_patch_arr: 320 x 320 x 3. The image region around the previous target box is
# scaled and cropped into a square 320 x 320 search patch.
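For context, a minimal sketch of what sample_target does, assuming the STARK-style crop utility that MixFormer builds on; the function name, padding arithmetic, and mask handling here are simplified approximations, not the verbatim implementation:

import math
import cv2
import numpy as np

def sample_target_sketch(im, target_bb, search_area_factor, output_sz):
    """Crop a square region around target_bb and resize it to output_sz.

    target_bb is (x1, y1, w, h); the crop side is sqrt(w*h) * search_area_factor,
    so the amount of context scales with the target size.
    """
    x, y, w, h = target_bb
    crop_sz = math.ceil(math.sqrt(w * h) * search_area_factor)

    # Crop coordinates centered on the box center
    x1 = round(x + 0.5 * w - crop_sz * 0.5)
    y1 = round(y + 0.5 * h - crop_sz * 0.5)
    x2, y2 = x1 + crop_sz, y1 + crop_sz

    # Padding needed on each side where the crop leaves the frame
    x1_pad, y1_pad = max(0, -x1), max(0, -y1)
    x2_pad = max(x2 - im.shape[1] + 1, 0)
    y2_pad = max(y2 - im.shape[0] + 1, 0)

    # Crop the in-frame part, then zero-pad to the full square
    patch = im[y1 + y1_pad:y2 - y2_pad, x1 + x1_pad:x2 - x2_pad, :]
    patch = cv2.copyMakeBorder(patch, y1_pad, y2_pad, x1_pad, x2_pad, cv2.BORDER_CONSTANT)

    resize_factor = output_sz / crop_sz
    patch = cv2.resize(patch, (output_sz, output_sz))  # e.g. 320 x 320 x 3
    mask = np.zeros((output_sz, output_sz))            # simplified attention mask
    return patch, resize_factor, mask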

search = self.preprocessor.process(x_patch_arr)  # -> 1 x 3 x 320 x 320, matching the network input
out_dict, _ = self.network.forward_test(search, run_score_head=True)
------------------------------------------------------------
Inside network.forward_test:

    template, search = self.backbone.forward_test(search)
    --------------------------------------------------------
    Inside backbone.forward_test, the search patch runs through the three stages
    of the ConvolutionalVisionTransformer:

        search = getattr(self, f'stage{i}').forward_test(search)  # i = 0, 1, 2
            search = self.patch_embed(search)   # 1 x 3 x 320 x 320 -> 1 x 64 x 80 x 80 (stage 0)
            s_B, s_C, s_H, s_W = search.size()  # 1, 64, 80, 80; s_C is the channel count after the embedding conv
            x = rearrange(search, 'b c h w -> b (h w) c').contiguous()  # flatten to tokens: 1 x 6400 x 64
            x = self.pos_drop(x)  # dropout on the token sequence, still 1 x 6400 x 64
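A minimal sketch of the convolutional patch embedding, assuming the usual stage-0 CvT design (a 7 x 7 stride-4 conv followed by LayerNorm over the flattened tokens); the class name and defaults are illustrative:

import torch.nn as nn
from einops import rearrange

class ConvEmbedSketch(nn.Module):
    """Downsample the input with a strided conv, then LayerNorm the tokens."""
    def __init__(self, in_chans=3, embed_dim=64, patch_size=7, stride=4, padding=2):
        super().__init__()
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size,
                              stride=stride, padding=padding)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        x = self.proj(x)                          # 1 x 3 x 320 x 320 -> 1 x 64 x 80 x 80
        B, C, H, W = x.shape
        x = rearrange(x, 'b c h w -> b (h w) c')  # 1 x 6400 x 64
        x = self.norm(x)
        return rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)  # back to 1 x 64 x 80 x 80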
            for i, blk in enumerate(self.blocks):
                x = blk.forward_test(x, s_H, s_W)  # each block is one mixed-attention layer plus an MLP
                    res = x            # 1 x 6400 x 64
                    x = self.norm1(x)  # 1 x 6400 x 64
                    attn = self.attn.forward_test(x, s_h, s_w)
                        q_s, k, v = self.forward_conv_test(x, s_h, s_w)
                        # conv projections: q_s 1 x 6400 x 64; k, v 1 x 2112 x 64 after the concatenation below
                            k = torch.cat([self.t_k, self.ot_k, k], dim=1)
                            # cached template keys 1 x 256 x 64, cached online-template keys 1 x 256 x 64,
                            # and search keys 1 x 1600 x 64 (the k/v conv downsamples 80 x 80 to 40 x 40)
                            # concatenate to 1 x 2112 x 64; v is extended with the cached values the same way
                        q_s = rearrange(self.proj_q(q_s), 'b t (h d) -> b h t d', h=self.num_heads).contiguous()  # 1 x 1 x 6400 x 64 (num_heads = 1)
                        k = rearrange(self.proj_k(k), 'b t (h d) -> b h t d', h=self.num_heads).contiguous()      # 1 x 1 x 2112 x 64
                        v = rearrange(self.proj_v(v), 'b t (h d) -> b h t d', h=self.num_heads).contiguous()      # 1 x 1 x 2112 x 64
                        attn_score = torch.einsum('bhlk,bhtk->bhlt', [q_s, k]) * self.scale  # 1 x 1 x 6400 x 2112; batched q_s @ k^T, scaled by 0.125 = 1/sqrt(64)
                        attn = F.softmax(attn_score, dim=-1)  # 1 x 1 x 6400 x 2112
                        attn = self.attn_drop(attn)
                        x_s = torch.einsum('bhlt,bhtv->bhlv', [attn, v])  # weighted sum over the 2112 key/value tokens: 1 x 1 x 6400 x 64
                        x_s = rearrange(x_s, 'b h t d -> b t (h d)').contiguous()  # 1 x 6400 x 64
                        x = x_s
                        x = self.proj(x)  # Linear(in_features=64, out_features=64, bias=True)
                        x = self.proj_drop(x)
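Putting these steps together, a minimal sketch of this asymmetric mixed attention at test time: queries come only from the search tokens, while keys and values are the search tokens concatenated with cached template tokens. The tensors t_k/t_v and ot_k/ot_v follow the trace above; the learned projections (proj_q/k/v and the k/v downsampling convs) are omitted for brevity:

import torch
import torch.nn.functional as F
from einops import rearrange

def mixed_attention_test(q_s, k_s, v_s, t_k, t_v, ot_k, ot_v, num_heads=1):
    """Asymmetric mixed attention at test time (sketch).

    q_s:        search queries,      1 x 6400 x 64
    k_s, v_s:   search keys/values,  1 x 1600 x 64 (spatially downsampled)
    t_k, t_v:   cached template keys/values,        1 x 256 x 64
    ot_k, ot_v: cached online-template keys/values, 1 x 256 x 64
    """
    # Keys/values span template + online template + search tokens
    k = torch.cat([t_k, ot_k, k_s], dim=1)  # 1 x 2112 x 64
    v = torch.cat([t_v, ot_v, v_s], dim=1)  # 1 x 2112 x 64

    # Split heads (num_heads = 1 in the trace above)
    q = rearrange(q_s, 'b t (h d) -> b h t d', h=num_heads)
    k = rearrange(k, 'b t (h d) -> b h t d', h=num_heads)
    v = rearrange(v, 'b t (h d) -> b h t d', h=num_heads)

    scale = q.shape[-1] ** -0.5  # 1/sqrt(64) = 0.125
    attn = F.softmax(torch.einsum('bhlk,bhtk->bhlt', q, k) * scale, dim=-1)
    out = torch.einsum('bhlt,bhtv->bhlv', attn, v)  # 1 x 1 x 6400 x 64
    return rearrange(out, 'b h t d -> b t (h d)')   # 1 x 6400 x 64

Because the template keys and values are cached when the templates are set, each new frame only recomputes the search-token projections, which is what makes forward_test cheap.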
                    x = res + self.drop_path(attn)  # back in the block: residual around the attention, 1 x 6400 x 64
                    x = x + self.drop_path(self.mlp(self.norm2(x)))  # residual around the MLP, 1 x 6400 x 64
                        mlp: (fc1): Linear(in_features=64, out_features=256, bias=True)
                             (act): QuickGELU()
                             (fc2): Linear(in_features=256, out_features=64, bias=True)
                             (drop): Dropout(p=0.0, inplace=False)

                            x = self.fc1(x)
                            x = self.act(x)
                            x = self.drop(x)
                            x = self.fc2(x)
                            x = self.drop(x)  # 1 x 6400 x 64
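QuickGELU is the sigmoid-based GELU approximation popularized by CLIP. A self-contained sketch of it together with the MLP traced above (layer sizes taken from the printout; class names are illustrative):

import torch
import torch.nn as nn

class QuickGELU(nn.Module):
    """Fast GELU approximation: x * sigmoid(1.702 * x)."""
    def forward(self, x):
        return x * torch.sigmoid(1.702 * x)

class MlpSketch(nn.Module):
    """Token-wise feed-forward with a 4x hidden expansion (64 -> 256 -> 64)."""
    def __init__(self, dim=64, hidden=256, drop=0.0):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidden)
        self.act = QuickGELU()
        self.fc2 = nn.Linear(hidden, dim)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        return self.drop(self.fc2(self.drop(self.act(self.fc1(x)))))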
            search = x  # 1 x 6400 x 64
            search = rearrange(search, 'b (h w) c -> b c h w', h=s_H, w=s_W)  # back to 1 x 64 x 80 x 80
        Stage 0 is complete; its output is fed to the next stage.
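For orientation, a sketch of the full three-stage loop and the search-branch shapes, assuming the CvT-21-style configuration (embed dims 64/192/384, patch strides 4/2/2, depths 1/4/16) that MixFormer's backbone is commonly built on; exact numbers depend on the config:

def backbone_stages_sketch(stages, search):
    """Run the search patch through the three CvT stages.

    Under the assumed config, the search branch shrinks as:
    1 x 3 x 320 x 320 -> 1 x 64 x 80 x 80 -> 1 x 192 x 40 x 40 -> 1 x 384 x 20 x 20
    """
    for stage in stages:              # stage0, stage1, stage2
        search = stage.forward_test(search)
    return search                     # the final map is what the box head consumes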