def run(settings):
# update settings based on cfg
update_settings(settings, cfg)
# Build dataloaders
loader_train, loader_val = build_dataloaders(cfg, settings)
-----------------------------------------------------------------------------------------------------------------------
loader_train = LTRLoader('train', dataset_train, training=True, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=shuffle,
num_workers=cfg.TRAIN.NUM_WORKER, drop_last=True, stack_dim=1, sampler=train_sampler)
#LTRLoader:1875
dataset_val = sampler.TrackingSampler(datasets=names2datasets(cfg.DATA.VAL.DATASETS_NAME, settings, opencv_loader),
p_datasets=cfg.DATA.VAL.DATASETS_RATIO,
samples_per_epoch=cfg.DATA.VAL.SAMPLE_PER_EPOCH,
max_gap=cfg.DATA.MAX_SAMPLE_INTERVAL, num_search_frames=settings.num_search,
num_template_frames=settings.num_template, processing=data_processing_val,
frame_sample_mode=sampler_mode, train_cls=train_score, pos_prob=0.5)
#TrackingSampler:10000
loader_val = LTRLoader('val', dataset_val, training=False, batch_size=cfg.TRAIN.BATCH_SIZE,
num_workers=cfg.TRAIN.NUM_WORKER, drop_last=True, stack_dim=1, sampler=val_sampler,
epoch_interval=cfg.TRAIN.VAL_EPOCH_INTERVAL)
#LTRLoader:312
-----------------------------------------------------------------------------------------------------------------------
net = build_mixformer_online_score(cfg, settings)
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
backbone = get_mixformer_online_model(cfg) # backbone without positional encoding and attention mask
***********************************************************************************************************************
msvit_spec = config.MODEL.BACKBONE
msvit = ConvolutionalVisionTransformer(
in_chans=3,
act_layer=QuickGELU,
norm_layer=partial(LayerNorm, eps=1e-5),
init=getattr(msvit_spec, 'INIT', 'trunc_norm'),
spec=msvit_spec
)
#######################################################################################################################
ConvolutionalVisionTransformer(
(stage0): VisionTransformer(
(patch_embed): ConvEmbed(
(proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))
(norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0): Block(
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(conv_proj_q): Sequential(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_k): Sequential(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_v): Sequential(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(proj_q): Linear(in_features=64, out_features=64, bias=True)
(proj_k): Linear(in_features=64, out_features=64, bias=True)
(proj_v): Linear(in_features=64, out_features=64, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=64, out_features=64, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=64, out_features=256, bias=True)
(act): QuickGELU()
(fc2): Linear(in_features=256, out_features=64, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
)
(stage1): VisionTransformer(
(patch_embed): ConvEmbed(
(proj): Conv2d(64, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0-3): Block(
(norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(conv_proj_q): Sequential(
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_k): Sequential(
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_v): Sequential(
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(proj_q): Linear(in_features=192, out_features=192, bias=True)
(proj_k): Linear(in_features=192, out_features=192, bias=True)
(proj_v): Linear(in_features=192, out_features=192, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=192, out_features=192, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): QuickGELU()
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(stage2): VisionTransformer(
(patch_embed): ConvEmbed(
(proj): Conv2d(192, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0-16): Block(
(norm1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(conv_proj_q): Sequential(
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_k): Sequential(
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_v): Sequential(
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(proj_q): Linear(in_features=384, out_features=384, bias=True)
(proj_k): Linear(in_features=384, out_features=384, bias=True)
(proj_v): Linear(in_features=384, out_features=384, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=384, out_features=384, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): QuickGELU()
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
#######################################################################################################################
if config.MODEL.BACKBONE.PRETRAINED:
try:
ckpt_path = config.MODEL.BACKBONE.PRETRAINED_PATH
ckpt = torch.load(ckpt_path, map_location='cpu')
missing_keys, unexpected_keys = msvit.load_state_dict(ckpt, strict=False)
if is_main_process():
print("missing keys:", missing_keys)
print("unexpected keys:", unexpected_keys)
print("Loading pretrained CVT done.")
except:
print("Warning: Pretrained CVT weights are not loaded")
return msvit
***********************************************************************************************************************
box_head = build_box_head(cfg) # a simple corner head
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Corner_Predictor(
(conv1_tl): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv2_tl): Sequential(
(0): Conv2d(384, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv3_tl): Sequential(
(0): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv4_tl): Sequential(
(0): Conv2d(96, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv5_tl): Conv2d(48, 1, kernel_size=(1, 1), stride=(1, 1))
(conv1_br): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv2_br): Sequential(
(0): Conv2d(384, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv3_br): Sequential(
(0): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv4_br): Sequential(
(0): Conv2d(96, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv5_br): Conv2d(48, 1, kernel_size=(1, 1), stride=(1, 1))
)
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
score_branch = ScoreDecoder(cfg, pool_size=4) # the proposed score prediction module (SPM)
=======================================================================================================================
=======================================================================================================================
model = MixFormerOnlineScore(
backbone,
box_head,
score_branch,
head_type=cfg.MODEL.HEAD_TYPE
)
=======================================================================================================================
self.backbone = backbone
self.box_head = box_head
self.score_branch = score_branch
self.head_type = head_type
=======================================================================================================================
if cfg.MODEL.PRETRAINED_STAGE1 and train:
try:
ckpt_path = settings.stage1_model  # e.g. '/home/lq/models/mixformer/models/711_MixFormer_ep0500.pth.tar'
ckpt = torch.load(ckpt_path, map_location='cpu')
missing_keys, unexpected_keys = model.load_state_dict(ckpt['net'], strict=False)
if is_main_process():
print("missing keys:", missing_keys)
print("unexpected keys:", unexpected_keys)
print("Loading pretrained mixformer weights done.")
except:
print("Warning: Pretrained mixformer weights are not loaded")
return model
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# wrap networks to distributed one
net.cuda()
settings.device = torch.device("cuda:0")
# settings.save_every_epoch = True
# Loss functions and Actors
if settings.script_name == 'mixformer_online':
objective = {'giou': giou_loss, 'l1': l1_loss, 'score': BCEWithLogitsLoss()}
loss_weight = {'giou': cfg.TRAIN.GIOU_WEIGHT, 'l1': cfg.TRAIN.L1_WEIGHT, 'score': cfg.TRAIN.SCORE_WEIGHT}
actor = MixFormerActor(net=net, objective=objective, loss_weight=loss_weight, settings=settings, run_score_head=True)
else:
raise ValueError("illegal script name")
# Optimizer, parameters, and learning rates
optimizer, lr_scheduler = get_optimizer_scheduler(net, cfg)
use_amp = getattr(cfg.TRAIN, "AMP", False)
trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler, use_amp=use_amp)
=======================================================================================================================
# Initialize statistics variables
self.stats = OrderedDict({loader.name: None for loader in self.loaders})  # e.g. OrderedDict([('train', None), ('val', None)])
self.move_data_to_gpu = getattr(settings, 'move_data_to_gpu', True)
=======================================================================================================================
# train process
trainer.train(cfg.TRAIN.EPOCH, load_latest=True, fail_safe=True)
mixformer_onlinescore training walkthrough
First published: 2022-07-12 23:11:38