def run(settings):
# update settings based on cfg
update_settings(settings, cfg)
# Build dataloaders
loader_train, loader_val = build_dataloaders(cfg, settings)
-----------------------------------------------------------------------------------------------------------------------
loader_train = LTRLoader('train', dataset_train, training=True, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=shuffle,
num_workers=cfg.TRAIN.NUM_WORKER, drop_last=True, stack_dim=1, sampler=train_sampler)
#LTRLoader:1875
dataset_val = sampler.TrackingSampler(datasets=names2datasets(cfg.DATA.VAL.DATASETS_NAME, settings, opencv_loader),
p_datasets=cfg.DATA.VAL.DATASETS_RATIO,
samples_per_epoch=cfg.DATA.VAL.SAMPLE_PER_EPOCH,
max_gap=cfg.DATA.MAX_SAMPLE_INTERVAL, num_search_frames=settings.num_search,
num_template_frames=settings.num_template, processing=data_processing_val,
frame_sample_mode=sampler_mode, train_cls=train_score, pos_prob=0.5)
#TrackingSampler:10000
loader_val = LTRLoader('val', dataset_val, training=False, batch_size=cfg.TRAIN.BATCH_SIZE,
num_workers=cfg.TRAIN.NUM_WORKER, drop_last=True, stack_dim=1, sampler=val_sampler,
epoch_interval=cfg.TRAIN.VAL_EPOCH_INTERVAL)
#LTRLoader:312
-----------------------------------------------------------------------------------------------------------------------
net = build_mixformer_online_score(cfg, settings)
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
backbone = get_mixformer_online_model(cfg) # backbone without positional encoding and attention mask
***********************************************************************************************************************
msvit_spec = config.MODEL.BACKBONE
msvit = ConvolutionalVisionTransformer(
in_chans=3,
act_layer=QuickGELU,
norm_layer=partial(LayerNorm, eps=1e-5),
init=getattr(msvit_spec, 'INIT', 'trunc_norm'),
spec=msvit_spec
)
#######################################################################################################################
ConvolutionalVisionTransformer(
(stage0): VisionTransformer(
(patch_embed): ConvEmbed(
(proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))
(norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0): Block(
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(conv_proj_q): Sequential(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_k): Sequential(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_v): Sequential(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(proj_q): Linear(in_features=64, out_features=64, bias=True)
(proj_k): Linear(in_features=64, out_features=64, bias=True)
(proj_v): Linear(in_features=64, out_features=64, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=64, out_features=64, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=64, out_features=256, bias=True)
(act): QuickGELU()
(fc2): Linear(in_features=256, out_features=64, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
)
(stage1): VisionTransformer(
(patch_embed): ConvEmbed(
(proj): Conv2d(64, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0-3): Block(
(norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(conv_proj_q): Sequential(
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_k): Sequential(
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_v): Sequential(
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(proj_q): Linear(in_features=192, out_features=192, bias=True)
(proj_k): Linear(in_features=192, out_features=192, bias=True)
(proj_v): Linear(in_features=192, out_features=192, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=192, out_features=192, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=192, out_features=768, bias=True)
(act): QuickGELU()
(fc2): Linear(in_features=768, out_features=192, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(stage2): VisionTransformer(
(patch_embed): ConvEmbed(
(proj): Conv2d(192, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0-16): Block(
(norm1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(attn): Attention(
(conv_proj_q): Sequential(
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_k): Sequential(
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(conv_proj_v): Sequential(
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
(bn): FrozenBatchNorm2d()
(rearrage): Rearrange('b c h w -> b (h w) c')
)
(proj_q): Linear(in_features=384, out_features=384, bias=True)
(proj_k): Linear(in_features=384, out_features=384, bias=True)
(proj_v): Linear(in_features=384, out_features=384, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=384, out_features=384, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
)
(drop_path): Identity()
(norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=384, out_features=1536, bias=True)
(act): QuickGELU()
(fc2): Linear(in_features=1536, out_features=384, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
#######################################################################################################################
if config.MODEL.BACKBONE.PRETRAINED:
try:
ckpt_path = config.MODEL.BACKBONE.PRETRAINED_PATH
ckpt = torch.load(ckpt_path, map_location='cpu')
missing_keys, unexpected_keys = msvit.load_state_dict(ckpt, strict=False)
if is_main_process():
print("missing keys:", missing_keys)
print("unexpected keys:", unexpected_keys)
print("Loading pretrained CVT done.")
except:
print("Warning: Pretrained CVT weights are not loaded")
return msvit
***********************************************************************************************************************
box_head = build_box_head(cfg) # a simple corner head
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Corner_Predictor(
(conv1_tl): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv2_tl): Sequential(
(0): Conv2d(384, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv3_tl): Sequential(
(0): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv4_tl): Sequential(
(0): Conv2d(96, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv5_tl): Conv2d(48, 1, kernel_size=(1, 1), stride=(1, 1))
(conv1_br): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv2_br): Sequential(
(0): Conv2d(384, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv3_br): Sequential(
(0): Conv2d(192, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv4_br): Sequential(
(0): Conv2d(96, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): FrozenBatchNorm2d()
(2): ReLU(inplace=True)
)
(conv5_br): Conv2d(48, 1, kernel_size=(1, 1), stride=(1, 1))
)
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
score_branch = ScoreDecoder(cfg, pool_size=4) # the proposed score prediction module (SPM)
=======================================================================================================================
=======================================================================================================================
model = MixFormerOnlineScore(
backbone,
box_head,
score_branch,
head_type=cfg.MODEL.HEAD_TYPE
)
=======================================================================================================================
self.backbone = backbone
self.box_head = box_head
self.score_branch = score_branch
self.head_type = head_type
=======================================================================================================================
if cfg.MODEL.PRETRAINED_STAGE1 and train:
try:
ckpt_path = settings.stage1_model  # e.g. '/home/lq/models/mixformer/models/711_MixFormer_ep0500.pth.tar'
ckpt = torch.load(ckpt_path, map_location='cpu')
missing_keys, unexpected_keys = model.load_state_dict(ckpt['net'], strict=False)
if is_main_process():
print("missing keys:", missing_keys)
print("unexpected keys:", unexpected_keys)
print("Loading pretrained mixformer weights done.")
except:
print("Warning: Pretrained mixformer weights are not loaded")
return model
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# wrap networks to distributed one
net.cuda()
settings.device = torch.device("cuda:0")
# settings.save_every_epoch = True
# Loss functions and Actors
if settings.script_name == 'mixformer_online':
objective = {'giou': giou_loss, 'l1': l1_loss, 'score': BCEWithLogitsLoss()}
loss_weight = {'giou': cfg.TRAIN.GIOU_WEIGHT, 'l1': cfg.TRAIN.L1_WEIGHT, 'score': cfg.TRAIN.SCORE_WEIGHT}
actor = MixFormerActor(net=net, objective=objective, loss_weight=loss_weight, settings=settings, run_score_head=True)
else:
raise ValueError("illegal script name")
# Optimizer, parameters, and learning rates
optimizer, lr_scheduler = get_optimizer_scheduler(net, cfg)
use_amp = getattr(cfg.TRAIN, "AMP", False)
trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler, use_amp=use_amp)
=======================================================================================================================
# Initialize statistics variables
self.stats = OrderedDict({loader.name: None for loader in self.loaders})  # e.g. OrderedDict([('train', None), ('val', None)])
self.move_data_to_gpu = getattr(settings, 'move_data_to_gpu', True)
=======================================================================================================================
# train process
trainer.train(cfg.TRAIN.EPOCH, load_latest=True, fail_safe=True)
mixformer_onlinescore training walkthrough
First published: 2022-07-12 23:11:38