AdamW
1. Implementation with PyTorch

PyTorch ships AdamW as torch.optim.AdamW. Unlike plain Adam with an L2 penalty, AdamW applies weight decay decoupled from the gradient-based update. Basic usage in a training step:
import torch

optimizer = torch.optim.AdamW(model.parameters(),
                              lr=1e-4, weight_decay=1e-3)

optimizer.zero_grad()   # clear accumulated gradients
loss.backward()         # backpropagation
optimizer.step()        # AdamW parameter update
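For context, a minimal sketch of how these three calls sit inside a training loop; model, train_loader, loss_fn, and num_epochs are assumed placeholders, not part of the original snippet:

for epoch in range(num_epochs):
    for x, y in train_loader:
        optimizer.zero_grad()          # reset gradients from the previous step
        loss = loss_fn(model(x), y)    # forward pass
        loss.backward()                # backpropagation
        optimizer.step()               # AdamW parameter update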
To lower the learning rate when the validation metric plateaus, pair the optimizer with ReduceLROnPlateau:

optimizer = torch.optim.AdamW(model.parameters(),
                              lr=1e-4, weight_decay=1e-3)
# mode='max' because val_dice is a score to maximize; the default
# mode='min' would cut the LR while the metric is still improving
lr_step = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5)

lr_step.step(val_dice)  # call once per epoch with the validation Dice score
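For completeness, a minimal sketch of where the scheduler step belongs in the epoch loop; train_one_epoch, evaluate_dice, val_loader, and num_epochs are hypothetical placeholders, not from the original snippet:

for epoch in range(num_epochs):
    train_one_epoch(model, train_loader, optimizer)   # hypothetical training helper
    val_dice = evaluate_dice(model, val_loader)       # hypothetical validation helper
    lr_step.step(val_dice)                            # scheduler reads the metric here
    # the current learning rate can be inspected via the optimizer
    print(f"epoch {epoch}: lr={optimizer.param_groups[0]['lr']:.1e}")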