In multi-task learning, several networks are trained together, and the sub-networks often need different learning rates. PyTorch supports this in several ways.
First, set up the networks as follows:
import torch
import tqdm

# The encoder parameters are shared; the two decoders are trained separately.
Encoder = SharedEncoder().cuda()
Dose_decoder = Dose_prediction().cuda()
Gra_decoder = Gradient_regression().cuda()
criterion = torch.nn.MSELoss()
lr1 = 0.0002  # learning rate for the shared encoder and the dose decoder
lr2 = 0.0001  # learning rate for the gradient decoder
Method 1: one optimizer per sub-network
# Use three optimizers, one per sub-network, each with its own learning rate.
opt1 = torch.optim.Adam(Encoder.parameters(), lr=lr1)
opt2 = torch.optim.Adam(Dose_decoder.parameters(), lr=lr1)
opt3 = torch.optim.Adam(Gra_decoder.parameters(), lr=lr2)

Encoder.train()
Dose_decoder.train()
Gra_decoder.train()
for input, label, gra_label, _ in tqdm.tqdm(train_dataloaders):
    inputs = input.cuda()
    labels = label.cuda()
    gra_labels = gra_label.cuda()
    # Clear the gradients held by all three optimizers.
    opt1.zero_grad()
    opt2.zero_grad()
    opt3.zero_grad()
    # One shared forward pass through the encoder, then one pass per decoder.
    conv1, conv2, conv3, conv4, center = Encoder(inputs)
    outputs = Dose_decoder(conv1, conv2, conv3, conv4, center)
    output_gra = Gra_decoder(conv1, conv2, conv3, conv4, center)
    loss_main = criterion(outputs, labels)
    loss_gra = criterion(output_gra, gra_labels)
    # The losses must be summed into a single scalar before backpropagation,
    # so that one backward pass populates the gradients of all three networks.
    loss = 10 * loss_main + 5 * loss_gra
    loss.backward()
    # Each optimizer updates its own parameters at its own learning rate.
    opt1.step()
    opt2.step()
    opt3.step()
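A side benefit of separate optimizers is that each sub-network can also follow its own learning-rate schedule. A minimal sketch, assuming StepLR with placeholder settings (not part of the original setup):

# Hypothetical per-optimizer schedulers; step_size and gamma are illustrative.
sched1 = torch.optim.lr_scheduler.StepLR(opt1, step_size=20, gamma=0.5)
sched3 = torch.optim.lr_scheduler.StepLR(opt3, step_size=20, gamma=0.5)
# Call sched1.step() and sched3.step() once per epoch, after the optimizer steps.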
Method 2: one optimizer with parameter groups
# A parameter group that does not set its own 'lr' falls back to the outer
# lr passed to the optimizer (lr1 here); only Gra_decoder overrides it.
optimizer = torch.optim.Adam([
    {'params': Encoder.parameters()},
    {'params': Dose_decoder.parameters()},
    {'params': Gra_decoder.parameters(), 'lr': lr2}
], lr=lr1)
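Because the fallback behavior is easy to get wrong, it is worth checking the learning rate that each group actually received; optimizer.param_groups preserves the order of the list above:

# Sanity check: the first two groups should report lr1, the last one lr2.
for i, group in enumerate(optimizer.param_groups):
    print(i, group['lr'])

The training loop is otherwise the same as in Method 1, except that the three zero_grad()/step() calls are replaced by a single optimizer.zero_grad() and optimizer.step().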
Method 3: merge all parameters with itertools.chain
Use chain from the built-in itertools module to splice the parameter iterators together. Note that this puts every parameter into a single group, so they all share one learning rate:
from itertools import chain

optimizer = torch.optim.Adam(
    params=chain(Encoder.parameters(),
                 Dose_decoder.parameters(),
                 Gra_decoder.parameters()),
    lr=0.0001)
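An equivalent way to write this, if you prefer an explicit list over an iterator (a minor variation, not from the original), is to concatenate the parameter lists:

all_params = (list(Encoder.parameters())
              + list(Dose_decoder.parameters())
              + list(Gra_decoder.parameters()))
optimizer = torch.optim.Adam(all_params, lr=0.0001)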