I've recently started writing some deep learning models in PyTorch. I haven't studied PyTorch systematically or had time to look at how other people write their code, so for now I'm just recording some of my own thoughts.
When writing models with multiple branches in PyTorch (for example, architectures with feature fusion, model fusion, or multiple tasks), a few questions come up: how should the model class be written, how does the loss propagate, and should the inputs be fused before being passed into forward, or fused inside forward?
Feature fusion
Extract features from every input with the same model, fuse the features just before the FC layer, then classify.
import torch
import torch.nn as nn

class _CCN(nn.Module):
    def __init__(self, num, p, cnt):
        super(_CCN, self).__init__()
        self.cnt = cnt  # number of inputs that will be fused
        self.features = nn.Sequential(
            nn.Conv3d(1, num, 3, 1, 0, bias=False),
            nn.MaxPool3d(2, 1, 0),
            nn.BatchNorm3d(num),
            nn.LeakyReLU(),
            nn.Dropout(p),
            # output: num channels, spatial size d * h * w
        )
        self.classifier = nn.Sequential(
            # d, h, w stand for the ? * ? * ? placeholders in the original
            # notes: the feature-map size after self.features, which depends
            # on the input volume size (see the sketch below for computing it)
            nn.Linear(self.cnt * num * d * h * w, 30),
        )

    # inputs passed to forward is a list of tensors
    def forward(self, inputs):
        out = self.features(inputs[0].cuda())
        # run every remaining input through the shared extractor and
        # concatenate along the channel dimension
        for i in range(1, len(inputs)):
            x = self.features(inputs[i].cuda())
            out = torch.cat((out, x), dim=1)
        batch_size = out.shape[0]
        out = out.view(batch_size, -1)
        out = self.classifier(out)
        return out
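The d * h * w size above depends on the input volume, so one way to avoid working it out by hand is to push a dummy tensor through the feature extractor once and count the elements. A minimal sketch, assuming a known input shape such as a 1 x 32 x 32 x 32 volume (the size and the helper name flattened_size are mine, for illustration):

def flattened_size(features: nn.Module, input_shape=(1, 32, 32, 32)) -> int:
    # input_shape is (C, D, H, W); the 32^3 volume is only an assumed example
    with torch.no_grad():
        dummy = torch.zeros(1, *input_shape)
        out = features(dummy)
    return out.numel()  # elements per sample after self.features

# usage inside __init__ (sketch):
# fc_in = self.cnt * flattened_size(self.features)
# self.classifier = nn.Sequential(nn.Linear(fc_in, 30))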
With this structure every input shares the same convolution weights. The downside is that if each input is meant to learn different features, the shared weights cannot be optimal for every input; if the inputs are essentially similar, the sharing reduces the parameter count.
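Since the weights are shared, the per-input Python loop in forward can also be folded into the batch dimension so the extractor runs once over all inputs. A sketch, assuming every tensor in the list has the same (B, 1, D, H, W) shape:

def forward(self, inputs):
    x = torch.cat(inputs, dim=0).cuda()      # (len(inputs)*B, 1, D, H, W)
    out = self.features(x)                   # one pass through shared weights
    chunks = out.chunk(len(inputs), dim=0)   # split back per input
    out = torch.cat(chunks, dim=1)           # fuse along the channel dim
    out = out.view(out.shape[0], -1)
    return self.classifier(out)

One caveat: BatchNorm statistics are then computed over all inputs jointly rather than per input, which may matter during training.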
Model fusion
Extract features from the input with different models, fuse the features before the FC layer, then classify.
My current idea is to put both models in one class: define several self.features in the pattern above, feed the input into each self.features separately, fuse the results, and pass them into self.classifier.
class _CCN(nn.Module):
    def __init__(self, num, p, cnt):
        super(_CCN, self).__init__()
        self.cnt = cnt
        self.features0 = nn.Sequential(
            nn.Conv3d(1, num, 3, 1, 0, bias=False),
            nn.MaxPool3d(2, 1, 0),
            nn.BatchNorm3d(num),
            nn.LeakyReLU(),
            nn.Dropout(p),
        )
        self.features1 = nn.Sequential(
            nn.Conv3d(1, num, 3, 1, 0, bias=False),
            nn.MaxPool3d(2, 1, 0),
            nn.BatchNorm3d(num),
            nn.LeakyReLU(),
            nn.Dropout(p),
        )
        self.classifier = nn.Sequential(
            # d, h, w: feature-map size (the ? * ? * ? placeholders),
            # depends on the input volume size
            nn.Linear(self.cnt * num * d * h * w, 30),
        )

    def forward(self, inputs):
        # the same input goes through two extractors with independent weights
        x0 = self.features0(inputs)
        x1 = self.features1(inputs)
        out = torch.cat((x0, x1), dim=1)
        batch_size = out.shape[0]
        out = out.view(batch_size, -1)
        out = self.classifier(out)
        return out
This resolves the problem from the feature-fusion case: when multiple inputs should not share weights, you can either fuse models like this or write several identical feature-extraction containers. One remaining question is whether a batch of identical extractors can be constructed programmatically instead of being written out one by one; a sketch of this follows below.
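nn.ModuleList handles exactly this: it registers any number of identically structured (but independently weighted) submodules built in a loop. A sketch; _CCN_Multi, make_features, and the fc_in argument are names I'm introducing for illustration:

def make_features(num, p):
    # same feature extractor as self.features above
    return nn.Sequential(
        nn.Conv3d(1, num, 3, 1, 0, bias=False),
        nn.MaxPool3d(2, 1, 0),
        nn.BatchNorm3d(num),
        nn.LeakyReLU(),
        nn.Dropout(p),
    )

class _CCN_Multi(nn.Module):
    def __init__(self, num, p, n_branches, fc_in):
        super().__init__()
        # n copies of the same architecture, each with its own weights
        self.branches = nn.ModuleList(
            [make_features(num, p) for _ in range(n_branches)]
        )
        self.classifier = nn.Linear(fc_in, 30)

    def forward(self, inputs):
        # one input tensor per branch; fuse along the channel dimension
        feats = [branch(x) for branch, x in zip(self.branches, inputs)]
        out = torch.cat(feats, dim=1)
        return self.classifier(out.view(out.shape[0], -1))

A plain Python list would not work here: parameters inside it would be invisible to model.parameters() and the optimizer.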
Decision fusion
Train different models, combine their outputs (e.g., by weighted sum), and use the result as the final classification.
class _CCN0(nn.Module):
    def __init__(self, num, p):
        super(_CCN0, self).__init__()
        self.features = nn.Sequential(
            nn.Conv3d(1, num, 3, 1, 0, bias=False),
            nn.MaxPool3d(2, 1, 0),
            nn.BatchNorm3d(num),
            nn.LeakyReLU(),
            nn.Dropout(p),
        )
        self.classifier = nn.Sequential(
            # note: the original multiplied by self.cnt here, but these
            # single-input models take no cnt argument, so it is dropped;
            # d, h, w are the ? * ? * ? feature-map placeholders again
            nn.Linear(num * d * h * w, 30),
        )

    def forward(self, inputs):
        out = self.features(inputs)
        batch_size = out.shape[0]
        out = out.view(batch_size, -1)
        out = self.classifier(out)
        return out

class _CCN1(nn.Module):
    # identical architecture to _CCN0, but with its own weights
    def __init__(self, num, p):
        super(_CCN1, self).__init__()
        self.features = nn.Sequential(
            nn.Conv3d(1, num, 3, 1, 0, bias=False),
            nn.MaxPool3d(2, 1, 0),
            nn.BatchNorm3d(num),
            nn.LeakyReLU(),
            nn.Dropout(p),
        )
        self.classifier = nn.Sequential(
            nn.Linear(num * d * h * w, 30),
        )

    def forward(self, inputs):
        out = self.features(inputs)
        batch_size = out.shape[0]
        out = out.view(batch_size, -1)
        out = self.classifier(out)
        return out
import torch.optim as optim
from torch.utils.data import DataLoader

class CCN_Wrapper:
    def __init__(self, fil_num, drop_rate, seed, batch_size,
                 balanced, Data_dir, exp_idx, model_name):
        self.seed = seed
        self.exp_idx = exp_idx
        self.Data_dir = Data_dir
        self.model_name = model_name
        self.eval_metric = get_acc  # external helper, defined elsewhere
        self.batch_size = batch_size
        self.prepare_dataloader(batch_size, balanced, Data_dir)
        self.model0 = _CCN0(num=fil_num, p=drop_rate).cuda()
        self.model1 = _CCN1(num=fil_num, p=drop_rate).cuda()
        # hand-picked fusion weights for the two outputs
        self.weight0, self.weight1 = 0.5, 0.5

    def train(self, lr, epochs):
        print("training ....")
        self.optimizer0 = optim.Adam(self.model0.parameters(), lr=lr, betas=(0.5, 0.999))
        self.optimizer1 = optim.Adam(self.model1.parameters(), lr=lr, betas=(0.5, 0.999))
        self.criterion = nn.CrossEntropyLoss().cuda()
        for self.epoch in range(epochs):
            self.train_model_epoch()
            valid_metric = self.valid_model_epoch()

    def train_model_epoch(self):
        # the two models are trained independently on the same batches;
        # the fused prediction is only formed at evaluation time
        self.model0.train(True)
        self.model1.train(True)
        for inputs, labels in self.train_dataloader:
            inputs, labels = inputs.cuda(), labels.cuda()
            self.optimizer0.zero_grad()
            self.optimizer1.zero_grad()
            preds0 = self.model0(inputs)
            preds1 = self.model1(inputs)
            loss0 = self.criterion(preds0, labels)
            loss1 = self.criterion(preds1, labels)
            loss0.backward()
            loss1.backward()
            self.optimizer0.step()
            self.optimizer1.step()

    def valid_model_epoch(self):
        with torch.no_grad():
            self.model0.train(False)
            self.model1.train(False)
            for inputs, labels in self.valid_dataloader:
                inputs, labels = inputs.cuda(), labels.cuda()
                preds0 = self.model0(inputs)
                preds1 = self.model1(inputs)
                # decision fusion: weighted sum of the two outputs
                preds = self.weight0 * preds0 + self.weight1 * preds1
                acc = get_acc(preds, labels)
        # only the last batch's accuracy is kept, as in the original sketch
        return acc

    def test(self):
        print('testing ... ')
        # checkpoint paths were left out of the original notes
        self.model0.load_state_dict(torch.load())
        self.model1.load_state_dict(torch.load())
        self.model0.train(False)
        self.model1.train(False)
        with torch.no_grad():
            for stage in ['train', 'valid', 'test']:
                data = CCN_Data(self.Data_dir, self.exp_idx, stage=stage, seed=self.seed)
                dataloader = DataLoader(data, batch_size=10, shuffle=False)
                for idx, (inputs, labels) in enumerate(dataloader):
                    inputs, labels = inputs.cuda(), labels.cuda()
                    preds0 = self.model0(inputs)
                    preds1 = self.model1(inputs)
                    preds = self.weight0 * preds0 + self.weight1 * preds1
                    acc = get_acc(preds, labels)

    def prepare_dataloader(self, batch_size, balanced, Data_dir):
        train_data = CCN_Data(Data_dir, self.exp_idx, stage='train', seed=self.seed)
        valid_data = CCN_Data(Data_dir, self.exp_idx, stage='valid', seed=self.seed)
        test_data = CCN_Data(Data_dir, self.exp_idx, stage='test', seed=self.seed)
        self.train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
        self.valid_dataloader = DataLoader(valid_data, batch_size=1, shuffle=False)
        self.test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)
The code above is really just two models producing outputs independently followed by a weighted average, which has nothing to do with deep learning or PyTorch as such. I'm still thinking about how to take a weighted average of the two models' losses and propagate that back during training; backpropagating a weighted average of the losses is essentially multi-task learning.
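A minimal sketch of that weighted-loss idea, dropped into train_model_epoch in place of the two separate backward calls (w0 and w1 are assumed, hand-picked constants):

# inside train_model_epoch, replacing the two separate backward calls
w0, w1 = 0.5, 0.5  # assumed fusion weights; they could also be learned
self.optimizer0.zero_grad()
self.optimizer1.zero_grad()
loss = w0 * self.criterion(self.model0(inputs), labels) \
     + w1 * self.criterion(self.model1(inputs), labels)
loss.backward()  # one scalar; gradients flow into both models
self.optimizer0.step()
self.optimizer1.step()

When the two models share no parameters, this is equivalent to backpropagating w0 * loss0 and w1 * loss1 separately; the combined loss only starts to behave differently once some layers are shared, which is exactly the multi-task setting below.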
Multi-task learning
criterion_a = nn.CrossEntropyLoss()
criterion_b = nn.MSELoss()
loss_a = criterion_a(output_x, x_labels)
loss_b = criterion_b(output_y, y_labels)
# summing builds one graph; a single backward() pushes gradients from
# both tasks into any parameters the two outputs share
loss = loss_a + loss_b
loss.backward()
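For the combined loss to actually couple the two tasks, output_x and output_y need to come from heads that share part of the network. A minimal sketch with assumed layer sizes (the 64/32/10/1 dimensions and the class name are made up for illustration):

class TwoHeadNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(64, 32), nn.ReLU())  # shared
        self.head_x = nn.Linear(32, 10)  # classification head
        self.head_y = nn.Linear(32, 1)   # regression head

    def forward(self, inp):
        h = self.trunk(inp)  # both losses backpropagate through the trunk
        return self.head_x(h), self.head_y(h)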
References
https://discuss.pytorch.org/t/how-to-combine-multiple-criterions-to-a-loss-function/348/7
Timeline
2020.07.14 Recording this much for now; I'll come back to extend and revise it after more study.