Hung-Yi Lee homework[12]: Transfer Learning-Domain Adversarial Training
一、Transfer Learning原理介绍
Transfer Learning:迁移学习。把为任务A开发的模型作为初始点,重新使用在为任务B开发模型的过程中。
迁移学习和传统机器学习的区别:
对比项 | 迁移学习 | 传统机器学习 |
---|---|---|
数据分布 | 训练和测试数据不需要同分布 | 训练和测试数据同分布 |
数据标签 | 不需要足够的数据标注 | 足够的数据标注 |
建模 | 可以重用之前的模型 | 每个任务分别建模 |
迁移学习的核心是:找到源领域(source domain)和目标领域(target domain)的相似性,而后度量并利用这种相似性。在迁移学习中,相似性是核心,度量准则是重要手段。
二、作业要求
作业的任务是TL中的domain adversarial training,即源领域(source domain)的数据有标签(有监督),目标领域(target domain)的数据没有标签(无监督)。
作业中source data是真实照片,target data是手写涂鸦。
source data | target data |
---|---|
DaNN原理:
我们采用DaNN来实现迁移学习。DaNN的主要核心是:模型分成两部分,上半部分是一个特征提取器,下半部分是一个分类器,让source data和target data经过特征提取器后是同分布的。这种操作能够有效降低后续分类器的错误率。
三、作业实现
作业思路:因为涂鸦图片中,重点是物体的轮廓,所以可以对source data做边缘检测,让source data更像target。
hw12.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import cv2
import matplotlib.pyplot as plt
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
# Feature extractor
class FeatureExtractor(nn.Module):
    """Convolutional feature extractor shared by both domains.

    Five Conv-BatchNorm-ReLU-MaxPool stages map a single-channel image to a
    512-channel feature map. Each pooling stage halves the spatial size, so a
    32x32 input ends up as 512x1x1, which ``forward`` flattens to a
    ``(batch, 512)`` feature matrix.
    """

    def __init__(self):
        super(FeatureExtractor, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        """Return the flattened features of ``x`` with shape ``(batch, -1)``.

        Flattening starts at dim 1 so the batch axis is always kept. A bare
        ``squeeze()`` would also drop the batch axis for a batch of one
        sample (e.g. the last, partial batch of a loader), which breaks the
        ``argmax(dim=1)`` and slicing done on the result downstream.
        """
        return torch.flatten(self.conv(x), 1)
class LabelPredictor(nn.Module):
    """Fully connected head mapping 512-d features to 10 class logits."""

    def __init__(self):
        super().__init__()
        width = 512
        # Two hidden Linear+ReLU stages followed by the 10-way output layer.
        stages = [
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, 10),
        ]
        self.layer = nn.Sequential(*stages)

    def forward(self, h):
        """Return the class logits computed from feature tensor ``h``."""
        return self.layer(h)
# Domain classifier
class DomainClassifier(nn.Module):
    """Fully connected head mapping 512-d features to one domain logit."""

    def __init__(self):
        super().__init__()
        width = 512
        stages = []
        # Four identical Linear -> BatchNorm1d -> ReLU groups ...
        for _ in range(4):
            stages.extend([
                nn.Linear(width, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
            ])
        # ... followed by the single-logit output layer.
        stages.append(nn.Linear(width, 1))
        self.layer = nn.Sequential(*stages)

    def forward(self, h):
        """Return the domain logit for each row of feature tensor ``h``."""
        return self.layer(h)
def _canny_edges(image):
    """Return the Canny edge map (thresholds 170/300) of ``image`` as an array."""
    return cv2.Canny(np.array(image), 170, 300)


# Preprocessing for source-domain images: reduce each image to its edges,
# then apply light augmentation.
source_transform = transforms.Compose([
    # Convert to a grayscale image.
    transforms.Grayscale(),
    # Edge detection; cv2.Canny works on arrays, so the image is converted first.
    transforms.Lambda(_canny_edges),
    # Convert the np.array back into a PIL image.
    transforms.ToPILImage(),
    # Random horizontal flip (data augmentation).
    transforms.RandomHorizontalFlip(),
    # Random rotation, with the degrees argument set to 15.
    transforms.RandomRotation(15),
    # Finally convert to a Tensor.
    transforms.ToTensor(),
])
# Preprocessing for target-domain images: grayscale, resize, then the same
# random flip/rotation augmentation as the source pipeline.
target_transform = transforms.Compose([
    # Convert to grayscale, collapsing the three input channels into one.
    transforms.Grayscale(),
    # Resize to 32x32. Per the original note, source images are 32x32 and
    # target images are 28x28, so the target images are scaled up to match.
    transforms.Resize((32, 32)),
    # Random horizontal flip (data augmentation).
    transforms.RandomHorizontalFlip(),
    # Random rotation, with the degrees argument set to 15.
    transforms.RandomRotation(15),
    # Finally convert to a Tensor.
    transforms.ToTensor(),
])
def train_epoch(source_dataloader, target_dataloader, lamb):
    """Run one epoch of domain-adversarial training.

    Source and target batches are consumed in lockstep via ``zip``, so the
    epoch ends when the shorter loader is exhausted. The function reads the
    module-level names ``feature_extractor``, ``label_predictor``,
    ``domain_classifier``, ``class_criterion``, ``domain_criterion``,
    ``optimizer_F``, ``optimizer_C`` and ``optimizer_D``, which must be
    defined before it is called, and moves every batch to CUDA.

    Args:
        source_dataloader: yields ``(image, label)`` batches from the source domain.
        target_dataloader: yields ``(image, _)`` batches from the target
            domain; the second element is ignored.
        lamb: weight of the domain loss subtracted from the class loss.

    Returns:
        A tuple ``(mean domain-classifier loss, mean extractor/predictor
        loss, source classification accuracy)`` for the epoch.
    """
    # d_loss_sum: accumulated loss of the domain classifier
    # f_loss_sum: accumulated loss of the feature extractor & label predictor
    # total_hit:  number of correctly classified source samples so far
    # total_num:  number of source samples seen so far
    d_loss_sum = 0.0
    f_loss_sum = 0.0
    total_hit = 0.0
    total_num = 0.0

    paired_batches = zip(source_dataloader, target_dataloader)
    for i, ((source_data, source_label), (target_data, _)) in enumerate(paired_batches):
        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        n_source = source_data.shape[0]
        n_target = target_data.shape[0]

        # Feed source and target through the network as one batch; otherwise
        # the batch-norm statistics would be computed per domain, and the two
        # domains' means/variances differ.
        mixed_data = torch.cat([source_data, target_data], dim=0)
        # Domain labels: 1 for source rows, 0 for target rows.
        domain_label = torch.zeros([n_source + n_target, 1]).cuda()
        domain_label[:n_source] = 1

        # Step 1: train the domain classifier. The features are detached so
        # this backward pass does not reach the feature extractor.
        feature = feature_extractor(mixed_data)
        d_logits = domain_classifier(feature.detach())
        d_loss = domain_criterion(d_logits, domain_label)
        d_loss_sum += d_loss.item()
        d_loss.backward()
        optimizer_D.step()

        # Step 2: train the feature extractor and the label predictor.
        class_logits = label_predictor(feature[:n_source])
        domain_logits = domain_classifier(feature)
        # Loss = class CE - lamb * domain BCE. The domain term is subtracted
        # for the same reason as the generator loss in a GAN: the extractor
        # is rewarded when the domain classifier does badly.
        class_loss = class_criterion(class_logits, source_label)
        domain_loss = domain_criterion(domain_logits, domain_label)
        f_loss = class_loss - lamb * domain_loss
        f_loss_sum += f_loss.item()
        f_loss.backward()
        optimizer_F.step()
        optimizer_C.step()

        # Clear all gradients, including those step 2 left on the domain
        # classifier, before the next iteration.
        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        predicted = class_logits.argmax(dim=1)
        total_hit += (predicted == source_label).sum().item()
        total_num += n_source
        print(i, end='\r')

    n_batches = i + 1
    return d_loss_sum / n_batches, f_loss_sum / n_batches, total_hit / total_num
if __name__ == "__main__":
    # Datasets and loaders. The names below stay at module level on purpose:
    # train_epoch looks the models, criteria and optimizers up as globals.
    source_dataset = ImageFolder('train_data', transform=source_transform)
    target_dataset = ImageFolder('test_data', transform=target_transform)
    source_dataloader = DataLoader(source_dataset, batch_size=32, shuffle=True)
    target_dataloader = DataLoader(target_dataset, batch_size=32, shuffle=True)
    test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

    # Networks, all placed on the GPU.
    feature_extractor = FeatureExtractor().cuda()
    label_predictor = LabelPredictor().cuda()
    domain_classifier = DomainClassifier().cuda()

    # Losses: cross-entropy for the class labels, BCE-with-logits for the domain.
    class_criterion = nn.CrossEntropyLoss()
    domain_criterion = nn.BCEWithLogitsLoss()

    # One Adam optimizer per network.
    optimizer_F = optim.Adam(feature_extractor.parameters())
    optimizer_C = optim.Adam(label_predictor.parameters())
    optimizer_D = optim.Adam(domain_classifier.parameters())

    for epoch in range(200):
        epoch_stats = train_epoch(source_dataloader, target_dataloader, lamb=0.1)
        train_D_loss, train_F_loss, train_acc = epoch_stats

        # Overwrite the checkpoints after every epoch.
        torch.save(feature_extractor.state_dict(), 'extractor_model.bin')
        torch.save(label_predictor.state_dict(), 'predictor_model.bin')

        summary = 'epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, acc {:6.4f}'.format(
            epoch, train_D_loss, train_F_loss, train_acc)
        print(summary)
predict.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
import cv2
import matplotlib.pyplot as plt
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from hw12 import *
import pandas as pd
if __name__ == "__main__":
    # Deterministic preprocessing for inference: the same grayscale + resize
    # steps as target_transform, but without its random flip and rotation,
    # which would make the predictions vary from run to run.
    test_transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ])
    target_dataset = ImageFolder('test_data', transform=test_transform)
    # shuffle=False keeps predictions in dataset order, so the row index
    # written to the CSV below matches the sample index.
    test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

    # Rebuild the networks and load the weights saved by hw12.py.
    feature_extractor = FeatureExtractor().cuda()
    label_predictor = LabelPredictor().cuda()
    feature_extractor.load_state_dict(torch.load('extractor_model.bin'))
    label_predictor.load_state_dict(torch.load('predictor_model.bin'))
    # Evaluation mode: the batch-norm layers use their running statistics.
    label_predictor.eval()
    feature_extractor.eval()

    result = []
    # No gradients are needed for inference; skip building the autograd graph.
    with torch.no_grad():
        for test_data, _ in test_dataloader:
            test_data = test_data.cuda()
            class_logits = label_predictor(feature_extractor(test_data))
            result.append(torch.argmax(class_logits, dim=1).cpu().numpy())
    result = np.concatenate(result)

    # Generate the submission file: one row per sample with its predicted label.
    df = pd.DataFrame({'id': np.arange(0, len(result)), 'label': result})
    df.to_csv('DaNN_submission.csv', index=False)