Digging into the Source Code of Meta-Transfer Learning for Few-Shot Learning
Paper Information
Title: Meta-Transfer Learning for Few-Shot Learning
Venue: CVPR 2019 (CCF Class A, top-tier)
Contributions: proposes a meta-transfer learning (MTL) method.
1. MTL helps a deep neural network converge faster and reduces overfitting on few-shot tasks.
"Transfer" means: the weights of a DNN trained on large-scale data are adapted to a new task by two lightweight neuron operations, Scaling and Shifting (SS), i.e. αX + β (see the sketch after this list).
"Meta" means: these scaling/shifting parameters are treated as hyperparameters trained on the few-shot learning tasks.
2. MTL comes with an effective meta-training curriculum.
Curriculum learning / hard negative mining: arranging the training tasks in a better order yields faster convergence and better final performance.
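The layer that implements SS, Conv2dMtl, is imported in the code below from models/conv2d_mtl.py but is not reproduced in this post. As a minimal sketch of the idea (my reconstruction, not necessarily the repository's exact code; the parameter names mtl_weight / mtl_bias are assumptions), it keeps the pre-trained kernel and learns one scaling scalar per channel pair plus one shifting scalar per output channel:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Conv2dMtl(nn.Conv2d):
    """Convolution whose pre-trained kernel W is modulated as alpha * W (+ beta on the bias)."""
    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True):
        super(Conv2dMtl, self).__init__(in_channels, out_channels, kernel_size,
                                        stride=stride, padding=padding,
                                        dilation=dilation, groups=groups, bias=bias)
        # alpha: one scalar per (out_channel, in_channel) pair, broadcast over the kernel
        self.mtl_weight = nn.Parameter(torch.ones(out_channels, in_channels // groups, 1, 1))
        # beta: one scalar per output channel
        self.mtl_bias = nn.Parameter(torch.zeros(out_channels)) if bias else None

    def forward(self, x):
        # Scaling: alpha * W, expanded to the kernel's spatial shape
        new_weight = self.weight * self.mtl_weight.expand_as(self.weight)
        # Shifting: b + beta (only when the layer has a bias)
        new_bias = self.bias + self.mtl_bias if self.bias is not None else None
        return F.conv2d(x, new_weight, new_bias, self.stride,
                        self.padding, self.dilation, self.groups)

During meta-training the base weight and bias are frozen and only mtl_weight / mtl_bias receive gradients, which is what makes SS far lighter than fine-tuning the full kernel.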
I. Full Code
""" ResNet with MTL. """
import torch.nn as nn
from models.conv2d_mtl import Conv2dMtl
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
# Basic residual block
class BasicBlock(nn.Module):
    expansion = 1  # channel expansion factor: 1 means output channels == input channels
    # args: input channels, output channels, conv stride, optional downsample module
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
        residual = x  # keep the input for the shortcut (residual) branch
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
            residual = self.downsample(x)  # downsample the shortcut so shapes match
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
    expansion = 4  # the final output has 4x the channels of the intermediate 3x3 conv
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
def conv3x3mtl(in_planes, out_planes, stride=1):
return Conv2dMtl(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlockMtl(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlockMtl, self).__init__()
self.conv1 = conv3x3mtl(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3mtl(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class BottleneckMtl(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BottleneckMtl, self).__init__()
self.conv1 = Conv2dMtl(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = Conv2dMtl(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = Conv2dMtl(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNetMtl(nn.Module):
def __init__(self, layers=[4, 4, 4], mtl=True):
super(ResNetMtl, self).__init__()
if mtl:
self.Conv2d = Conv2dMtl
block = BasicBlockMtl
else:
self.Conv2d = nn.Conv2d
block = BasicBlock
cfg = [160, 320, 640]
self.inplanes = iChannels = int(cfg[0]/2)
self.conv1 = self.Conv2d(3, iChannels, kernel_size=3, stride=1, padding=1)
self.bn1 = nn.BatchNorm2d(iChannels)
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(block, cfg[0], layers[0], stride=2)
self.layer2 = self._make_layer(block, cfg[1], layers[1], stride=2)
self.layer3 = self._make_layer(block, cfg[2], layers[2], stride=2)
self.avgpool = nn.AvgPool2d(10, stride=1)
for m in self.modules():
if isinstance(m, self.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
self.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
return x
II. Block-by-Block Walkthrough
1. class BasicBlock(nn.Module)
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
# Basic residual block
class BasicBlock(nn.Module):
    expansion = 1  # channel expansion factor: 1 means output channels == input channels
    # args: input channels, output channels, conv stride, optional downsample module
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
        residual = x  # keep the input for the shortcut (residual) branch
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
            residual = self.downsample(x)  # downsample the shortcut so shapes match
out += residual
out = self.relu(out)
return out
expansion: channel expansion factor; 1 means the number of output channels equals the number of input channels
conv1: first 3x3 convolution
conv2: second 3x3 convolution
bn1, bn2: batch normalization
relu: activation function
BasicBlock data flow (a quick shape check follows below):
input -> residual
input -> conv1 -> bn1 -> relu -> conv2 -> bn2 -> + residual -> relu -> out
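To see the shortcut logic in action, here is a quick check using the BasicBlock defined above (the channel sizes 80 -> 160 are just an illustration): a stride-2 block needs a downsample module so the residual matches the main branch before the addition.

import torch
import torch.nn as nn

# stride-2 block: the spatial size halves and the channel count changes,
# so the shortcut must be projected before the addition
downsample = nn.Sequential(
    nn.Conv2d(80, 160, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(160),
)
block = BasicBlock(80, 160, stride=2, downsample=downsample)

x = torch.randn(2, 80, 40, 40)
print(block(x).shape)  # torch.Size([2, 160, 20, 20])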
2. class BasicBlockMtl(nn.Module)
class BasicBlockMtl(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlockMtl, self).__init__()
self.conv1 = conv3x3mtl(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3mtl(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
BasicBlockMtl is identical to BasicBlock except that both 3x3 convolutions are built with conv3x3mtl, i.e. Conv2dMtl layers whose kernels are modulated by the learned SS parameters. The data flow is unchanged:
input -> residual
input -> conv1 -> bn1 -> relu -> conv2 -> bn2 -> + residual -> relu -> out
A sketch of the resulting parameter split follows below.
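During meta-training only the SS parameters are meant to be updated while the pre-trained kernels stay frozen. A minimal sketch of that split, assuming the Conv2dMtl reconstruction above with its (hypothetical) mtl_weight / mtl_bias parameter names:

import torch

block = BasicBlockMtl(80, 80)

# freeze everything except the SS parameters
# (batch-norm affine parameters are also frozen in this simplified sketch)
for name, param in block.named_parameters():
    param.requires_grad = 'mtl_' in name  # matches the hypothetical names above

ss_params = [p for p in block.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(ss_params, lr=1e-3)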
3. class ResNetMtl(nn.Module)
class ResNetMtl(nn.Module):
def __init__(self, layers=[4, 4, 4], mtl=True):
super(ResNetMtl, self).__init__()
if mtl:
self.Conv2d = Conv2dMtl
block = BasicBlockMtl
else:
self.Conv2d = nn.Conv2d
block = BasicBlock
cfg = [160, 320, 640]
self.inplanes = iChannels = int(cfg[0]/2)
self.conv1 = self.Conv2d(3, iChannels, kernel_size=3, stride=1, padding=1)
self.bn1 = nn.BatchNorm2d(iChannels)
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(block, cfg[0], layers[0], stride=2)
self.layer2 = self._make_layer(block, cfg[1], layers[1], stride=2)
self.layer3 = self._make_layer(block, cfg[2], layers[2], stride=2)
self.avgpool = nn.AvgPool2d(10, stride=1)
for m in self.modules():
if isinstance(m, self.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
self.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
return x
__init__ method
1. Parameters
layers: a list giving the number of blocks per stage; [4, 4, 4] means three stages with four blocks each
mtl: a boolean selecting whether to build the meta-transfer variant
2. Initialization
Depending on mtl, the network uses Conv2dMtl or nn.Conv2d as its convolution type, and BasicBlockMtl or BasicBlock as its building block. The cfg list holds the output channel count of each stage (160, 320, 640), and self.inplanes / iChannels initialize the stem's channel count to cfg[0]/2 = 80.
forward method
x -> conv1 -> bn1 -> relu -> layer1 -> layer2 -> layer3 -> avgpool -> flatten -> out
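Putting it together, the shapes below trace an 80x80 RGB input through the network (80x80 is the size implied by the 10x10 average pool after three stride-2 stages):

import torch

net = ResNetMtl(layers=[4, 4, 4], mtl=True)

x = torch.randn(2, 3, 80, 80)  # (B, 3, 80, 80)
# conv1:   (B, 80, 80, 80)     stem keeps stride 1, cfg[0]/2 = 80 channels
# layer1:  (B, 160, 40, 40)    first block of each stage uses stride 2
# layer2:  (B, 320, 20, 20)
# layer3:  (B, 640, 10, 10)
# avgpool: (B, 640, 1, 1)      10x10 average pooling
feat = net(x)
print(feat.shape)  # torch.Size([2, 640]) -- the flattened 640-dim embedding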