手把手教你在ResNet_vd中添加DCN
1、理论
关于ResNet_vd以及DCN(可变形卷积),大家可以参考我之前的项目:【动手学Paddle2.0系列】PP-YoLo详解(1)、【动手学Paddle2.0系列】可变形卷积(Deformable Conv)实战,复习一下相关的理论知识。
2、代码详解
对于ResNet的BottleneckBlock结构,其包含两层1 * 1卷积,一层3 * 3卷积。如果我们想在其结构中引入DCN,直接将3 * 3卷积更换为DCN即可。如下面的代码所示。
class ConvBNLayer(nn.Layer):
    """Conv2D followed by BatchNorm (with optional fused activation).

    Building block of ResNet_vd. When ``is_vd_mode`` is True, a 2x2
    average pooling is applied to the input before the convolution —
    this is the ResNet-D downsampling trick used on the shortcut path.

    Args:
        num_channels (int): input channel count.
        num_filters (int): output channel count.
        filter_size (int): square kernel size (assumed odd for the
            "same" padding computed below).
        stride (int): convolution stride.
        groups (int): grouped-convolution groups.
        is_vd_mode (bool): apply 2x2 avg-pool before the conv.
        act (str|None): activation fused into BatchNorm (e.g. 'relu').
        lr_mult (float): learning-rate multiplier for all parameters.
        name (str): parameter-name prefix; must follow the original
            paddle ResNet convention so pretrained weights load by name.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 lr_mult=1.0,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        # ceil_mode=True keeps the last row/column when the spatial size is odd.
        self._pool2d_avg = AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            # "same" padding for odd kernel sizes.
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                name=name + "_weights", learning_rate=lr_mult),
            bias_attr=False)  # bias is redundant before BatchNorm
        # BN parameter names mirror the original paddle ResNet naming scheme.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(
                name=bn_name + '_scale', learning_rate=lr_mult),
            bias_attr=ParamAttr(
                name=bn_name + '_offset', learning_rate=lr_mult),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        """(optional avg-pool) -> conv -> batch-norm(+activation)."""
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class BottleneckBlock(nn.Layer):
    """Standard ResNet_vd bottleneck: 1x1 -> 3x3 -> 1x1 convs + shortcut.

    The residual branch expands channels by 4x in the last 1x1 conv.
    When ``shortcut`` is False a projection (1x1 ConvBNLayer) is applied
    to the input; with ``if_first`` False it also avg-pools first
    (ResNet-D shortcut downsampling).

    Args:
        num_channels (int): input channel count.
        num_filters (int): bottleneck width; block outputs 4*num_filters.
        stride (int): stride of the middle 3x3 conv.
        shortcut (bool): True if input already matches the output shape
            (identity shortcut); False to use a projection shortcut.
        if_first (bool): first block of the network — skip the avg-pool
            on the projection shortcut.
        lr_mult (float): learning-rate multiplier passed through.
        name (str): parameter-name prefix.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 if_first=False,
                 lr_mult=1.0,
                 name=None):
        super(BottleneckBlock, self).__init__()
        # 1x1 reduce
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            lr_mult=lr_mult,
            name=name + "_branch2a")
        # 3x3 spatial conv (carries the stride)
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            lr_mult=lr_mult,
            name=name + "_branch2b")
        # 1x1 expand (no activation; ReLU comes after the residual add)
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            lr_mult=lr_mult,
            name=name + "_branch2c")
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=1,
                # ResNet-D: downsample the shortcut with avg-pool, except
                # for the very first block.
                is_vd_mode=False if if_first else True,
                lr_mult=lr_mult,
                name=name + "_branch1")
        self.shortcut = shortcut

    def forward(self, inputs):
        """Residual branch + (identity or projection) shortcut, then ReLU."""
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y
class ConvBNLayer_dcn(nn.Layer):
    """DeformConv2D followed by BatchNorm (with optional fused activation).

    DCN variant of ``ConvBNLayer``: two plain convolutions predict the
    per-position offsets and modulation mask, which are fed together with
    the input into ``paddle.vision.ops.DeformConv2D``.

    The offset map needs 2*k*k channels (an (x, y) pair per kernel tap)
    and the mask k*k channels, where k = ``filter_size``. The original
    version hard-coded 18 and 9, which only matched k == 3; the counts
    are now derived from ``filter_size`` (identical for k == 3).

    NOTE(review): DCNv2 typically passes the predicted mask through a
    sigmoid before use; this implementation feeds it in raw — confirm
    against the reference implementation before reusing elsewhere.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 lr_mult=1.0,
                 name=None):
        super(ConvBNLayer_dcn, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        kernel_taps = filter_size * filter_size
        # Offset branch: 2 channels (dx, dy) per kernel tap.
        self._offsets = Conv2D(
            in_channels=num_channels,
            out_channels=2 * kernel_taps,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                name=name + "_weights1", learning_rate=lr_mult),
            bias_attr=False)
        # Mask (modulation) branch: 1 channel per kernel tap.
        self._mask = Conv2D(
            in_channels=num_channels,
            out_channels=kernel_taps,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                name=name + "_weights2", learning_rate=lr_mult),
            bias_attr=False)
        # The deformable convolution itself.
        self._conv_dcn = paddle.vision.ops.DeformConv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(
                name=name + "_weights3", learning_rate=lr_mult),
            bias_attr=False)
        # BN parameter names mirror the original paddle ResNet naming scheme.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(
                name=bn_name + '_scale', learning_rate=lr_mult),
            bias_attr=ParamAttr(
                name=bn_name + '_offset', learning_rate=lr_mult),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        """Predict offsets/mask, apply deformable conv, then batch-norm."""
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        offset = self._offsets(inputs)
        mask = self._mask(inputs)
        y = self._conv_dcn(inputs, offset, mask)
        y = self._batch_norm(y)
        return y
class BottleneckBlock_dcn(nn.Layer):
    """ResNet_vd bottleneck with the middle 3x3 conv replaced by DCN.

    Identical to ``BottleneckBlock`` except that the spatial 3x3
    convolution (branch2b) is a ``ConvBNLayer_dcn`` (deformable conv).
    The two 1x1 convolutions and the shortcut are unchanged.

    Args:
        num_channels (int): input channel count.
        num_filters (int): bottleneck width; block outputs 4*num_filters.
        stride (int): stride of the middle deformable conv.
        shortcut (bool): True for identity shortcut, False for projection.
        if_first (bool): first block — skip avg-pool on the projection.
        lr_mult (float): learning-rate multiplier passed through.
        name (str): parameter-name prefix.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 if_first=False,
                 lr_mult=1.0,
                 name=None):
        super(BottleneckBlock_dcn, self).__init__()
        # 1x1 reduce (plain conv)
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            lr_mult=lr_mult,
            name=name + "_branch2a_dcn")
        # 3x3 spatial conv replaced with a deformable conv.
        self.conv1 = ConvBNLayer_dcn(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            lr_mult=lr_mult,
            name=name + "_branch2b_dcn")
        # 1x1 expand (no activation; ReLU comes after the residual add)
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            lr_mult=lr_mult,
            name=name + "_branch2c_dcn")
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=1,
                # ResNet-D shortcut downsampling, skipped on the first block.
                is_vd_mode=False if if_first else True,
                lr_mult=lr_mult,
                name=name + "_branch1_dcn")
        self.shortcut = shortcut

    def forward(self, inputs):
        """Residual branch + (identity or projection) shortcut, then ReLU."""
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y
# Build the DCN-equipped ResNet50_vd and print its layer/parameter summary.
# (The original paste duplicated these lines and re-ran the summary twice;
# the duplicates have been removed.)
from work.resnetd_dcn import ResNet50_vddcn

cnn2 = ResNet50_vddcn()
model2 = paddle.Model(cnn2)
model2.summary((64, 3, 224, 224))
(此处为模型 summary 输出的截图,原外链图片已失效)
# Build the plain ResNet50_vd baseline and print its summary for comparison
# with the DCN variant above.
from work.resnetd import ResNet50_vd
cnn3 = ResNet50_vd()
model3 = paddle.Model(cnn3)
model3.summary((64, 3, 224, 224))
总结
对比二者的模型可视化结果我们可以发现使用了DCN会增加模型的参数量,并且增加模型的大小。通过阅读DCN的论文,我们可以知道,使用这种可变形卷积能够带来较好的性能提升。本次教程侧重于DCN的代码实现及其在resnet_vd中的应用,故未进行对比实验,大家感兴趣的可以自己进行尝试。另外在pp-yolo中,在模型的backbone中也使用了DCN。大家可以结合自己的实际任务进行使用。