代码逻辑和图片来自论文:Dynamic Multi-scale Filters for Semantic Segmentation
class DCM(nn.Module):
    """Dynamic convolution module using input-dependent depth-wise filters.

    For every sample and every channel, a ``filter_size x filter_size``
    kernel is produced by adaptively average-pooling that channel's own
    feature map. The kernel is then applied back to the same channel as a
    depth-wise convolution (zero padding keeps the spatial size unchanged),
    followed by batch normalization and ReLU.

    Args:
        in_channel: Number of channels of the input feature map. Because
            the convolution is depth-wise, this is also the number of
            channels of the output, and the size of the batch-norm layer.
        out_channel: Stored on the module for reference only; the
            depth-wise convolution cannot change the channel count, so it
            is not used in the computation.
        filter_size: Spatial size ``k`` of the dynamically generated
            ``k x k`` kernels.
    """

    def __init__(self, in_channel=64, out_channel=64, filter_size=3):
        super(DCM, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.filter_size = filter_size
        # The depth-wise convolution in forward() preserves the channel
        # count, so the normalization operates on `in_channel` features.
        self.norm = nn.Sequential(
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
        )

    def forward(self, x):
        """Apply the dynamic depth-wise filters to ``x``.

        Args:
            x: Tensor unpacked as ``(b, c, h, w)``; ``c`` must equal
                ``in_channel`` for the batch-norm layer to accept it.

        Returns:
            Tensor of shape ``(b, c, h, w)``.
        """
        b, c, h, w = x.shape
        k = self.filter_size

        # One k x k kernel per (sample, channel), pooled from the input itself.
        pre_filter = F.adaptive_avg_pool2d(x, k)

        # Fold the batch into the channel axis so a single grouped
        # convolution applies a distinct kernel to every (sample, channel).
        # reshape (rather than view) also accepts non-contiguous inputs.
        x = x.reshape(1, b * c, h, w)
        pre_filter = pre_filter.reshape(b * c, 1, k, k)

        # Padding for input features: keep the output at (h, w). For an even
        # kernel size the extra row/column of padding goes on the left/top.
        pad = (k - 1) // 2
        if (k - 1) % 2 == 0:
            p2d = (pad, pad, pad, pad)
        else:
            p2d = (pad + 1, pad, pad + 1, pad)
        x = F.pad(input=x, pad=p2d, mode='constant', value=0)

        # groups == number of channels, i.e. a depth-wise convolution.
        output = F.conv2d(input=x, weight=pre_filter, groups=b * c)
        # To implement an ordinary convolution instead, the predicted kernel
        # would have to be shaped (self.out_channel, self.in_channel, k, k).
        # F.adaptive_avg_pool2d() alone probably cannot produce that; the
        # kernel would need to be generated by a convolution that also
        # handles the batch dimension.
        output = output.reshape(b, c, h, w)
        output = self.norm(output)
        return output
若要实现普通卷积,则需将预测卷积核 pre_filter 凑成 (self.out_channel, self.in_channel, k, k) 的形状。简单使用 F.adaptive_avg_pool2d() 可能凑不出来,需要用卷积来生成卷积核并处理 batch_size 维度,例如这篇文章:Video Super-Resolution via Dynamic Local Filter Network。
接下来可以试试复现这样的dynamic filter的生成。