1.个人的设想
def forward(self, x):
    """Residual-module forward pass: (conv-BN-ReLU) x2, 1x1 conv, skip add, final ReLU.

    Bug fix: the original fed the raw input ``x`` into ``self.conv2`` and
    ``self.conv3``, which silently discarded the output of the earlier
    stages; every stage now consumes the running ``out`` tensor.
    """
    residual = x
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)  # was self.conv2(x): dropped the conv1 branch entirely
    out = self.bn2(out)
    out = self.relu(out)
    out = self.conv3(out)  # was self.conv3(x): same bug
    out = out + residual   # skip connection
    out = self.relu(out)
    return out
def forward(self, x):
    """Forward pass: entry conv, residual body with a global skip, pixel-shuffle upsampling head."""
    # Entry stage (9x9 conv per the accompanying notes) followed by ReLU.
    feat = self.conv1(x)
    feat = self.relu(feat)
    skip = feat  # global residual around the res-block body
    # Residual body plus a fusing conv + BN, then add the global skip.
    out = self.resBlock(feat)
    out = self.conv2(out)
    out = self.bn2(out)
    out = out + skip
    # Upsampling head: conv -> pixel shuffle -> ReLU, then the final 9x9 conv.
    out = self.convPos1(out)
    out = self.pixelShuffler1(out)
    out = self.reluPos1(out)
    return self.finConv(out)
2.SRGAN中的SRRESNET
class Block(nn.Module):
    """SRResNet-style residual block: two 3x3 conv+BN layers with a PReLU in between.

    The identity skip adds the block input to the branch output, so
    ``input_channel`` must equal ``output_channel`` (both default to 64).

    Bug fix: the original hard-coded ``padding=1`` inside both ``nn.Conv2d``
    calls, silently ignoring the ``padding`` constructor parameter; the
    parameter is now honored (its default, 1, preserves old behavior).
    """

    def __init__(self, input_channel=64, output_channel=64, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(input_channel, output_channel, kernel_size, stride, bias=False, padding=padding),
            nn.BatchNorm2d(output_channel),
            nn.PReLU(),
            nn.Conv2d(output_channel, output_channel, kernel_size, stride, bias=False, padding=padding),
            nn.BatchNorm2d(output_channel),
        )

    def forward(self, x0):
        """Return x0 + F(x0) (identity skip connection)."""
        x1 = self.layer(x0)
        return x0 + x1
class Block(nn.Module):
    """Residual block (conv-BN-PReLU-conv-BN) with an identity skip connection.

    Duplicate of the SRResNet block above in the notes; carries the same
    defect, fixed here too: ``padding`` was accepted by ``__init__`` but both
    convolutions hard-coded ``padding=1``. The parameter is now passed
    through; its default of 1 keeps existing callers unchanged.
    """

    def __init__(self, input_channel=64, output_channel=64, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(input_channel, output_channel, kernel_size, stride, bias=False, padding=padding),
            nn.BatchNorm2d(output_channel),
            nn.PReLU(),
            nn.Conv2d(output_channel, output_channel, kernel_size, stride, bias=False, padding=padding),
            nn.BatchNorm2d(output_channel),
        )

    def forward(self, x0):
        """Return x0 + F(x0); requires input and output channel counts to match."""
        x1 = self.layer(x0)
        return x0 + x1
3.EDSR(Enhanced Deep Residual Networks for Single Image Super-Resolution)
Batch Norm不仅可以使训练更深的网络变容易,加速收敛,还有一定正则化的效果,可以防止模型过拟合。但在图像超分辨率和图像生成方面,Batch Norm的表现并不好,加入了Batch Norm,反而使得训练速度缓慢,不稳定,甚至最后发散。
以图像超分辨率来说,网络输出的图像在色彩、对比度、亮度上要求和输入一致,改变的仅仅是分辨率和一些细节,而Batch Norm,对图像来说类似于一种对比度的拉伸,任何图像经过Batch Norm后,其色彩的分布都会被归一化,也就是说,它破坏了图像原本的对比度信息,所以Batch Norm的加入反而影响了网络输出的质量。与 SRResNet 相比,我们没有批归一化层的基准模型,在训练过程当中大概减少了 40% 的内存使用率。
class ResBlock(nn.Module):
    """EDSR-style residual block: conv-ReLU-conv plus an identity skip, no BatchNorm.

    Per the EDSR paper, BatchNorm is removed (it hurts SR quality and costs
    memory), so the convolutions keep ``bias=True`` to compensate.
    Note: the skip add requires ``inChannals == outChannals``.
    """

    def __init__(self, inChannals, outChannals):
        """Build the two 3x3 convolutions and the activation.

        Args:
            inChannals: number of input feature channels.
            outChannals: number of output feature channels.
        """
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(inChannals, outChannals, kernel_size=3, stride=1, padding=1, bias=True)
        self.conv2 = nn.Conv2d(outChannals, outChannals, kernel_size=3, stride=1, padding=1, bias=True)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return x + conv2(relu(conv1(x))); no activation after the add (EDSR)."""
        residual = x
        out = self.conv1(x)
        out = self.relu(out)
        # Bug fix: was self.conv2(x), which skipped the conv1/ReLU stage and
        # would fail whenever inChannals != outChannals.
        out = self.conv2(out)
        out = out + residual
        return out
去掉了批量归一化层,所以就要加入偏置bias
4.WDSR(Wide Activation for Efficient and Accurate Image Super-Resolution)
4.1结构
WDSR在EDSR上的结构提升,一方面是去除了很多冗余的卷积层,这样计算更快。另一方面是改造了resblock。我们一分为二来看,去除冗余的卷积层(如上图阴影部分)作者认为这些层的效果是可以吸收到resbody里面的,通过去除实验之后,发现效果并没有下降,所以去除冗余卷积层可以降低计算开销。
左图呢就是ESDR的原始resblock,中间是WDSR-A,右边的是WDSR-B。作者在文中提出了两个版本的WDSR,这两个版本的区别就是resblock不同而已。 对于EDSR中的resblock,称之为原始resblock,relu是在两个卷积运算中间,而且卷积核的filter数较少;而WDSR-A是在不增加计算开销的前提下,增加relu前卷积核的filter数以增加feature map的宽度。WDSR-B进一步解放了计算开销,将relu后的大卷积核拆分成两个小卷积核,这样可以在同样计算开销的前提下获得更宽泛的激活函数前的特征图(即channel数可以更多)。
4.2代码学习
wdsr_a
# WDSR-A residual body (fragment; `body`, `wn`, `act`, `n_feats`, `block_feats`,
# `kernel_size` are defined elsewhere in the original file).
# Widen the features BEFORE the activation (n_feats -> block_feats), apply the
# activation, then project back down — "wide activation". `wn` presumably wraps
# the conv in weight normalization — TODO confirm against the original source.
body.append(
wn(nn.Conv2d(n_feats, block_feats, 3, padding=kernel_size//2)))# block_feats=512
body.append(act)
body.append(
wn(nn.Conv2d(block_feats, n_feats, 3, padding=kernel_size//2)))
wdsr_b
# WDSR-B residual body + head/tail/skip (fragment; `body`, `head`, `tail`,
# `skip`, `wn`, `act`, `args`, `n_feats`, `expand`, `linear`, `kernel_size`,
# `out_feats`, `scale`, `Block` come from elsewhere in the original file).
# WDSR-B splits the post-activation conv into two smaller ones (1x1 expand,
# 1x1 shrink, then a kxk conv), allowing an even wider pre-activation width
# at the same compute cost.
body.append(
wn(nn.Conv2d(n_feats, n_feats*expand, 1, padding=1//2))) # channels: 64 -> 64*6 (1x1 expand)
body.append(act)
body.append( # channels: 64*6 -> 64*0.8 (1x1 linear shrink)
wn(nn.Conv2d(n_feats*expand, int(n_feats*linear), 1, padding=1//2)))
body.append( # channels: 64*0.8 -> 64 (spatial kxk conv)
wn(nn.Conv2d(int(n_feats*linear), n_feats, kernel_size, padding=kernel_size//2)))
head.append(
wn(nn.Conv2d(args.n_colors, n_feats, 3, padding=3//2))) # channels: 3 -> 64
body.append( # Block args: 64, 3, relu, res_scale=1, wn
Block(n_feats, kernel_size, act=act, res_scale=args.res_scale, wn=wn))
tail.append(
wn(nn.Conv2d(n_feats, out_feats, 3, padding=3//2))) # channels: 64 -> 4*4*3 (for pixel shuffle)
tail.append(nn.PixelShuffle(scale))
skip.append(
wn(nn.Conv2d(args.n_colors, out_feats, 5, padding=5//2)) # channels: 3 -> 4*4*3 (global skip branch)
)
skip.append(nn.PixelShuffle(scale))
def forward(self, x):
    """WDSR forward pass: normalize, run main branch plus a global skip, denormalize."""
    # Map the [0, 255] input to roughly [-1, 1] around the dataset RGB mean.
    normalized = (x - self.rgb_mean.cuda() * 255) / 127.5
    # Global skip branch: upsamples the (normalized) input directly.
    shortcut = self.skip(normalized)
    # Main branch: shallow features -> residual body -> upsampling tail.
    features = self.head(normalized)
    features = self.body(features)
    upscaled = self.tail(features)
    combined = upscaled + shortcut
    # Undo the normalization back to the [0, 255] range.
    return combined * 127.5 + self.rgb_mean.cuda() * 255
没有使用BN但是使用了WN(Weight Normalization)。WN也是一种对参数重写(reparameterization)的normalization,相比于BN,WN带有如下优点:WN的计算量非常低,并且其不会因为mini-batch的随机性而引入噪声统计。在RNN,LSTM,或者Reinforcement Learning上,WN能够表现出比BN更好的性能。
4.3 Weight Normalization
Weight Normalization是对网络权值W进行normalization,因此简称WN;Batch Normalization是对网络某一层输入数据进行normalization,因此简称BN。
5.总结
sub-pixel convolution这种新型卷积就是为SR量身定做的,反卷积效果不行,暂时还没看到其他的上采样操作。
目前的SR论文都证明网络的深度会极大的影响SR的效果,但是可能在特征超分辨率上效果不尽人意,而且过深和过于复杂的网络也不符合特征超分辨率的初衷。
是否要再提高通道数,在WDSR中通过拓宽通道得到了效果提升
参考:https://blog.csdn.net/leviopku/article/details/85048846