1. Main references
(1) The basic framework follows
https://github.com/WZMIAOMIAO/deep-learning-for-image-processing
(2) The VGG16 implementation follows the CSDN blog post "Pytorch搭建FCN网络" by 金渐层猫
2. VGG backbone code
2.1 Principle
(1) For the underlying principle, see tutorial 17:
the CSDN blog post "(17)语义分割(2)_FCN的原理" by chencaw
2.2 Backbone network
(1) Note that in this implementation the FC6 and FC7 layers do not use Dropout, and their kernel size is 1 (i.e. they are 1×1 convolutions).
(2) Of course, we could also implement them following the method shown in the figure above, as in the sketch below.
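A minimal sketch of that alternative (my assumption based on the original FCN paper, not the code used below): fc6 becomes a 7×7 convolution and both layers keep Dropout; padding=3 is chosen here so the 1/32 feature-map size is preserved, whereas the paper uses no padding.

import torch.nn as nn

# fc6/fc7 in the paper style: 7x7 convolution for fc6, Dropout after both layers
classifier = nn.Sequential(
    nn.Conv2d(512, 4096, kernel_size=7, padding=3),  # fc6 as a 7x7 convolution
    nn.ReLU(inplace=True),
    nn.Dropout2d(p=0.5),
    nn.Conv2d(4096, 4096, kernel_size=1),            # fc7 as a 1x1 convolution
    nn.ReLU(inplace=True),
    nn.Dropout2d(p=0.5),
)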
from torch import nn
from torchvision.models import vgg16

def vgg_block(num_convs, in_channels, out_channels):
    blk = []
    for i in range(num_convs):
        if i == 0:
            blk.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        else:
            blk.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        blk.append(nn.ReLU(inplace=True))
    blk.append(nn.MaxPool2d(kernel_size=2, stride=2))  # halves height and width
    return blk

class VGG16(nn.Module):
    def __init__(self, pretrained=True):
        super(VGG16, self).__init__()
        features = []
        features.extend(vgg_block(2, 3, 64))
        features.extend(vgg_block(2, 64, 128))
        features.extend(vgg_block(3, 128, 256))
        self.index_pool3 = len(features)  # slice end for the pool3 output (1/8)
        features.extend(vgg_block(3, 256, 512))
        self.index_pool4 = len(features)  # slice end for the pool4 output (1/16)
        features.extend(vgg_block(3, 512, 512))
        self.features = nn.Sequential(*features)
        # FC6/FC7 as 1x1 convolutions, without Dropout (see note above)
        self.conv6 = nn.Conv2d(512, 4096, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.conv7 = nn.Conv2d(4096, 4096, kernel_size=1)
        # Load pretrained params from torchvision.models.vgg16(pretrained=True).
        # The conv layers in self.features line up one-to-one with torchvision's
        # feature layers, so the parameters can be copied by position.
        if pretrained:
            pretrained_model = vgg16(pretrained=pretrained)
            pretrained_params = pretrained_model.state_dict()
            keys = list(pretrained_params.keys())
            new_dict = {}
            for index, key in enumerate(self.features.state_dict().keys()):
                new_dict[key] = pretrained_params[keys[index]]
            self.features.load_state_dict(new_dict)

    def forward(self, x):
        pool3 = self.features[:self.index_pool3](x)                       # 1/8
        pool4 = self.features[self.index_pool3:self.index_pool4](pool3)   # 1/16
        pool5 = self.features[self.index_pool4:](pool4)                   # 1/32
        conv6 = self.relu(self.conv6(pool5))                              # 1/32
        conv7 = self.relu(self.conv7(conv6))                              # 1/32
        return pool3, pool4, conv7
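A quick shape check (a hypothetical snippet, not part of the original post) confirms the 1/8, 1/16 and 1/32 strides:

import torch

model = VGG16(pretrained=False)  # skip the weight download for a quick test
x = torch.randn(1, 3, 224, 224)
pool3, pool4, conv7 = model(x)
print(pool3.shape)  # torch.Size([1, 256, 28, 28])  -> 224/8
print(pool4.shape)  # torch.Size([1, 512, 14, 14])  -> 224/16
print(conv7.shape)  # torch.Size([1, 4096, 7, 7])   -> 224/32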
3. FCN-32s implementation
(1) Note that FCN-32s attaches to the conv7 output.
(2) Code implementation; note that the input image size must be a multiple of 32.
from src.vgg_backbone import VGG16
import torch.nn as nn

class FCN_32S(nn.Module):
    def __init__(self, num_classes, backbone='vgg'):
        super(FCN_32S, self).__init__()
        if backbone == 'vgg':
            self.features = VGG16()
        # 1x1 convolutions mapping feature maps to per-class score maps
        self.scores1 = nn.Conv2d(4096, num_classes, kernel_size=1)  # on conv7
        self.relu = nn.ReLU(inplace=True)
        self.scores2 = nn.Conv2d(512, num_classes, kernel_size=1)   # on pool4 (for the 16s/8s variants)
        self.scores3 = nn.Conv2d(256, num_classes, kernel_size=1)   # on pool3 (for the 8s variant)
        # torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
        #                          stride=1, padding=0, output_padding=0,
        #                          groups=1, bias=True, dilation=1, padding_mode='zeros')
        # The input image size must be a multiple of 32; e.g. for a 224x224 input,
        # h = w = 7 before this module.
        # Formula: h_out = (h-1)*stride[0] - 2*padding[0] + dilation[0]*(kernel_size[0]-1) + output_padding[0] + 1
        # With padding=0, dilation=1, output_padding=0 this reduces to
        # h_out = (h-1)*stride + kernel_size, so stride=32, kernel_size=32 gives h_out = 32h.
        self.upsample_32x = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=32, stride=32)
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=8, stride=8)
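The listing ends before the forward pass. A minimal sketch of what a 32s-only forward might look like (my assumption, consistent with the layers defined above; it belongs inside FCN_32S and uses only scores1 and upsample_32x):

    def forward(self, x):
        # VGG16 returns the pool3 (1/8), pool4 (1/16) and conv7 (1/32) feature maps
        pool3, pool4, conv7 = self.features(x)
        scores = self.scores1(conv7)       # num_classes score map at 1/32 resolution
        return self.upsample_32x(scores)   # transposed conv back to input resolution

For a 224×224 input the score map is 7×7 before upsampling, and by the formula above (7-1)*32 + 32 = 224, so the output matches the input size.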