DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs 论文解读
V1链接:https://blog.csdn.net/weixin_44543648/article/details/122576853
V3链接:https://blog.csdn.net/weixin_44543648/article/details/122829741
论文链接:https://arxiv.org/pdf/1606.00915.pdf
代码链接:https://github.com/dontLoveBugs/Deeplab_pytorch
DeepLab V2 要解决的问题以及部分内容与 V1 相似,这里不再赘述,详见上面的 V1 链接。
主要内容:
- 采用了空洞卷积(atrous convolution),即将卷积层的 dilation 设置为大于 1。空洞卷积的优势在于:在不增加参数量、不降低特征图分辨率的前提下扩大感受野。对比如下图所示,上面为普通卷积,下面为空洞卷积。
- 使用 ASPP(Atrous Spatial Pyramid Pooling)融合多尺度特征:实际实现中,用若干个 dilation 不同的空洞卷积分支并行处理同一张特征图,再将各分支的输出相加。多尺度融合过程如下图所示。
- 使用CRF进行边界的精确化。
- 使用 ResNet 作为 backbone(下面的代码分别使用了 ResNet-101 和 ResNet-50)。
代码:
ASPP module
class ASPP_module(nn.Module):
    """One ASPP branch: a (dilated) convolution followed by BatchNorm and ReLU.

    Args:
        inplanes: Number of channels of the input feature map.
        planes: Number of channels produced by the branch.
        rate: Dilation rate. ``rate == 1`` selects a plain 1x1 convolution;
            any other value selects a 3x3 convolution dilated by ``rate``.
    """

    def __init__(self, inplanes, planes, rate):
        super().__init__()
        if rate == 1:
            kernel_size, padding = 1, 0
        else:
            # For a 3x3 kernel at stride 1, padding == dilation keeps the
            # spatial size of the output equal to that of the input.
            kernel_size, padding = 3, rate
        self.atrous_convolution = nn.Conv2d(
            inplanes, planes, kernel_size=kernel_size,
            stride=1, padding=padding, dilation=rate, bias=False)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()
        self._init_weight()

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU to ``x``, in that order."""
        x = self.atrous_convolution(x)
        x = self.bn(x)
        return self.relu(x)

    def _init_weight(self):
        """Kaiming-normal init for conv weights; BatchNorm scale set to 1, shift to 0."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                # nn.init helpers replace direct ``.data`` mutation; the
                # resulting parameter values are the same (1 and 0).
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
网络结构
from torch.utils import model_zoo
from network.base.resnet import *
from network.base.oprations import *
class DeeplabV2(ResNet):
    """DeepLab v2: a dilated ResNet trunk followed by four summed ASPP branches.

    Args:
        n_class: Number of output channels (classes) of every ASPP branch.
        block: Residual block class handed to ``_make_layer``.
        layers: Four block counts, one per residual stage.
        pyramids: Four dilation rates, one per ASPP branch.

    NOTE(review): ``_make_layer`` and ``init_weight`` are not defined in this
    class; they are presumably inherited from the project's ``ResNet`` base -
    confirm against ``network.base.resnet``.
    """

    def __init__(self, n_class, block, layers, pyramids):
        print("Constructing DeepLabv2 model...")
        print("Number of classes: {}".format(n_class))
        super().__init__()

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max-pool.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes, affine=True)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)  # change

        # The last two stages keep stride 1 and use dilation (rate 2, then 4)
        # instead of further down-sampling.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1, rate=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, rate=4)

        # 2048 input channels: assumes a bottleneck block with 4x channel
        # expansion (512 * 4) - TODO confirm for other block types.
        self.aspp1 = ASPP_module(2048, n_class, pyramids[0])
        self.aspp2 = ASPP_module(2048, n_class, pyramids[1])
        self.aspp3 = ASPP_module(2048, n_class, pyramids[2])
        self.aspp4 = ASPP_module(2048, n_class, pyramids[3])

        self.init_weight()

    def forward(self, input):
        """Return per-class score maps resized to the spatial size of ``input``."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Every ASPP branch sees the same feature map; their outputs are summed.
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x = x1 + x2 + x3 + x4

        # F.interpolate is the supported replacement for the deprecated
        # F.upsample and takes the same arguments.
        x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
        return x

    def get_1x_lr_params(self):
        """Yield the trainable parameters of the stem and the four residual stages.

        The name suggests these are meant for the base learning rate -
        confirm against the training script.
        """
        trunk = [self.conv1, self.bn1, self.layer1, self.layer2, self.layer3, self.layer4]
        for module in trunk:
            for k in module.parameters():
                if k.requires_grad:
                    yield k

    def get_10x_lr_params(self):
        """Yield the trainable parameters of the four ASPP branches.

        The name suggests these are meant for a 10x learning rate -
        confirm against the training script.
        """
        heads = [self.aspp1, self.aspp2, self.aspp3, self.aspp4]
        for module in heads:
            for k in module.parameters():
                if k.requires_grad:
                    yield k

    def freeze_bn(self):
        """Switch every BatchNorm2d in the model (trunk and ASPP) to eval mode.

        A later ``model.train()`` call puts them back into training mode, so
        this has to be re-applied afterwards.
        """
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def freeze_backbone_bn(self):
        """Switch every BatchNorm2d of the trunk (not the ASPP heads) to eval mode.

        Iterating a stage directly yields only its immediate children, so
        BatchNorm layers nested inside residual blocks would be skipped;
        ``modules()`` walks the whole sub-tree, including the module itself.
        """
        for stage in (self.bn1, self.layer1, self.layer2, self.layer3, self.layer4):
            for m in stage.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
def resnet101(n_class, pretrained=True):
    """Build a DeepLab v2 model on a ResNet-101 layout ([3, 4, 23, 3] blocks).

    Args:
        n_class: Number of output classes, forwarded to ``DeeplabV2``.
        pretrained: When true, fetch the weights at ``model_urls['resnet101']``
            and copy every entry whose key also exists in the new model;
            entries with unknown keys are ignored.

    Returns:
        The constructed ``DeeplabV2`` instance.
    """
    net = DeeplabV2(n_class=n_class, block=Bottleneck, layers=[3, 4, 23, 3], pyramids=[6, 12, 18, 24])
    if not pretrained:
        return net

    downloaded = model_zoo.load_url(model_urls['resnet101'])
    merged = net.state_dict()
    # Keep only the downloaded tensors the model has a slot for; everything
    # else (e.g. the ASPP heads) keeps its freshly initialised values.
    merged.update({name: tensor for name, tensor in downloaded.items() if name in merged})
    net.load_state_dict(merged)
    return net
if __name__ == '__main__':
    # Smoke test: push a random batch through the network and print the output
    # size. pretrained=True makes resnet101() fetch weights via model_zoo.
    model = resnet101(n_class=21, pretrained=True)
    img = torch.randn(4, 3, 513, 513)
    with torch.no_grad():
        # Call the module itself rather than .forward() so that any
        # registered hooks are run as well.
        output = model(img)
    print(output.size())
迁移学习代码
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.models.resnet import Bottleneck,BasicBlock
class Asppmodule(nn.Module):
    """Single ASPP branch: Conv2d -> BatchNorm2d -> ReLU, stored as ``self.conv``.

    Args:
        inchannels: Number of channels of the input feature map.
        ouchannels: Number of channels produced by the branch.
        rate: Dilation rate. ``rate == 1`` gives a 1x1 convolution without
            padding; any other value gives a 3x3 convolution whose padding
            and dilation both equal ``rate``.
    """

    def __init__(self, inchannels, ouchannels, rate):
        super(Asppmodule, self).__init__()
        pointwise = rate == 1
        kernel_size = 1 if pointwise else 3
        padding = 0 if pointwise else rate
        stages = [
            nn.Conv2d(inchannels, ouchannels, kernel_size, stride=1,
                      padding=padding, dilation=rate),
            nn.BatchNorm2d(ouchannels),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / ReLU stack."""
        branch_out = self.conv(x)
        return branch_out
class DeepLab_V2(nn.Module):
    """DeepLab-v2 style head on top of a torchvision ResNet-50 trunk.

    The ``layer4`` output of the trunk is fed to four parallel ASPP branches
    whose outputs are summed. The result is returned at feature-map
    resolution; no up-sampling back to the input size happens here.

    Args:
        inchannels: Accepted but not used anywhere in this class.
        classes: Number of output channels of every ASPP branch.
    """

    def __init__(self, inchannels, classes):
        super(DeepLab_V2, self).__init__()
        # pretrained=True asks torchvision for its pretrained ResNet-50
        # weights; the last two stages trade their stride for dilation.
        trunk = models.resnet50(
            pretrained=True,
            replace_stride_with_dilation=[False, True, True],
        )
        # Expose only the layer4 feature map, under the key "out".
        self.backbone = models._utils.IntermediateLayerGetter(trunk, {"layer4": "out"})

        self.aspp = Asppmodule
        # NOTE(review): the DeepLab v2 paper lists {6, 12, 18, 24} for ASPP-L;
        # the values below differ - confirm this is intentional.
        dilations = [6, 8, 12, 24]
        # 2048 is the channel count of torchvision ResNet-50's layer4 output.
        self.aspp1 = self.aspp(2048, classes, dilations[0])
        self.aspp2 = self.aspp(2048, classes, dilations[1])
        self.aspp3 = self.aspp(2048, classes, dilations[2])
        self.aspp4 = self.aspp(2048, classes, dilations[3])

    def forward(self, x):
        """Return the sum of the four ASPP branch outputs for input ``x``."""
        features = self.backbone(x)["out"]
        fused = self.aspp1(features)
        for branch in (self.aspp2, self.aspp3, self.aspp4):
            fused = fused + branch(features)
        return fused
if __name__ == '__main__':
    # Smoke test: run one random image through the model and print the result.
    # torch.randn is used instead of torch.empty because empty() returns
    # uninitialised memory, which may contain NaN/Inf and differs between runs.
    dummy = torch.randn(1, 3, 224, 224)
    m = DeepLab_V2(3, 10)
    out = m(dummy)
    print(out)