前言
本文主要基于PSPNet的网络结构搭建
配合文章:语义分割框架链接
一、PSPNet官网图
二、代码实现
1.ResNet主干网络实现
class ResBlock(PickableSequentialChain):
    """A stack of ``n_layer`` Bottleneck units forming one ResNet stage.

    The first unit (``a``) carries the stage's stride and a projection
    shortcut; the remaining ``n_layer - 1`` units (``b1`` .. ``b{n-1}``)
    keep resolution and channel count unchanged.

    Args:
        n_layer: Number of bottleneck units in the stage.
        in_channels: Channels of the input feature map.
        mid_channels: Channels of the squeezed intermediate maps.
        out_channels: Channels of the output feature map.
        stride: Spatial stride applied by the first unit.
        dilate: Dilation rate of the 3x3 convolutions.
        groups: Number of groups of the 3x3 convolutions.
        initialW: Weight initializer forwarded to the convolutions.
        bn_kwargs: Keyword arguments for BatchNormalization (``None`` -> ``{}``).
        stride_first: Put the stride on the first 1x1 conv instead of the 3x3.
        add_seblock: Append a squeeze-and-excitation block to each unit.
    """

    def __init__(self, n_layer, in_channels, mid_channels, out_channels,
                 stride, dilate=1, groups=1, initialW=None, bn_kwargs=None,
                 stride_first=False, add_seblock=False):
        super(ResBlock, self).__init__()
        # A ``{}`` default would be one shared mutable dict across every
        # ResBlock instance; use None as the sentinel instead.
        bn_kwargs = {} if bn_kwargs is None else bn_kwargs
        with self.init_scope():
            # First unit: applies the stride and a 1x1 projection shortcut
            # (residual_conv=True) to absorb the channel change.
            self.a = Bottleneck(
                in_channels, mid_channels, out_channels, stride, dilate,
                groups, initialW, bn_kwargs=bn_kwargs, residual_conv=True,
                stride_first=stride_first, add_seblock=add_seblock)
            # Remaining units: identity shortcuts, stride 1.
            for i in range(n_layer - 1):
                name = 'b{}'.format(i + 1)
                bottleneck = Bottleneck(
                    out_channels, mid_channels, out_channels, stride=1,
                    dilate=dilate, initialW=initialW, bn_kwargs=bn_kwargs,
                    residual_conv=False, add_seblock=add_seblock,
                    groups=groups)
                setattr(self, name, bottleneck)
class Bottleneck(chainer.Chain):
    """ResNet bottleneck unit: 1x1 reduce -> 3x3 (dilated) -> 1x1 expand.

    ReLU is applied after the residual addition, so ``conv3`` (and the
    optional shortcut projection) are built without an activation.

    Args:
        in_channels: Channels of the input feature map.
        mid_channels: Channels of the squeezed intermediate maps.
        out_channels: Channels of the output feature map.
        stride: Spatial stride of the unit.
        dilate: Dilation rate of the 3x3 convolution; pad == dilate so the
            spatial size is preserved when stride is 1.
        groups: Number of groups of the 3x3 convolution.
        initialW: Weight initializer forwarded to the convolutions.
        bn_kwargs: Keyword arguments for BatchNormalization (``None`` -> ``{}``).
        residual_conv: If True, project the shortcut with a strided 1x1
            convolution (needed when channels or resolution change).
        stride_first: If True the stride sits on the first 1x1 convolution;
            otherwise on the 3x3 convolution.
        add_seblock: If True, append a squeeze-and-excitation block.
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride=1,
                 dilate=1, groups=1, initialW=None, bn_kwargs=None,
                 residual_conv=False, stride_first=False, add_seblock=False):
        # None default avoids a single mutable dict shared across instances.
        bn_kwargs = {} if bn_kwargs is None else bn_kwargs
        if stride_first:
            first_stride = stride
            second_stride = 1
        else:
            first_stride = 1
            second_stride = stride
        super(Bottleneck, self).__init__()
        with self.init_scope():
            self.conv1 = Conv2DBNActiv(
                in_channels, mid_channels, 1, first_stride, 0,
                nobias=True, initialW=initialW, bn_kwargs=bn_kwargs)
            # pad = dilate keeps the 3x3 conv size-preserving at stride 1.
            self.conv2 = Conv2DBNActiv(
                mid_channels, mid_channels, 3, second_stride, dilate,
                dilate, groups, nobias=True, initialW=initialW,
                bn_kwargs=bn_kwargs)
            # activ=None: ReLU is deferred until after the residual add.
            self.conv3 = Conv2DBNActiv(
                mid_channels, out_channels, 1, 1, 0, nobias=True,
                initialW=initialW, activ=None, bn_kwargs=bn_kwargs)
            if add_seblock:
                self.se = SEBlock(out_channels)
            if residual_conv:
                # Shortcut projection uses the full unit stride regardless
                # of where the main branch places it.
                self.residual_conv = Conv2DBNActiv(
                    in_channels, out_channels, 1, stride, 0, nobias=True,
                    initialW=initialW, activ=None, bn_kwargs=bn_kwargs)

    def forward(self, x):
        h = self.conv1(x)
        h = self.conv2(h)
        h = self.conv3(h)
        if hasattr(self, 'se'):
            h = self.se(h)
        if hasattr(self, 'residual_conv'):
            residual = self.residual_conv(x)
        else:
            residual = x
        h += residual
        h = F.relu(h)
        return h
2.PSPNet网络结构
class PyramidPoolingModule(chainer.ChainList):
    """PSPNet pyramid pooling module.

    Pools the feature map to several coarse grids, projects each pooled map
    with a 1x1 conv, upsamples back to the input resolution, and
    concatenates everything (input included) along the channel axis.

    Args:
        in_channels: Channels of the incoming feature map.
        feat_size: ``(height, width)`` of the incoming feature map.
        pyramids: Output grid sizes of the pooling branches, e.g. ``[6, 3, 2, 1]``.
        initialW: Weight initializer for the 1x1 convolutions.
    """

    def __init__(self, in_channels, feat_size, pyramids, initialW=None):
        out_channels = in_channels // len(pyramids)
        # One 1x1 projection per pyramid level (generalized from four
        # hard-coded branches so len(pyramids) may vary).
        super(PyramidPoolingModule, self).__init__(
            *[Conv2DBNActiv(in_channels, out_channels, 1, 1, 0, 1,
                            initialW=initialW)
              for _ in pyramids])
        # Integer kernel sizes that tile feat_size into a p x p grid.
        kh = feat_size[0] // np.array(pyramids)
        kw = feat_size[1] // np.array(pyramids)
        self.ksizes = list(zip(kh, kw))

    def forward(self, x):
        ys = [x]
        H, W = x.shape[2:]
        for f, ksize in zip(self, self.ksizes):
            # Non-overlapping average pooling (stride == ksize).
            y = F.average_pooling_2d(x, ksize, ksize)
            y = f(y)
            # Bilinear upsample back to the input resolution.
            y = F.resize_images(y, (H, W))
            ys.append(y)
        return F.concat(ys, axis=1)
class DilatedResNet(PickableSequentialChain):
    """ResNet backbone with dilated res4/res5 stages (output stride 8).

    res4 and res5 keep stride 1 and use dilation 2 and 4 respectively, so
    the deepest features stay at 1/8 of the input resolution as PSPNet
    requires. ``pick`` exposes the res4 and res5 outputs.

    Args:
        n_layer: ResNet depth; one of 50, 101 or 152.
        initialW: Weight initializer forwarded to all convolutions.
        alpha: Integer channel-width divisor (1 = full-width network).
    """

    _blocks = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
    }

    def __init__(self, n_layer, initialW=None, alpha=1):
        # Fail with a clear message instead of an opaque KeyError.
        if n_layer not in self._blocks:
            raise ValueError(
                'n_layer must be one of {}, but {} was given'.format(
                    sorted(self._blocks), n_layer))
        n_block = self._blocks[n_layer]
        self.alpha = alpha
        super(DilatedResNet, self).__init__()
        with self.init_scope():
            # Three 3x3 convs replace the usual 7x7 stem (stride 2).
            self.conv1_1 = Conv2DBNActiv(
                None, 64 // self.alpha, 3, 2, 1, 1, initialW=initialW)
            self.conv1_2 = Conv2DBNActiv(
                64 // self.alpha, 64 // self.alpha, 3, 1, 1, 1,
                initialW=initialW)
            self.conv1_3 = Conv2DBNActiv(
                64 // self.alpha, 128 // self.alpha, 3, 1, 1, 1,
                initialW=initialW)
            self.pool1 = lambda x: F.max_pooling_2d(
                x, ksize=3, stride=2, pad=1)
            self.res2 = ResBlock(
                n_block[0], 128 // self.alpha, 64 // self.alpha,
                256 // self.alpha, 1, 1, initialW=initialW,
                stride_first=False)
            self.res3 = ResBlock(
                n_block[1], 256 // self.alpha, 128 // self.alpha,
                512 // self.alpha, 2, 1, initialW=initialW,
                stride_first=False)
            # res4/res5: stride 1 + dilation 2/4 keep the 1/8 resolution.
            self.res4 = ResBlock(
                n_block[2], 512 // self.alpha, 256 // self.alpha,
                1024 // self.alpha, 1, 2, initialW=initialW,
                stride_first=False)
            self.res5 = ResBlock(
                n_block[3], 1024 // self.alpha, 512 // self.alpha,
                2048 // self.alpha, 1, 4, initialW=initialW,
                stride_first=False)
        self.pick = ('res4', 'res5')
class PSPNet(chainer.Chain):
    """Pyramid Scene Parsing Network on a dilated ResNet backbone.

    Args:
        n_layer: Depth of the backbone ResNet (50, 101 or 152).
        n_class: Number of semantic classes to predict.
        image_size: ``(height, width)`` of the input images.
        initialW: Weight initializer; defaults to HeNormal.
        alpha: Integer channel-width divisor shared with the backbone.
    """

    def __init__(self, n_layer=50, n_class=None, image_size=None,
                 initialW=None, alpha=1):
        super(PSPNet, self).__init__()
        self.image_size = image_size
        self.alpha = alpha
        if initialW is None:
            initialW = chainer.initializers.HeNormal()
        pool_scales = [6, 3, 2, 1]
        # The backbone has output stride 8, so the pyramid pooling module
        # operates on feature maps of image_size // 8.
        reduced_size = tuple(side // 8 for side in self.image_size)
        with self.init_scope():
            self.extractor = DilatedResNet(
                n_layer=n_layer, initialW=initialW, alpha=self.alpha)
            self.ppm = PyramidPoolingModule(
                2048 // self.alpha, reduced_size, pool_scales,
                initialW=initialW)
            # The PPM doubles the channel count (input + pooled branches).
            self.head_conv1 = Conv2DBNActiv(
                4096 // self.alpha, 512 // self.alpha, 3, 1, 1,
                initialW=initialW)
            self.head_conv2 = L.Convolution2D(
                512 // self.alpha, n_class, 1, 1, 0, False, initialW)

    @property
    def n_class(self):
        # Number of classes is fixed by the final 1x1 classifier.
        return self.head_conv2.out_channels

    def forward(self, x):
        _, res5 = self.extractor(x)
        h = self.head_conv1(self.ppm(res5))
        h = self.head_conv2(h)
        # Upsample the class logits back to the input resolution.
        return F.resize_images(h, x.shape[2:])

    def predict(self, imgs):
        """Infer a per-pixel int32 label map for each CHW image in *imgs*."""
        labels = []
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            for img in imgs:
                C, H, W = img.shape
                x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
                score = self.forward(x)[0].array
                score = chainer.backends.cuda.to_cpu(score)
                if score.shape != (C, H, W):
                    dtype = score.dtype
                    score = resize(score, (H, W)).astype(dtype)
                label = np.argmax(score, axis=0).astype(np.int32)
                labels.append(label)
        return labels
调用方式
# Usage example, excerpted from a training-class method: the `self.*`
# attributes (n_layer, classes_names, image_size, alpha, gpu_devices) and
# TrainChain are defined elsewhere and are not visible in this snippet.
model = PSPNet(n_layer=self.n_layer, n_class=len(self.classes_names), image_size=(self.image_size,self.image_size), initialW=None,alpha=self.alpha)
# Wrap the model in a training chain (presumably computes the loss — confirm
# against the TrainChain definition) and keep a handle to the bare model.
self.train_chain = TrainChain(model)
self.model = self.train_chain.model
# Move the chain to the GPU when a non-negative device id is configured.
if self.gpu_devices >= 0:
chainer.cuda.get_device_from_id(self.gpu_devices).use()
self.train_chain.to_gpu()