Preface
This post implements the ShuffleNet_V2 network architecture in chainer, mirroring the structure of the torch reference implementation, and also computes the number of parameters of ShuffleNet_V2.
Code implementation
import functools
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer.links import BatchNormalization
from chainer.functions.activation.relu import ReLU
from chainer.functions.pooling.max_pooling_2d import MaxPooling2D

def channel_shuffle(x, groups=2):
    # reshape to (n, groups, c // groups, h, w), swap the group and per-group
    # channel axes, then flatten back so channels from different groups mix
    n, c, h, w = x.shape
    x = x.reshape(n, groups, c // groups, h, w)
    x = x.transpose(0, 2, 1, 3, 4)
    x = x.reshape(n, c, h, w)
    return x
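As a quick sanity check of channel_shuffle, here is a minimal sketch using plain NumPy (the function only relies on reshape and transpose, so it works on ndarrays as well as Variables): a 4-channel input shuffled with groups=2 interleaves the channels of the two halves.

# minimal sketch: channel_shuffle on a tiny NumPy array
demo = np.arange(4, dtype=np.float32).reshape(1, 4, 1, 1)   # channels [0, 1, 2, 3]
shuffled = channel_shuffle(demo, groups=2)
print(shuffled.flatten())                                    # [0. 2. 1. 3.]: groups interleaved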
class InvertedResidual(chainer.Chain):
    def depthwise_conv(self, input_c: int, output_c: int, kernel_s: int, stride: int = 1, padding: int = 0, bias: bool = False):
        # depthwise convolution: groups equals the number of input channels
        return L.Convolution2D(in_channels=input_c, out_channels=output_c, ksize=kernel_s, stride=stride, pad=padding, nobias=not bias, groups=input_c)

    def __init__(self, input_c: int, output_c: int, stride: int):
        super(InvertedResidual, self).__init__()
        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")
        self.stride = stride
        assert output_c % 2 == 0
        branch_features = output_c // 2
        # When stride is 1, input_c must be twice branch_features.
        # In Python '<<' is a bit shift; shifting left by one bit is a quick way to multiply by 2.
        assert (self.stride != 1) or (input_c == branch_features << 1)
        # branch1 is only built for the stride-2 (spatial downsampling) block
        self.branch1 = []
        if self.stride == 2:
            self.branch1 += [('depthwise_conv_branch1', self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1))]
            self.branch1 += [('bn1_branch1', BatchNormalization(input_c))]
            self.branch1 += [('conv1_branch1', L.Convolution2D(in_channels=input_c, out_channels=branch_features, ksize=1, stride=1, pad=0, nobias=True))]
            self.branch1 += [('bn2_branch1', BatchNormalization(branch_features))]
            self.branch1 += [('_relu1_branch1', ReLU())]
        self.branch2 = []
        self.branch2 += [('conv1_branch2', L.Convolution2D(input_c if self.stride > 1 else branch_features, branch_features, ksize=1, stride=1, pad=0, nobias=True))]
        self.branch2 += [('bn1_branch2', BatchNormalization(branch_features))]
        self.branch2 += [('_relu1_branch2', ReLU())]
        self.branch2 += [('depthwise_conv_branch2', self.depthwise_conv(branch_features, branch_features, kernel_s=3, stride=self.stride, padding=1))]
        self.branch2 += [('bn2_branch2', BatchNormalization(branch_features))]
        self.branch2 += [('conv2_branch2', L.Convolution2D(branch_features, branch_features, ksize=1, stride=1, pad=0, nobias=True))]
        self.branch2 += [('bn3_branch2', BatchNormalization(branch_features))]
        self.branch2 += [('_relu2_branch2', ReLU())]
        # register only the trainable layers (names without a leading '_') as child links
        with self.init_scope():
            for n in self.branch1:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])
            for n in self.branch2:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])
    def forward(self, x):
        if self.stride == 1:
            # split the channels in half; the first half is passed through untouched
            x1, x2 = F.split_axis(x, 2, axis=1)  # x1, x2 = x.chunk(2, dim=1)
            temp_x = x2
            for n, f in self.branch2:
                if not n.startswith('_'):
                    temp_x = getattr(self, n)(temp_x)
                else:
                    temp_x = f.apply((temp_x,))[0]
            out = F.concat((x1, temp_x), axis=1)
        else:
            # stride-2 block: both branches process the full input and downsample it
            temp_x1 = x
            for n, f in self.branch1:
                if not n.startswith('_'):
                    temp_x1 = getattr(self, n)(temp_x1)
                else:
                    temp_x1 = f.apply((temp_x1,))[0]
            temp_x2 = x
            for n, f in self.branch2:
                if not n.startswith('_'):
                    temp_x2 = getattr(self, n)(temp_x2)
                else:
                    temp_x2 = f.apply((temp_x2,))[0]
            out = F.concat((temp_x1, temp_x2), axis=1)
        out = channel_shuffle(out, 2)
        return out
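To illustrate the two block variants, here is a minimal sketch; the channel sizes are hypothetical (borrowed from stage2 of the 1.0x configuration). A stride-1 block preserves the input shape, while a stride-2 block halves the spatial size and changes the channel count.

# minimal sketch with hypothetical sizes; requires the imports above
x = np.zeros((1, 116, 28, 28), dtype=np.float32)
block_s1 = InvertedResidual(116, 116, stride=1)   # stride-1 block: shape preserved
block_s2 = InvertedResidual(116, 232, stride=2)   # stride-2 block: downsampling
print(block_s1(x).shape)                          # expected (1, 116, 28, 28)
print(block_s2(x).shape)                          # expected (1, 232, 14, 14)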
class ShuffleNet_V2(chainer.Chain):
    cfgs = {
        'shufflenetv2_0.5': {'alpha': 0.5, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 48, 96, 192, 1024]},
        'shufflenetv2_1.0': {'alpha': 1.0, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 116, 232, 464, 1024]},
        'shufflenetv2_1.5': {'alpha': 1.5, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 176, 352, 704, 1024]},
        'shufflenetv2_2.0': {'alpha': 2.0, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 244, 488, 976, 2048]}
    }

    def __init__(self, model_name='shufflenetv2_1.0',
                 num_classes: int = 1000,
                 inverted_residual=InvertedResidual, **kwargs):
        super(ShuffleNet_V2, self).__init__()
        if len(self.cfgs[model_name]['stages_repeats']) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(self.cfgs[model_name]['stages_out_channels']) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = self.cfgs[model_name]['stages_out_channels']
        # input RGB image
        input_channels = 3
        output_channels = self._stage_out_channels[0]
        self.layers = []
        # stem: 3x3 stride-2 convolution followed by BN, ReLU and 3x3 stride-2 max pooling
        self.layers += [('conv1', L.Convolution2D(input_channels, output_channels, ksize=3, stride=2, pad=1, nobias=True))]
        self.layers += [('bn1', BatchNormalization(output_channels))]
        self.layers += [('_relu1', ReLU())]
        input_channels = output_channels
        self.layers += [('_maxpool', MaxPooling2D(ksize=3, stride=2, pad=1))]
        # stage2-stage4: each stage starts with one stride-2 block followed by stride-1 blocks
        stage_names = ["stage{}".format(i) for i in [2, 3, 4]]
        for name, repeats, output_channels in zip(stage_names, self.cfgs[model_name]['stages_repeats'], self._stage_out_channels[1:]):
            self.layers += [('{0}_1'.format(name), inverted_residual(input_channels, output_channels, 2))]
            for i in range(repeats - 1):
                self.layers += [('{0}_{1}'.format(name, i + 2), inverted_residual(output_channels, output_channels, 1))]
            input_channels = output_channels
        # head: 1x1 convolution, global average pooling and the fully connected classifier
        output_channels = self._stage_out_channels[-1]
        self.layers += [('conv2', L.Convolution2D(input_channels, output_channels, ksize=1, stride=1, pad=0, nobias=True))]
        self.layers += [('bn2', BatchNormalization(output_channels))]
        self.layers += [('_relu2', ReLU())]
        self.layers += [('global_pool', functools.partial(F.mean, axis=(2, 3)))]
        self.layers += [('fc', L.Linear(output_channels, num_classes))]
        # register only the trainable layers (names without a leading '_') as child links
        with self.init_scope():
            for n in self.layers:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])

    def forward(self, x):
        for n, f in self.layers:
            origin_size = x.shape
            if not n.startswith('_'):
                x = getattr(self, n)(x)
            else:
                x = f.apply((x,))[0]
            # print each layer name together with its input and output shapes
            print(n, origin_size, x.shape)
        if chainer.config.train:
            return x
        return F.softmax(x)
This class is the complete ShuffleNet_V2 implementation. Note that the forward pass distinguishes between training and inference:
during training the logits x are returned directly, while at inference time they are passed through softmax to obtain class probabilities (see the inference sketch after the usage example below).
Usage
if __name__ == '__main__':
    batch_size = 4
    n_channels = 3
    image_size = 224
    num_classes = 123
    # channels, image_size and batch_size are absorbed by **kwargs and not used;
    # only model_name and num_classes actually configure the network
    model = ShuffleNet_V2(num_classes=num_classes, channels=n_channels, image_size=image_size, batch_size=batch_size)
    print("number of parameters", model.count_params())
    x = np.random.rand(batch_size, n_channels, image_size, image_size).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)
    with chainer.using_config('train', True):
        y1 = model(x)
        loss1 = F.softmax_cross_entropy(y1, t)
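Continuing inside the same __main__ block, a minimal inference sketch: switching the 'train' config to False makes forward return softmax probabilities instead of raw logits.

    # minimal inference sketch: with train=False, forward applies softmax to the logits
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        probs = model(x)
    print(probs.shape)                 # (4, 123): one probability vector per image
    print(probs.array.sum(axis=1))     # each row sums to (approximately) 1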