前言
本文基于 Chainer 实现 ResNet 网络结构,同时参照 PyTorch 的代码组织方式构建一个 Chainer 版本,并计算 ResNet 的参数量。
代码实现(chainer官方)
class BasicBlock_Simple(chainer.Chain):
    """Two-layer residual basic block: conv3x3 -> BN -> ReLU -> conv3x3 -> BN.

    When ``proj`` is True the identity branch is projected with a strided
    1x1 convolution + BN so that channels and resolution match the main
    branch before the addition.
    """

    def __init__(self, n_in, n_mid, n_out, stride=1, proj=False,
                 initialW=chainer.initializers.HeNormal()):
        super(BasicBlock_Simple, self).__init__()
        with self.init_scope():
            # Main branch; the first conv carries the (possibly >1) stride.
            self.conv3x3a = L.Convolution2D(n_in, n_mid, 3, stride, 1,
                                            initialW=initialW, nobias=True)
            self.conv3x3b = L.Convolution2D(n_mid, n_out, 3, 1, 1,
                                            initialW=initialW, nobias=True)
            self.bn_a = L.BatchNormalization(n_mid)
            self.bn_b = L.BatchNormalization(n_out)
            if proj:
                # Projection shortcut for shape-changing blocks.
                self.conv1x1r = L.Convolution2D(n_in, n_out, 1, stride, 0,
                                                initialW=initialW, nobias=True)
                self.bn_r = L.BatchNormalization(n_out)
        self.proj = proj

    def __call__(self, x):
        residual = x
        out = F.relu(self.bn_a(self.conv3x3a(x)))
        out = self.bn_b(self.conv3x3b(out))
        if self.proj:
            residual = self.bn_r(self.conv1x1r(x))
        # ReLU after the residual addition, as in the original ResNet paper.
        return F.relu(out + residual)
class BottleNeck_Simple(chainer.Chain):
    """Three-layer bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand.

    When ``proj`` is True the identity branch is projected with a strided
    1x1 convolution + BN so shapes match before the addition.
    """

    def __init__(self, n_in, n_mid, n_out, stride=1, proj=False,
                 initialW=chainer.initializers.HeNormal()):
        super(BottleNeck_Simple, self).__init__()
        with self.init_scope():
            # The 1x1 reduction carries the stride, then 3x3, then 1x1 expand.
            self.conv1x1a = L.Convolution2D(n_in, n_mid, 1, stride, 0,
                                            initialW=initialW, nobias=True)
            self.conv3x3b = L.Convolution2D(n_mid, n_mid, 3, 1, 1,
                                            initialW=initialW, nobias=True)
            self.conv1x1c = L.Convolution2D(n_mid, n_out, 1, 1, 0,
                                            initialW=initialW, nobias=True)
            self.bn_a = L.BatchNormalization(n_mid)
            self.bn_b = L.BatchNormalization(n_mid)
            self.bn_c = L.BatchNormalization(n_out)
            if proj:
                # Projection shortcut for shape-changing blocks.
                self.conv1x1r = L.Convolution2D(n_in, n_out, 1, stride, 0,
                                                initialW=initialW, nobias=True)
                self.bn_r = L.BatchNormalization(n_out)
        self.proj = proj

    def __call__(self, x):
        residual = x
        out = F.relu(self.bn_a(self.conv1x1a(x)))
        out = F.relu(self.bn_b(self.conv3x3b(out)))
        out = self.bn_c(self.conv1x1c(out))
        if self.proj:
            residual = self.bn_r(self.conv1x1r(x))
        # ReLU after the residual addition.
        return F.relu(out + residual)
class ResBlock_Simple(chainer.ChainList):
    """One ResNet stage: a chain of ``n_layers`` residual blocks.

    The first block receives the stride and a projection shortcut (it may
    change resolution/channels); the remaining blocks preserve the shape.
    """

    def __init__(self, block, n_layers, n_in, n_mid, n_out, stride=2):
        super(ResBlock_Simple, self).__init__()
        # First block: possibly strided, always with projection shortcut.
        self.add_link(block(n_in, n_mid, n_out, stride, True))
        # Remaining blocks: stride 1, identity shortcut.
        for _ in range(n_layers - 1):
            self.add_link(block(n_out, n_mid, n_out))

    def __call__(self, x):
        h = x
        for blk in self.children():
            h = blk(h)
        return h
class ResNet_Simple(chainer.Chain):
    """ResNet written in the style of the official Chainer examples.

    Returns logits in training mode and softmax probabilities otherwise
    (controlled by ``chainer.config.train``).
    """

    # model name -> block type, per-stage block counts, channel expansion
    cfgs={
        'resnet18':{'block':BasicBlock_Simple, 'blocks_num':[2, 2, 2, 2],'expansion':1},
        'resnet34':{'block':BasicBlock_Simple, 'blocks_num':[3, 4, 6, 3],'expansion':1},
        'resnet50':{'block':BottleNeck_Simple, 'blocks_num':[3, 4, 6, 3],'expansion':4},
        'resnet101':{'block':BottleNeck_Simple, 'blocks_num':[3, 4, 23, 3],'expansion':4},
        'resnet152':{'block':BottleNeck_Simple, 'blocks_num':[3, 8, 36, 3],'expansion':4},
    }

    def __init__(self, num_classes=1000, model_name='resnet18', channels=3,
                 image_size=224, initialW=chainer.initializers.HeNormal()):
        super(ResNet_Simple, self).__init__()
        cfg = self.cfgs[model_name]
        block = cfg['block']
        counts = cfg['blocks_num']
        exp = cfg['expansion']
        with self.init_scope():
            # Stem: 7x7/stride-2 conv + BN.
            self.conv1 = L.Convolution2D(in_channels=channels, out_channels=64,
                                         ksize=7, stride=2, pad=3,
                                         initialW=initialW, nobias=True)
            self.bn1 = L.BatchNormalization(64)
            # res2 keeps the resolution (stride 1); res3-res5 halve it.
            self.res2 = ResBlock_Simple(block, counts[0], 64, 64,
                                        64 * exp, 1)
            self.res3 = ResBlock_Simple(block, counts[1], 64 * exp,
                                        128, 128 * exp)
            self.res4 = ResBlock_Simple(block, counts[2], 128 * exp,
                                        256, 256 * exp)
            self.res5 = ResBlock_Simple(block, counts[3], 256 * exp,
                                        512, 512 * exp)
            self.fc6 = L.Linear(512 * exp, num_classes)

    def __call__(self, x):
        h = self.bn1(self.conv1(x))
        # NOTE(review): 2x2/stride-2 max pool, unlike torchvision's
        # 3x3/stride-2/pad-1 stem pooling -- intentional per the chainer
        # example this follows, but worth confirming.
        h = F.max_pooling_2d(F.relu(h), 2, 2)
        for stage in (self.res2, self.res3, self.res4, self.res5):
            h = stage(h)
        # Global average pooling over the full remaining spatial extent.
        h = F.average_pooling_2d(h, h.shape[2:], stride=1)
        h = self.fc6(h)
        if chainer.config.train:
            return h
        return F.softmax(h)
代码实现(基于torch结构版的chainer)
class BasicBlock(chainer.Chain):
    """PyTorch-style basic residual block built from named (name, op) pairs.

    Names starting with ``_`` are parameterless FunctionNodes (applied via
    ``.apply``); all other names are registered as child links and looked
    up with ``getattr`` at call time.
    """

    expansion = 1

    def __init__(self, layers_num, block_num, in_channel, out_channel,
                 stride=1, downsample=None, initialW=None, **kwargs):
        super(BasicBlock, self).__init__()
        prefix = 'block{0}_{1}'.format(layers_num, block_num)
        # Main branch as an ordered list of (name, op) pairs.
        self.layers = [
            (prefix + '_conv1',
             L.Convolution2D(in_channels=in_channel, out_channels=out_channel,
                             ksize=3, stride=stride, pad=1, nobias=True,
                             initialW=initialW)),
            (prefix + '_bn1', L.BatchNormalization(out_channel)),
            ('_' + prefix + '_relu', ReLU()),
            (prefix + '_conv2',
             L.Convolution2D(in_channels=out_channel, out_channels=out_channel,
                             ksize=3, stride=1, pad=1, nobias=True,
                             initialW=initialW)),
            (prefix + '_bn2', L.BatchNormalization(out_channel)),
        ]
        self.downsample = downsample
        with self.init_scope():
            # Register only the parameterized ops as child links.
            for name, op in self.layers:
                if not name.startswith('_'):
                    setattr(self, name, op)
            if self.downsample is not None:
                for name, op in self.downsample:
                    if not name.startswith('_'):
                        setattr(self, name, op)

    def __call__(self, x):
        shortcut = x
        if self.downsample is not None:
            # Projection shortcut: 1x1 conv + BN supplied by the caller.
            for name, op in self.downsample:
                if name.startswith('_'):
                    shortcut = op.apply((shortcut,))[0]
                else:
                    shortcut = getattr(self, name)(shortcut)
        h = x
        for name, op in self.layers:
            if name.startswith('_'):
                h = op.apply((h,))[0]
            else:
                h = getattr(self, name)(h)
        # ReLU after the residual addition.
        return F.relu(h + shortcut)
class Bottleneck(chainer.Chain):
    """PyTorch-style bottleneck residual block (1x1 -> 3x3 -> 1x1*expansion).

    Names starting with ``_`` are parameterless FunctionNodes (applied via
    ``.apply``); all other names are registered as child links and looked
    up with ``getattr`` at call time.
    """

    # Output channel multiplier of the final 1x1 convolution.
    expansion = 4

    def __init__(self, layers_num, block_num, in_channel, out_channel,
                 stride=1, downsample=None, initialW=None, **kwargs):
        super(Bottleneck, self).__init__()
        prefix = 'block{0}_{1}'.format(layers_num, block_num)
        # Main branch as an ordered list of (name, op) pairs; the 3x3 conv
        # carries the stride.
        self.layers = [
            (prefix + '_conv1',
             L.Convolution2D(in_channels=in_channel, out_channels=out_channel,
                             ksize=1, stride=1, nobias=True,
                             initialW=initialW)),
            (prefix + '_bn1', L.BatchNormalization(out_channel)),
            ('_' + prefix + '_relu1', ReLU()),
            (prefix + '_conv2',
             L.Convolution2D(in_channels=out_channel, out_channels=out_channel,
                             ksize=3, stride=stride, nobias=True, pad=1,
                             initialW=initialW)),
            (prefix + '_bn2', L.BatchNormalization(out_channel)),
            ('_' + prefix + '_relu2', ReLU()),
            (prefix + '_conv3',
             L.Convolution2D(in_channels=out_channel,
                             out_channels=out_channel * self.expansion,
                             ksize=1, stride=1, nobias=True,
                             initialW=initialW)),
            (prefix + '_bn3',
             L.BatchNormalization(out_channel * self.expansion)),
        ]
        self.downsample = downsample
        with self.init_scope():
            # Register only the parameterized ops as child links.
            for name, op in self.layers:
                if not name.startswith('_'):
                    setattr(self, name, op)
            if self.downsample is not None:
                for name, op in self.downsample:
                    if not name.startswith('_'):
                        setattr(self, name, op)

    def forward(self, x):
        shortcut = x
        if self.downsample is not None:
            # Projection shortcut: 1x1 conv + BN supplied by the caller.
            for name, op in self.downsample:
                if name.startswith('_'):
                    shortcut = op.apply((shortcut,))[0]
                else:
                    shortcut = getattr(self, name)(shortcut)
        h = x
        for name, op in self.layers:
            if name.startswith('_'):
                h = op.apply((h,))[0]
            else:
                h = getattr(self, name)(h)
        # ReLU after the residual addition.
        return F.relu(h + shortcut)
class ResNet_Complex(chainer.Chain):
    """ResNet built in the PyTorch code-organization style, on Chainer.

    Layers are kept in ordered (name, op) lists; names starting with ``_``
    are parameterless FunctionNodes applied via ``.apply``, all other names
    are registered as child links and looked up with ``getattr``.

    Fixes vs. the original: the leftover debug ``print`` in ``forward`` is
    removed, and the ``"blobk_{0}_0"`` attribute-name typo is corrected to
    ``"block_{0}_0"`` (the name is only written and read through the same
    tuple, so the rename is self-consistent).
    """

    # model name -> residual block type and per-stage block counts
    cfgs={
        'resnet18':{'block':BasicBlock, 'blocks_num':[2, 2, 2, 2]},
        'resnet34':{'block':BasicBlock, 'blocks_num':[3, 4, 6, 3]},
        'resnet50':{'block':Bottleneck, 'blocks_num':[3, 4, 6, 3]},
        'resnet101':{'block':Bottleneck, 'blocks_num':[3, 4, 23, 3]},
        'resnet152':{'block':Bottleneck, 'blocks_num':[3, 8, 36, 3]},
    }

    def _make_layer(self, layers_num, block, channel, block_num, stride=1,
                    initialW=None):
        """Build one stage as a list of (name, block) pairs.

        A projection shortcut (1x1 conv + BN) is created for the first block
        whenever the stride or the channel count changes.
        """
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = [
                ("downsample_{0}".format(layers_num),
                 L.Convolution2D(in_channels=self.in_channel,
                                 out_channels=channel * block.expansion,
                                 ksize=1, stride=stride, nobias=True,
                                 initialW=initialW)),
                ("downsample_bn_{0}".format(layers_num),
                 L.BatchNormalization(channel * block.expansion)),
            ]
        layers = []
        # First residual block of the stage carries the stride/projection.
        layers += [("block_{0}_0".format(layers_num),
                    block(layers_num, 0, self.in_channel, channel,
                          downsample=downsample, stride=stride,
                          initialW=initialW))]
        self.in_channel = channel * block.expansion
        # Remaining blocks keep the shape; start indexing at 1.
        for idx in range(1, block_num):
            layers += [('block_{0}_{1}'.format(layers_num, idx),
                        block(layers_num, idx, self.in_channel, channel))]
        return layers

    def __init__(self, num_classes=1000, model_name='resnet18', channels=3,
                 image_size=224, initialW=chainer.initializers.HeNormal(),
                 **kwargs):
        super(ResNet_Complex, self).__init__()
        block = self.cfgs[model_name]['block']
        blocks_num = self.cfgs[model_name]['blocks_num']
        self.in_channel = 64
        out_size = image_size
        layers = []
        # Stem: e.g. 224*224*3 -> int((224-7+2*3)/2+1) = 112*112*64
        out_size = int((out_size - 7 + 2 * 3) / 2 + 1)
        layers += [('conv1', L.Convolution2D(channels, self.in_channel,
                                             ksize=7, stride=2, pad=3,
                                             nobias=True, initialW=initialW))]
        layers += [('bn1', L.BatchNormalization(self.in_channel))]
        layers += [('_relu1', ReLU())]
        # NOTE(review): chainer pooling defaults to cover_all=True, so the
        # actual pooled size can exceed ceil(out_size/2) by one for some
        # input sizes -- confirm before relying on other image_size values.
        out_size = math.ceil(out_size / 2)
        layers += [("_maxpool1", MaxPooling2D(ksize=3, stride=2, pad=1))]
        # Four residual stages; stages 2-4 halve the resolution.
        self.layer1 = self._make_layer(1, block, 64, blocks_num[0],
                                       initialW=initialW)
        out_size = math.ceil(out_size / 2)
        self.layer2 = self._make_layer(2, block, 128, blocks_num[1], stride=2,
                                       initialW=initialW)
        out_size = math.ceil(out_size / 2)
        self.layer3 = self._make_layer(3, block, 256, blocks_num[2], stride=2,
                                       initialW=initialW)
        out_size = math.ceil(out_size / 2)
        self.layer4 = self._make_layer(4, block, 512, blocks_num[3], stride=2,
                                       initialW=initialW)
        layers += [('layer1', self.layer1)]
        layers += [('layer2', self.layer2)]
        layers += [('layer3', self.layer3)]
        layers += [('layer4', self.layer4)]
        # Global average pool over the tracked remaining spatial extent.
        layers += [('_avgpool', AveragePooling2D(ksize=out_size, stride=2,
                                                 pad=0))]
        layers += [('fc', L.Linear(512 * block.expansion, num_classes,
                                   initialW=initialW))]
        self.layers = layers
        with self.init_scope():
            for n in self.layers:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])
                # A 'layerN' entry is itself a list of named blocks;
                # register each contained block as a child link too.
                if n[0].startswith('layer'):
                    for each_layer in n[1]:
                        if not each_layer[0].startswith('_'):
                            setattr(self, each_layer[0], each_layer[1])

    def forward(self, x):
        """Run the network: logits when training, softmax probabilities otherwise."""
        for n, f in self.layers:
            if not n.startswith('_'):
                if n.startswith('layer'):
                    # A stage: apply each residual block in order.
                    for n2, f2 in f:
                        if not n2.startswith('_'):
                            x = getattr(self, n2)(x)
                        else:
                            x = f2.apply((x,))[0]
                else:
                    x = getattr(self, n)(x)
            else:
                # Parameterless FunctionNode (ReLU / pooling).
                x = f.apply((x,))[0]
        if chainer.config.train:
            return x
        return F.softmax(x)
注意:此类就是 ResNet 的完整实现。网络的前向传播区分了训练与测试两种模式:
训练过程中直接返回 x(logits),测试过程中会经过 softmax 得出各类别概率。
调用方式
if __name__ == '__main__':
    # Smoke test: build both variants, compare parameter counts, and run a
    # single forward pass + softmax cross-entropy on random data.
    batch_size = 4
    n_channels = 3
    image_size = 299
    num_classes = 10

    # Models are constructed first so the NumPy RNG stream matches the
    # original script (initializers also draw from np.random).
    model_simple = ResNet_Simple(num_classes=num_classes,
                                 channels=n_channels, image_size=image_size)
    model_complex = ResNet_Complex(num_classes=num_classes,
                                   channels=n_channels, image_size=image_size)
    print("参数量", model_simple.count_params())
    print("参数量", model_complex.count_params())

    # Random input batch and integer class targets.
    x = np.random.rand(batch_size, n_channels, image_size,
                       image_size).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)

    with chainer.using_config('train', True):
        # In train mode both models return raw logits.
        y1 = model_simple(x)
        y2 = model_complex(x)
        loss1 = F.softmax_cross_entropy(y1, t)
        loss2 = F.softmax_cross_entropy(y2, t)
        print(loss1.data, loss2.data)