前言
本文基于 chainer 实现 GoogLeNet 网络结构,参考 torch 的模块化组织方式构建 chainer 版本,并计算 GoogLeNet 的参数量。
代码实现
# Convolution + ReLU
class BasicConv2d(chainer.Chain):
    """A 2-D convolution followed by a ReLU activation.

    Entries in ``self.layers`` whose name starts with ``'_'`` are
    parameterless ops applied via ``FunctionNode.apply``; the rest are
    registered as child links and called directly.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.layers = [
            ('conv1', L.Convolution2D(in_channels=in_channels,
                                      out_channels=out_channels, **kwargs)),
            ('_relu', ReLU()),
        ]
        with self.init_scope():
            for name, link in self.layers:
                if not name.startswith('_'):
                    setattr(self, name, link)

    def forward(self, x):
        h = x
        for name, func in self.layers:
            if name.startswith('_'):
                h = func.apply((h,))[0]
            else:
                h = getattr(self, name)(h)
        return h
# Inception block
class Inception(chainer.Chain):
    """GoogLeNet Inception module: four parallel branches concatenated
    along the channel axis.

    Branches:
      1. 1x1 conv
      2. 1x1 reduce -> 3x3 conv
      3. 1x1 reduce -> 3x3 conv
         (NOTE: 3x3 rather than the paper's 5x5 — kept as-is to preserve
         the original parameter count)
      4. 3x3 max-pool -> 1x1 projection
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super(Inception, self).__init__()
        # Branch 1 ("#1x1" column of the GoogLeNet table).
        self.layers_1 = [
            ('branch1', BasicConv2d(in_channels, ch1x1, ksize=1, stride=1, pad=0)),
        ]
        # Branch 2: reduce then 3x3.
        self.layers_2 = [
            ('branch2_reduce', BasicConv2d(in_channels, ch3x3red, ksize=1, stride=1, pad=0)),
            ('branch2', BasicConv2d(ch3x3red, ch3x3, ksize=3, stride=1, pad=1)),
        ]
        # Branch 3: reduce then 3x3 (see class docstring).
        self.layers_3 = [
            ('branch3_reduce', BasicConv2d(in_channels, ch5x5red, ksize=1, stride=1, pad=0)),
            ('branch3', BasicConv2d(ch5x5red, ch5x5, ksize=3, stride=1, pad=1)),
        ]
        # Branch 4: pool then project.
        self.layers_4 = [
            ('_branch4_maxpooling', MaxPooling2D(ksize=3, stride=1, pad=1)),
            ('branch4', BasicConv2d(in_channels, pool_proj, ksize=1)),
        ]
        # Register every parameterized layer ('_'-prefixed entries are
        # parameterless and applied via FunctionNode.apply).
        with self.init_scope():
            for branch in (self.layers_1, self.layers_2, self.layers_3, self.layers_4):
                for name, link in branch:
                    if not name.startswith('_'):
                        setattr(self, name, link)

    def _run_branch(self, layers, x):
        # Apply one branch's layer sequence to x and return the result.
        for name, func in layers:
            if name.startswith('_'):
                x = func.apply((x,))[0]
            else:
                x = getattr(self, name)(x)
        return x

    def forward(self, x):
        # Each branch consumes the same input; outputs are concatenated
        # along the channel axis.
        branches = [
            self._run_branch(layers, x)
            for layers in (self.layers_1, self.layers_2, self.layers_3, self.layers_4)
        ]
        return F.concat(branches)
# Auxiliary classifier block
class InceptionAux(chainer.Chain):
    """Auxiliary classifier head attached to an intermediate Inception
    output (used during training only by the enclosing network)."""

    def __init__(self, in_channels, num_classes, output_size, alpha):
        super(InceptionAux, self).__init__()
        layers = []
        # Average-pooling downsample: 5x5 kernel, stride 3.
        layers.append(('_averagePool', AveragePooling2D(ksize=5, stride=3, pad=0)))
        output_size = int((output_size - 5 + 2 * 0) / 3 + 1)
        # 1x1 conv, depth 128 (scaled by alpha); e.g. output [batch, 128, 4, 4].
        layers.append(('conv', BasicConv2d(in_channels, int(128 * alpha), ksize=1)))
        output_size = int((output_size - 1 + 2 * 0) / 1 + 1)
        layers.append(('_dropout1', Dropout(0.7)))
        layers.append(('fc1', L.Linear(int(128 * alpha) * output_size * output_size,
                                       int(1024 * alpha))))
        layers.append(('_fc1_relu', ReLU()))
        layers.append(('_dropout2', Dropout(0.7)))
        layers.append(('fc2', L.Linear(int(1024 * alpha), num_classes)))
        self.layers = layers
        with self.init_scope():
            for name, link in self.layers:
                if not name.startswith('_'):
                    setattr(self, name, link)

    def forward(self, x):
        h = x
        for name, func in self.layers:
            if name.startswith('_'):
                h = func.apply((h,))[0]
            else:
                h = getattr(self, name)(h)
        return h
class GoogLeNet(chainer.Chain):
    """GoogLeNet (Inception v1) with two auxiliary classifiers.

    All channel counts are scaled by ``alpha``. In training mode
    ``forward`` returns ``(main_logits, aux1_logits, aux2_logits)``;
    in test mode it returns softmax probabilities of the main head only
    and skips the auxiliary branches entirely.
    """

    cfgs = {
        'googlenet': None
    }

    def __init__(self, num_classes=1000, alpha=1, channels=3, image_size=224,
                 batch_size=4, **kwargs):
        super(GoogLeNet, self).__init__()
        self.image_size = image_size
        self.alpha = alpha
        self.layers = []

        # Per-block Inception channel spec:
        # [#1x1, #3x3 reduce, #3x3, #5x5 reduce, #5x5, pool_proj]
        dict_inception = {
            '3a': [64, 96, 128, 16, 32, 32],
            '3b': [128, 128, 192, 32, 96, 64],
            '4a': [192, 96, 208, 16, 48, 64],
            '4b': [160, 112, 224, 24, 64, 64],
            '4c': [128, 128, 256, 24, 64, 64],
            '4d': [112, 144, 288, 32, 64, 64],
            '4e': [256, 160, 320, 32, 128, 128],
            '5a': [256, 160, 320, 32, 128, 128],
            '5b': [384, 192, 384, 48, 128, 128],
        }

        def scaled(key):
            # Channel spec of one block scaled by alpha, rounding each
            # entry with int() exactly as the original expressions did.
            return [int(c * alpha) for c in dict_inception[key]]

        def concat_channels(key):
            # Output channels of an Inception block:
            # branch1 + branch2 + branch3 + pool_proj.
            c = scaled(key)
            return c[0] + c[2] + c[4] + c[5]

        # N x channels x 224 x 224
        self.layers += [('conv1', BasicConv2d(channels, int(64 * alpha),
                                              ksize=7, stride=2, pad=3))]
        output_size = int((self.image_size - 7 + 2 * 3) / 2 + 1)
        # N x 64 x 112 x 112
        self.layers += [('_maxpooling1', MaxPooling2D(ksize=3, stride=2, pad=0))]
        output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
        # N x 64 x 56 x 56
        self.layers += [('conv2', BasicConv2d(int(64 * alpha), int(64 * alpha), ksize=1))]
        # N x 64 x 56 x 56
        self.layers += [('conv3', BasicConv2d(int(64 * alpha), int(192 * alpha),
                                              ksize=3, pad=1))]
        # N x 192 x 56 x 56
        self.layers += [('_maxpooling2', MaxPooling2D(ksize=3, stride=2, pad=0))]
        output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)

        # N x 192 x 28 x 28
        self.layers += [('inception3a', Inception(int(192 * alpha), *scaled('3a')))]
        # N x 256 x 28 x 28
        self.layers += [('inception3b', Inception(concat_channels('3a'), *scaled('3b')))]
        # N x 480 x 28 x 28
        self.layers += [('_maxpooling3', MaxPooling2D(ksize=3, stride=2, pad=0))]
        output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
        # N x 480 x 14 x 14
        self.layers += [('inception4a', Inception(concat_channels('3b'), *scaled('4a')))]
        # Auxiliary classifier 1, fed by inception4a's output (N x 512 x 14 x 14).
        self.layers += [('aux1', InceptionAux(concat_channels('4a'), num_classes,
                                              output_size, alpha))]
        # N x 512 x 14 x 14
        self.layers += [('inception4b', Inception(concat_channels('4a'), *scaled('4b')))]
        self.layers += [('inception4c', Inception(concat_channels('4b'), *scaled('4c')))]
        self.layers += [('inception4d', Inception(concat_channels('4c'), *scaled('4d')))]
        # Auxiliary classifier 2, fed by inception4d's output (N x 528 x 14 x 14).
        self.layers += [('aux2', InceptionAux(concat_channels('4d'), num_classes,
                                              output_size, alpha))]
        # N x 832 x 14 x 14
        self.layers += [('inception4e', Inception(concat_channels('4d'), *scaled('4e')))]
        self.layers += [('_maxpooling4', MaxPooling2D(ksize=3, stride=2, pad=0))]
        output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
        # N x 832 x 7 x 7
        self.layers += [('inception5a', Inception(concat_channels('4e'), *scaled('5a')))]
        self.layers += [('inception5b', Inception(concat_channels('5a'), *scaled('5b')))]
        # Global average pooling over the remaining spatial extent.
        # N x 1024 x 7 x 7 -> N x 1024 x 1 x 1
        self.layers += [('_avgpool', AveragePooling2D(ksize=output_size, stride=1, pad=0))]
        # Flatten to N x 1024. -1 infers the batch dimension so any batch
        # size works (the batch_size argument is kept for backward
        # compatibility but no longer baked into the graph).
        self.layers += [('_reshape', Reshape((-1, int(1024 * alpha))))]
        self.layers += [('_dropout', Dropout(0.4))]
        # N x num_classes
        self.layers += [('fc', L.Linear(int(1024 * alpha), num_classes))]

        with self.init_scope():
            for name, link in self.layers:
                if not name.startswith('_'):
                    setattr(self, name, link)

    def forward(self, x):
        aux = []
        for name, func in self.layers:
            if name.startswith('_'):
                # Parameterless op (pooling / reshape / dropout).
                x = func.apply((x,))[0]
            elif 'aux' in name:
                # Auxiliary classifiers are side branches: they consume x
                # but never feed the main path, and run only in training.
                if chainer.config.train:
                    aux.append(getattr(self, name)(x))
            else:
                x = getattr(self, name)(x)
        if chainer.config.train:
            return x, aux[0], aux[1]
        return F.softmax(x)
此类即 GoogLeNet 的完整实现。注意网络的前向传播区分了训练与测试两种模式:
训练时返回主分类器输出 x 以及两个辅助分类器的输出,测试时对主分类器输出做 softmax 得到概率。
GoogLeNet 的辅助分类器仅在训练阶段参与计算。
调用方式
if __name__ == '__main__':
    # Smoke test: build the model, report its parameter count, and run
    # one training-mode forward pass with random data.
    batch_size = 4
    n_channels = 3
    image_size = 224
    num_classes = 123

    model = GoogLeNet(num_classes=num_classes, channels=n_channels,
                      image_size=image_size, batch_size=batch_size)
    print("参数量", model.count_params())

    # Random input batch and random integer targets.
    x = np.random.rand(batch_size, n_channels, image_size, image_size).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)

    with chainer.using_config('train', True):
        # Training mode yields main output plus two auxiliary outputs.
        y1, y2, y3 = model(x)
        loss1 = F.softmax_cross_entropy(y1, t)
        loss2 = F.softmax_cross_entropy(y2, t)
        loss3 = F.softmax_cross_entropy(y3, t)
        print(loss1.data, loss2.data, loss3.data)