XCEPTION 论文中给出的网络结构图
上图给出了网络规格的完整描述。Xception体系结构具有36个卷积层,构成了网络的特征提取基础。共计 36 层分为 Entry flow;Middle flow;Exit flow。Entry flow 包含 8 个 conv;Middle flow 包含 3*8 =24 个 conv;Exit flow 包含 4 个 conv,所以 Xception 共计 36 层。
在我们的实验评估中,我们将专门研究图像分类,因此我们的卷积基础将紧跟着逻辑回归层。可选地,可以在逻辑回归层之前插入完全连接的层。36个卷积层被构造为14个模块,除了第一个和最后一个模块外,所有这些模块周围都具有线性残差连接。
import numpy as np
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt #重要
from keras.layers import Activation, Convolution2D, Dropout, Conv2D
from keras.layers import AveragePooling2D, BatchNormalization
from keras.layers import GlobalAveragePooling2D
from keras.models import Sequential
from keras.layers import Flatten
from keras.models import Model
from keras.layers import Input
from keras.layers import MaxPooling2D
from keras.layers import SeparableConv2D
from keras import layers
from keras.regularizers import l2
#import pydotplus as plt
import os
os.environ["PATH"] += os.pathsep + 'D:/graphviz/bin/'
def run(input_shape, num_classes, l2_regularization=0.01):
regularization = l2(l2_regularization)
img_input = Input(input_shape)
# 一 Entry flow
x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input)
x = BatchNormalization(name='block1_conv1_bn')(x)
x = Activation('relu', name='block1_conv1_act')(x)
x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
x = BatchNormalization(name='block1_conv2_bn')(x)
x = Activation('relu', name='block1_conv2_act')(x)
residual = Conv2D(128, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
residual = BatchNormalization()(residual)
x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x)
x = BatchNormalization(name='block2_sepconv1_bn')(x)
x = Activation('relu', name='block2_sepconv2_act')(x)
x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x)
x = BatchNormalization(name='block2_sepconv2_bn')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x)
# 第一个add:residual = Conv2D(128, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
x = layers.add([x, residual])
residual = Conv2D(256, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
residual = BatchNormalization()(residual)
x = Activation('relu', name='block3_sepconv1_act')(x)
x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x)
x = BatchNormalization(name='block3_sepconv1_bn')(x)
x = Activation('relu', name='block3_sepconv2_act')(x)
x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x)
x = BatchNormalization(name='block3_sepconv2_bn')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x)
# 第二个add:residual = Conv2D(256, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
x = layers.add([x, residual])
residual = Conv2D(728, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
residual = BatchNormalization()(residual)
x = Activation('relu', name='block4_sepconv1_act')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x)
x = BatchNormalization(name='block4_sepconv1_bn')(x)
x = Activation('relu', name='block4_sepconv2_act')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x)
x = BatchNormalization(name='block4_sepconv2_bn')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x)
# 第三个add:residual = Conv2D(728, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
x = layers.add([x, residual])
# 二 Middle flow
for i in range(8):
residual = x
prefix = 'block' + str(i + 5)
x = Activation('relu', name=prefix + '_sepconv1_act')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x)
x = BatchNormalization(name=prefix + '_sepconv1_bn')(x)
x = Activation('relu', name=prefix + '_sepconv2_act')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x)
x = BatchNormalization(name=prefix + '_sepconv2_bn')(x)
x = Activation('relu', name=prefix + '_sepconv3_act')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x)
x = BatchNormalization(name=prefix + '_sepconv3_bn')(x)
x = layers.add([x, residual])
residual = Conv2D(1024, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
residual = BatchNormalization()(residual)
# feature maps
x = Activation('relu', name='block13_sepconv1_act')(x)
x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x)
x = BatchNormalization(name='block13_sepconv1_bn')(x)
x = Activation('relu', name='block13_sepconv2_act')(x)
x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x)
x = BatchNormalization(name='block13_sepconv2_bn')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x)
# residual = Conv2D(1024, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
x = layers.add([x, residual])
x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x)
x = BatchNormalization(name='block14_sepconv1_bn')(x)
x = Activation('relu', name='block14_sepconv1_act')(x)
x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x)
x = BatchNormalization(name='block14_sepconv2_bn')(x)
x = Activation('relu', name='block14_sepconv2_act')(x)
x = GlobalAveragePooling2D(name='avg_pool')(x)
# 输出
#output = Activation('softmax',name='predictions')(x)
output = Dense(num_classes, activation='softmax', name='predictions')(x)
#x = Dense(num_classes, activation='softmax', name='predictions')(x)
model = Model(img_input, output)
# 神经网络可视化
#plot_model(model, to_file='model.png')
plot_model(model,to_file="Xmodel.png",show_shapes=True,show_layer_names=False,rankdir='TB')
plt.figure(figsize=(10,10))
img = plt.imread("Xmodel.png")
plt.imshow(img)
plt.axis('off')
plt.show()
if __name__ == '__main__':
input_shape = (48, 48, 1)
num_classes = 7
run(input_shape, num_classes)
注:尝试过程如遇到“InvocationException: GraphViz's executables not found ”问题,请参考我的另一篇博客https://blog.csdn.net/dujuancao11/article/details/105455713
假设输入图片是48x48的灰度图片,代码运行结束生成Xmodel.png文件
读取文件内容如下:
注意:最后一层使用全连接层得到想要的输出类别。output = Dense(num_classes, activation='softmax', name='predictions')(x)