深度可分离卷积本质上是一种分支网络结构。分支网络结构有以下好处:a. 可提供不同的视野域;b. 提升效率。深度可分离卷积使用通道分支,能够减少参数、提高计算效率,但同时也会造成一定的精度损失。由于训练参数少,深度可分离卷积可以部署在手机等移动设备上。tf.keras 实现较简单,仅需把卷积神经网络中除输入层外的 Conv2D 替换为 SeparableConv2D。(tensorflow2.0 学习笔记:卷积神经网络(CNN))
# Notebook setup: imports and a TensorFlow version check.
import matplotlib as mpl
import matplotlib.pyplot as plt
# %matplotlib inline  # IPython magic — only valid inside a Jupyter cell,
#                       a syntax error in plain Python, so kept as a comment.
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
# Load Fashion-MNIST and carve out a validation split.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# First 5000 training images become the validation set.
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Fit the scaler on the training pixels only, then apply that same
# transform to validation and test data.  Pixels are flattened to a
# single column for scaling, then reshaped back to NHWC (28, 28, 1).
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
print(np.max(x_train_scaled), np.min(x_train_scaled))
# Separable-convolution CNN: a plain Conv2D stem (separable convolutions
# on the raw 1-channel input would have almost nothing to factorize),
# followed by SeparableConv2D pairs with max-pooling between stages.
model = keras.models.Sequential([
    keras.layers.Conv2D(filters=32, kernel_size=3, padding='same',
                        activation='selu', input_shape=(28, 28, 1)),
    keras.layers.SeparableConv2D(filters=32, kernel_size=3,
                                 padding='same', activation='selu'),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.SeparableConv2D(filters=64, kernel_size=3,
                                 padding='same', activation='selu'),
    keras.layers.SeparableConv2D(filters=64, kernel_size=3,
                                 padding='same', activation='selu'),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.SeparableConv2D(filters=128, kernel_size=3,
                                 padding='same', activation='selu'),
    keras.layers.SeparableConv2D(filters=128, kernel_size=3,
                                 padding='same', activation='selu'),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='selu'),
    keras.layers.Dense(10, activation='softmax'),
])
# Sparse loss: labels are integer class ids, not one-hot vectors.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
model.summary()
# Training with checkpointing and early stopping.
logdir = os.path.join('./separable_cnn_callbacks')
if not os.path.exists(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir, "fashion_mnist_model.h5")

callbacks = [
    # Keep only the weights with the best validation loss.
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),
    # Stop once val_loss stops improving by at least 1e-3 for 5 epochs.
    # (Original had the garbled literal `1e - 3`, a syntax error.)
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5,
                                  min_delta=1e-3),
]

# BUG FIX: the callbacks list was built but never passed to fit(), so
# checkpointing and early stopping silently never ran.
history = model.fit(x_train_scaled, y_train, epochs=1,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)
def plot_learning_curve(history):
    """Plot every metric recorded in a Keras ``History`` object.

    Draws loss/accuracy (and their validation counterparts) on one
    figure with a grid, clamping the y-axis to [0, 3] so large initial
    losses don't flatten the interesting part of the curves.
    """
    metrics = pd.DataFrame(history.history)
    metrics.plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 3)
    plt.show()
# Visualize the training/validation curves, then report held-out
# test-set loss and accuracy.
plot_learning_curve( history)
y = model. evaluate( x_test_scaled, y_test)