数据集:MNIST,与这里对比。tf.nn.depthwise_conv2d 的理解看这里,主要是对卷积核参数的理解,即(高度,宽度,输入通道数,每个输入通道得到的输出通道数)。训练速度慢,收敛也慢,刚开始就像没训练的样子;只将一个卷积层改成深度可分离卷积,就增加了12次迭代。 20211213:将第一个卷积层改成深度可分离卷积后,训练始终不收敛,所以改成下列代码,多训练了16次,模型减小了6.85%。使用 sp_conv 也会不收敛,可能是 batch_normalization 的原因。 20211214:破案了,是 relu 的锅,注释掉以后,多训练了28次。看网上说好像要减小学习率。
import tensorflow as tf
import numpy as np
import random
import cv2, sys, os
import MyData
def sp_conv(name, data, kernel_size, input_num, output_num, padding, data_format='NHWC', training=False):
    """Build a depthwise-separable convolution block.

    The block is: depthwise conv -> batch norm -> 1x1 pointwise conv ->
    batch norm. No bias and no activation are applied.

    Args:
        name: Variable scope under which the block's variables are created.
        data: Input tensor laid out according to `data_format`.
        kernel_size: Side length of the square depthwise kernel.
        input_num: Number of input channels. The depthwise kernel has shape
            [kernel_size, kernel_size, input_num, 1], i.e. one output channel
            per input channel.
        output_num: Number of output channels produced by the pointwise conv.
        padding: Padding mode string passed to both convolutions.
        data_format: 'NHWC' (default) or 'NCHW'.
        training: Python bool or boolean tensor forwarded to both batch-norm
            layers. The default (False) matches the previous behaviour, where
            the layers always ran in inference mode. When True, the ops in
            tf.GraphKeys.UPDATE_OPS must be run with the train step so the
            moving statistics are updated.

    Returns:
        The tensor produced by the second batch-normalization layer.
    """
    # Batch norm must normalize over the channel axis; the layer's default
    # axis (-1) is only the channel axis for NHWC input.
    channel_axis = 1 if data_format == 'NCHW' else -1

    with tf.variable_scope(name):
        # Depthwise stage: one kernel_size x kernel_size filter per channel.
        weight = tf.get_variable(
            name='weight',
            dtype=tf.float32,
            trainable=True,
            shape=[kernel_size, kernel_size, input_num, 1],
            initializer=tf.random_normal_initializer(stddev=0.01))
        conv = tf.nn.depthwise_conv2d(
            data, weight, [1, 1, 1, 1], padding, data_format=data_format)
        conv = tf.layers.batch_normalization(
            conv, axis=channel_axis, momentum=0.9, training=training)

        # Pointwise stage: 1x1 convolution mixing channels input_num -> output_num.
        point_weight = tf.get_variable(
            name='point_weight',
            dtype=tf.float32,
            trainable=True,
            shape=[1, 1, input_num, output_num],
            initializer=tf.random_normal_initializer(stddev=0.01))
        conv = tf.nn.conv2d(
            conv, point_weight, [1, 1, 1, 1], padding, data_format=data_format)
        conv = tf.layers.batch_normalization(
            conv, axis=channel_axis, momentum=0.9, training=training)
        return conv
# ---------------------------------------------------------------------------
# Graph definition: conv -> pool -> depthwise-separable conv -> pool ->
# dense(80, relu) -> dense(10). Neither convolution stage is followed by an
# activation function.
# ---------------------------------------------------------------------------

# Network inputs: batches of 28x28x3 images and 10-column label rows
# (labels are compared through argmax below, so presumably one-hot).
data = tf.placeholder(tf.float32, [None, 28, 28, 3], name='data')
label = tf.placeholder(tf.float32, [None, 10], name='label')

with tf.variable_scope('conv1'):
    # NOTE(review): the filter declares 1 input channel while `data` has 3.
    # This presumably relies on conv2d accepting an input depth that is a
    # multiple of the filter depth (grouped convolution) - confirm that it is
    # intended and supported by the TensorFlow version in use.
    weight = tf.get_variable(
        name='weight',
        shape=[5, 5, 1, 6],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.01),
        trainable=True)
    conv1 = tf.nn.conv2d(data, weight, strides=[1, 1, 1, 1], padding='SAME')
    bias = tf.get_variable(
        name='bias',
        shape=[6],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=True)
    conv1 = tf.nn.bias_add(conv1, bias)

with tf.variable_scope('pool1'):
    # 2x2 max pooling with stride 2.
    pool1 = tf.nn.max_pool(
        conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

with tf.variable_scope('conv2'):
    # Depthwise 5x5 filter: 6 input channels, channel multiplier 1.
    weight2 = tf.get_variable(
        name='weight',
        shape=[5, 5, 6, 1],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.01),
        trainable=True)
    conv2 = tf.nn.depthwise_conv2d(
        pool1, weight2, strides=[1, 1, 1, 1], padding='VALID')
    # Pointwise 1x1 filter mixing the 6 depthwise outputs into 16 channels.
    point_weight2 = tf.get_variable(
        name='point_weight',
        shape=[1, 1, 6, 16],
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(stddev=0.01),
        trainable=True)
    conv2 = tf.nn.conv2d(
        conv2, point_weight2, strides=[1, 1, 1, 1], padding='VALID')
    bias2 = tf.get_variable(
        name='bias',
        shape=[16],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=True)
    conv2 = tf.nn.bias_add(conv2, bias2)

with tf.variable_scope('pool2'):
    pool2 = tf.nn.max_pool(
        conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

with tf.variable_scope('dense1'):
    # Flatten each sample's 5x5x16 feature map into a single vector.
    flat = tf.reshape(pool2, [-1, 5 * 5 * 16])
    dense1 = tf.layers.dense(
        inputs=flat, units=80, activation=tf.nn.relu, use_bias=True)

with tf.variable_scope('dense2'):
    # Final layer emits raw logits; softmax is applied separately below.
    dense2 = tf.layers.dense(
        inputs=dense1, units=10, activation=None, use_bias=True)

# Class probabilities (used for prediction) and the training loss, which is
# the cross-entropy summed (not averaged) over the batch.
y = tf.nn.softmax(dense2)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=label, logits=dense2)
loss = tf.reduce_sum(cross_entropy)

# Make the train step depend on any registered update ops (e.g. batch-norm
# moving-average updates) so they run together with each optimization step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(loss)

# Fraction of samples in the batch whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(label, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Session configuration: let GPU memory grow on demand, and cap this process
# at a 0.1 fraction of the GPU memory.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        allow_growth=True,
        per_process_gpu_memory_fraction=0.1))
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

# Training and test data sources.
# NOTE(review): the second and third arguments are defined by MyData.Dataset -
# presumably a shuffle/training flag and the batch size; confirm against MyData.
input_data = MyData.Dataset('/home/lwd/data/mnist/train.txt', True, 32)
test_data = MyData.Dataset('/home/lwd/data/mnist/test.txt', False, 32)

# Checkpoint saver, plus a summary writer that records the graph under ./log/.
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter('./log/', sess.graph)
# Training driver. Each pass of the outer loop is one sweep over `input_data`.
# After every sweep the mean per-batch accuracy and mean per-batch loss are
# printed. Once the mean training accuracy exceeds 0.88 the model is
# checkpointed, evaluated once on `test_data`, and the script exits.
for i in range(100000):
    total = 0
    cnt = 0
    tl = 0
    for item in input_data:
        # item[0] is fed as the image batch, item[1] as the label batch.
        _, acc, lo = sess.run(
            [train_step, accuracy, loss],
            feed_dict={data: item[0], label: item[1]})
        total += acc
        cnt += 1.0
        tl += lo

    if not cnt:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise RuntimeError(
            'training dataset yielded no batches in epoch %d' % i)

    print(i, total / cnt, tl / cnt)

    if total / cnt > 0.88:
        # Saving may fail when the parent directory is missing, so create it
        # first rather than losing the trained model at the very end.
        ckpt_dir = './checkpoint'
        if not os.path.isdir(ckpt_dir):
            os.makedirs(ckpt_dir)
        saver.save(sess, './checkpoint/mb')

        # Test-set evaluation: count samples whose arg-max prediction matches
        # the arg-max of the label row.
        xh = 0
        correct = 0
        for item in test_data:
            yy = sess.run(y, feed_dict={data: item[0]})
            batch_labels = np.asarray(item[1])[:yy.shape[0]]
            matches = np.argmax(yy, axis=1) == np.argmax(batch_labels, axis=1)
            correct += int(np.sum(matches))
            xh += yy.shape[0]

        # Flush the event file and release the session before leaving.
        summary_writer.close()
        sess.close()

        if not xh:
            raise RuntimeError('test dataset yielded no samples')
        print(correct * 1.0 / xh)
        sys.exit(0)