LeNet-5模型无法很好地处理类似ImageNet这样的大规模图像数据集。
import tensorflow as tf
# ---- Network configuration ----
# Input / output geometry.
IMAGE_SIZE = 28      # height and width of the image placeholder built in train()
NUM_CHANNELS = 1     # input depth of the first convolution kernel
INPUT_NODE = 784
OUTPUT_NODE = 10     # width of the label placeholder
NUM_LABELS = 10      # number of logits produced by the last layer

# First convolution layer: kernel edge length and output depth.
CONV1_SIZE = 5
CONV1_DEEP = 32

# Second convolution layer: kernel edge length and output depth.
CONV2_SIZE = 5
CONV2_DEEP = 64

# Number of units in the first fully connected layer.
FC_SIZE = 512
def inference(input_tensor, train, regularizer):
    """Build the forward pass of the convolutional network and return logits.

    The graph is: conv -> max-pool -> conv -> max-pool -> fully connected
    (ReLU, optional dropout) -> fully connected.

    Args:
        input_tensor: 4-D image batch fed to the first convolution. Its last
            dimension must equal NUM_CHANNELS, and its spatial dimensions
            must be statically known because they determine the size of the
            first fully connected weight matrix. The batch dimension may be
            unknown.
        train: When truthy, dropout with keep probability 0.5 is applied to
            the output of the first fully connected layer.
        regularizer: Callable that maps a weight tensor to a penalty tensor,
            or None. When provided, the penalties of the two fully connected
            weight matrices are added to the 'losses' collection.

    Returns:
        The logits tensor produced by the last fully connected layer; it has
        NUM_LABELS columns.
    """
    # Layer 1: convolution, stride 1, zero ('SAME') padding, then ReLU.
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight",
            [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(
            input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2: 2x2 max pooling, stride 2, zero ('SAME') padding.
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Layer 3: convolution, stride 1, zero ('SAME') padding, then ReLU.
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable(
            "weight",
            [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable(
            "bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Layer 4: 2x2 max pooling, stride 2, zero ('SAME') padding.
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Flatten the pooled feature maps into one row per example so they can
    # feed the fully connected layer.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]  # flattened length
    # Use -1 for the batch dimension instead of the static value: the static
    # batch size is None when the input has an unknown batch dimension, and
    # a None entry is not a valid reshape target.
    reshaped = tf.reshape(pool2, [-1, nodes])

    # Layer 5: fully connected + ReLU, with dropout when training.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            # Dropout randomly zeroes part of the activations during
            # training to reduce over-fitting.
            fc1 = tf.nn.dropout(fc1, 0.5)

    # Layer 6: fully connected output layer producing the logits.
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases
    return logit
from tensorflow.examples.tutorials.mnist import input_data
import os
import numpy as np
# ---- Training configuration ----
BATCH_SIZE = 100              # examples drawn per training step
TRAINING_STEPS = 6000         # number of training iterations

LEARNING_RATE_BASE = 0.01     # initial learning rate
LEARNING_RATE_DECAY = 0.99    # decay rate applied to the learning rate

# Coefficient of the regularization term in the loss. The spelling of the
# name is kept as-is because other code in this file refers to it.
REGULARAZTION_RATE = 0.0001

MOVING_AVERAGE_DECAY = 0.99   # decay of the exponential moving average
def train(mnist):
    """Build the training graph and run TRAINING_STEPS optimisation steps.

    Prints the loss on the current training batch every 1000 iterations.

    Args:
        mnist: Dataset object exposing ``train.next_batch(n)`` (returning an
            image array and a label array) and ``train.num_examples``.
    """
    # Placeholders: a fixed-size image batch and the matching label rows.
    x = tf.placeholder(
        tf.float32,
        [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS],
        name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # L2 penalty handed to the network builder.
    l2_penalty = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    # NOTE(review): the network is built with train=False, so the dropout
    # branch of inference() is not part of this training graph -- confirm
    # that this is intended.
    logits = inference(x, False, l2_penalty)
    global_step = tf.Variable(0, trainable=False)

    # Loss: mean cross entropy plus everything in the 'losses' collection.
    label_ids = tf.argmax(y_, 1)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Staircase exponential decay of the learning rate.
    decay_steps = mnist.train.num_examples / BATCH_SIZE
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        decay_steps,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Moving averages over all trainable variables.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_update = ema.apply(tf.trainable_variables())

    # Gradient-descent step; it also increments global_step.
    sgd_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # A single op that triggers both the gradient step and the EMA update.
    with tf.control_dependencies([sgd_step, ema_update]):
        train_op = tf.no_op(name='train')

    # Persistence helper; created here but not used further in this function.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            # Reshape the batch to the 4-D layout of the image placeholder.
            batch_images = np.reshape(
                xs, (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
            fetched = sess.run(
                [train_op, loss, global_step],
                feed_dict={x: batch_images, y_: ys})
            loss_value, step = fetched[1], fetched[2]
            if i % 1000 != 0:
                continue
            print("After %d training step(s),loss on training batch is %g."%(step,loss_value))
def main(argv=None):
    """Program entry point: load the MNIST data and start training.

    The data set is read from "/MNIST_data/" with one-hot labels.

    Args:
        argv: Accepted but not used.
    """
    dataset = input_data.read_data_sets("/MNIST_data/", one_hot=True)
    train(dataset)
# Run main() only when this file is executed as a script.
if __name__ == "__main__":
    main()
After 1 training step(s),loss on training batch is 5.48538.
After 1001 training step(s),loss on training batch is 0.769271.
After 2001 training step(s),loss on training batch is 0.675722.
After 3001 training step(s),loss on training batch is 0.65313.
After 4001 training step(s),loss on training batch is 0.639686.
After 5001 training step(s),loss on training batch is 0.65985.