Implementing LeNet in TensorFlow to classify MNIST
1. Code
The network definition:
import tensorflow as tf
import numpy as np
inp_raw = tf.placeholder("float", [None, 784])
raw_reshaped = tf.reshape(inp_raw, [-1, 28, 28, 1])
#inp = tf.placeholder("float", [None, 28, 28, 1])
outp = tf.placeholder("float", [None, 10])
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)  # random values drawn from a truncated normal distribution
    #initial = tf.random_normal(shape, stddev=0.1)
    #initial = tf.truncated_normal(shape)
    #initial = tf.random_normal(shape, stddev=1)
    #initial = tf.random_uniform(shape)
    #initial = tf.constant(0.5, shape=shape)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def model():
    # convLayer 1
    filter1 = weight_variable([5, 5, 1, 6])
    bias1 = bias_variable([6])
    conv1 = tf.nn.conv2d(raw_reshaped, filter1, strides=[1, 1, 1, 1], padding="SAME")
    output1 = tf.nn.relu(conv1 + bias1)
    # maxPoolLayer 1
    maxpool1 = tf.nn.max_pool(output1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    # convLayer 2
    filter2 = weight_variable([5, 5, 6, 16])
    bias2 = bias_variable([16])
    conv2 = tf.nn.conv2d(maxpool1, filter2, strides=[1, 1, 1, 1], padding="SAME")
    output2 = tf.nn.relu(conv2 + bias2)
    # maxPoolLayer 2
    maxpool2 = tf.nn.max_pool(output2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    # convLayer 3
    filter3 = weight_variable([5, 5, 16, 120])
    bias3 = bias_variable([120])
    conv3 = tf.nn.conv2d(maxpool2, filter3, strides=[1, 1, 1, 1], padding="SAME")
    output3 = tf.nn.relu(conv3 + bias3)
    # fullConnectionLayers
    # fullConnectionLayer 1
    output3_flat = tf.reshape(output3, [-1, 7 * 7 * 120])
    fc1_w = weight_variable([7 * 7 * 120, 80])
    fc1_b = bias_variable([80])
    output_fc1 = tf.nn.relu(tf.matmul(output3_flat, fc1_w) + fc1_b)
    # outputLayer
    op_w = weight_variable([80, 10])
    op_b = bias_variable([10])
    op = tf.nn.softmax(tf.matmul(output_fc1, op_w) + op_b)
    return op
The training code:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from LeNet_lessMem import model
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # make only the first GPU visible
config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap the process at 50% of the GPU's memory
config.gpu_options.allow_growth = True  # allocate GPU memory on demand
#sess = tf.Session(config=config)
epochs = 10000
modelo = model.model()
#correctPrediction = tf.equal(tf.argmax(modelo, 1), tf.argmax(model.outp, 1))
#accuracy = tf.reduce_mean(tf.cast(correctPrediction, "float"))
# loss function
"""loss_p1 = 10
loss_p2 = 1
lostFunction = tf.reduce_sum(
    tf.where(
        tf.greater(modelo, model.outp),
        (modelo - model.outp) * loss_p1,
        (model.outp - modelo) * loss_p2
    )
)  # asymmetrically weighted absolute error (earlier experiment)"""
lostFunction = -tf.reduce_sum(model.outp * tf.log(modelo))  # cross-entropy
#trainer = tf.train.RMSPropOptimizer(0.0001, 0.9).minimize(lostFunction)
#trainer = tf.train.GradientDescentOptimizer(0.0001).minimize(lostFunction)
#trainer = tf.train.MomentumOptimizer(0.0001, 0.01).minimize(lostFunction)
trainer = tf.train.AdamOptimizer(0.0001).minimize(lostFunction)
initV = tf.global_variables_initializer()
saver=tf.train.Saver()
correctPrediction = tf.equal(tf.argmax(modelo, 1), tf.argmax(model.outp, 1))
accuracy = tf.reduce_mean(tf.cast(correctPrediction, "float"))
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
if __name__ == "__main__":
    with tf.Session(config=config) as sess:
        sess.as_default()
        sess.run(initV)
        for i in range(epochs):
            trdp, trlp = mnist.train.next_batch(200)
            sess.run(trainer, feed_dict={model.inp_raw: trdp, model.outp: trlp})
            if i % 100 == 0:
                trainAcc = accuracy.eval(feed_dict={model.inp_raw: trdp, model.outp: trlp}, session=sess)
                print(i + 1, "epoch.", "Accuracy on training examples:", trainAcc)
        ted = mnist.test.images[0: 3000]
        tel = mnist.test.labels[0: 3000]
        testAcc = accuracy.eval(feed_dict={model.inp_raw: ted, model.outp: tel}, session=sess)
        print("Accuracy on test examples:", testAcc)
        saver.save(sess, "path/weights.ckpt")
        sess.close()
2. Problems encountered
(1) GPU memory usage
By default, TensorFlow grabs all of the GPU's memory. To avoid this, set config.gpu_options.allow_growth = True and then create the session with sess = tf.Session(config=config); TensorFlow will then allocate GPU memory on demand.
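A minimal sketch of the two common ways to limit the memory grab, isolated from the training script (the 0.4 fraction is just an illustrative value, not something used above):

import tensorflow as tf

config = tf.ConfigProto()
# Option 1: grow the allocation on demand instead of reserving everything up front
config.gpu_options.allow_growth = True
# Option 2 (alternative): cap the process at a fixed fraction of the GPU's memory
#config.gpu_options.per_process_gpu_memory_fraction = 0.4

with tf.Session(config=config) as sess:
    pass  # build and run the graph here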
(2) Loss function
Using cross-entropy as the loss function converges faster and gives better results than mean squared error. (Why? I'm not sure yet.)
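For reference, a minimal sketch of both loss definitions written against standalone placeholders (labels plays the role of model.outp and probs the role of modelo; the clip_by_value call and the logits-based variant are additions to avoid log(0), not part of the code above):

import tensorflow as tf

labels = tf.placeholder("float", [None, 10])   # one-hot targets, like model.outp
probs = tf.placeholder("float", [None, 10])    # softmax outputs, like modelo

# mean squared error over the 10 class scores
mse_loss = tf.reduce_mean(tf.reduce_sum(tf.square(labels - probs), axis=1))

# cross-entropy, clipped so that log(0) cannot produce NaN
xent_loss = -tf.reduce_sum(labels * tf.log(tf.clip_by_value(probs, 1e-10, 1.0)))

# If model() returned pre-softmax logits instead of probabilities, the built-in op
# tf.nn.softmax_cross_entropy_with_logits(labels=..., logits=...) would compute
# softmax and cross-entropy together in a numerically stable way.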
(3) Weight initialization and choice of activation function
With sigmoid activations throughout, initializing the weights with random_normal() trains without any problem. But once the activations are switched to relu, the weights must be initialized with truncated_normal(stddev=0.1) or training fails: accuracy on the test set stays flat and the weights never update. (Why? I don't know yet either. I already used a small learning rate, and since all the image values are non-negative, I also tried initializing all weights to non-negative values, but it still didn't work. Puzzling.)
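One cheap thing to check is how much the initializer scale changes the pre-activation magnitudes feeding each relu. A NumPy-only sketch (the fan-in of 5*5*16 matches convLayer 3; the batch size and the non-negative inputs are assumptions made for illustration):

import numpy as np

fan_in = 5 * 5 * 16                        # inputs feeding one filter of convLayer 3
x = np.abs(np.random.randn(1000, fan_in))  # non-negative activations, as after a relu

for name, std in [("truncated_normal(stddev=0.1)", 0.1), ("random_normal(stddev=1)", 1.0)]:
    w = np.random.randn(fan_in) * std      # rough stand-in for the initializer
    pre = x.dot(w)                         # pre-activation values before the relu
    print(name, "-> pre-activation std:", pre.std())

With stddev=1 the pre-activations come out roughly ten times larger, and the effect compounds from layer to layer, which is one plausible way for the softmax to saturate and training to stall; this is only a rough diagnostic, not a full explanation.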