I trained on 1,000 of the training examples and then evaluated on the test examples. The model overfits, and both the loss value and the test accuracy fluctuate heavily during training.
# coding=utf-8
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error
"""
mnist_loader
~~~~~~~~~~~~
A library to load the MNIST image data. For details of the data
structures that are returned, see the doc string for ``load_data``.
"""
#### Libraries
# Standard library
import pickle
import gzip
# Third-party libraries
import numpy as np
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    f.close()
    return (training_data, validation_data, test_data)
def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros(10)
    e[j] = 1.0
    return e
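# Quick sanity check of the two helpers above (hypothetical usage; assumes
# ../data/mnist.pkl.gz is in place):
#   tr, va, te = load_data()   # tr[0].shape == (50000, 784); tr[1].shape == (50000,)
#   vectorized_result(3)       # -> array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])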
import tensorflow as tf
import matplotlib.pyplot as plt
from random import randint
logs_path=r'c:/temp/log_mnist_softmax_2layers'
logs_path2=r'c:/temp/log_mnist_softmax_2layers_2'
batch_size=10
learning_rate=0.005 # the error becomes very large when the rate is >0.05
training_epochs=30
training_data, validation_data, test_data = load_data()
trainData_in=training_data[0][:1000]
trainData_out=[vectorized_result(j) for j in training_data[1][:1000]]
validData_in=validation_data[0]
validData_out=[vectorized_result(j) for j in validation_data[1]]
testData_in=test_data[0]
testData_out=[vectorized_result(j) for j in test_data[1]]
x_input=tf.placeholder(tf.float32, [None,784], name='x_input')
y_desired=tf.placeholder(tf.float32,[None,10])
#########################################
# NOTE: small random values instead of zeros for the hidden-layer weights;
# with w1 all zeros every hidden unit receives the same gradient, so the
# sigmoid layer can never break symmetry
w1=tf.Variable(tf.truncated_normal([784,30],stddev=0.1))
b1=tf.Variable(tf.zeros([30]))
y1=tf.nn.sigmoid(tf.matmul(x_input,w1)+b1)
w=tf.Variable(tf.zeros([30,10]))
b=tf.Variable(tf.zeros([10]))
##########################################
y_output=tf.nn.softmax(tf.matmul(y1,w)+b,name='y_output')
# clip the softmax output to avoid log(0) -> NaN; the *1000.0 merely rescales the reported cost
lossFun_crossEntropy=-tf.reduce_mean(y_desired*tf.log(tf.clip_by_value(y_output,1e-12,1.0)))*1000.0
correct_prediction=tf.equal(tf.argmax(y_output,1),
                            tf.argmax(y_desired,1)) # axis 1: one argmax index per row
accuracy=tf.reduce_mean(tf.cast(correct_prediction,
                                tf.float32)) # cast booleans to floats, then take the mean
train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(lossFun_crossEntropy)
tf.summary.scalar('cost',lossFun_crossEntropy)
tf.summary.scalar('accuracy',accuracy)
summary_op=tf.summary.merge_all()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logs_writer=tf.summary.FileWriter(logs_path,graph=tf.get_default_graph())
    logs_writer2=tf.summary.FileWriter(logs_path2)
    for epoch in range(training_epochs):
        batch_count=int(len(trainData_in)/batch_size)
        for i in range(batch_count):
            batch_x=trainData_in[batch_size*i:batch_size*(i+1)]
            batch_y=trainData_out[batch_size*i:batch_size*(i+1)]
            _,summary=sess.run([train_step,summary_op],
                               feed_dict={x_input:batch_x,
                                          y_desired:batch_y})
            logs_writer.add_summary(summary,epoch*batch_count+i)
            # The lines above log the training cost/accuracy to logs_path;
            # the lines below log the test-set cost/accuracy to logs_path2.
            # Run only summary_op here: also running train_step at this point
            # would train the network on the test set.
            summary=sess.run(summary_op,
                             feed_dict={x_input:testData_in,
                                        y_desired:testData_out})
            logs_writer2.add_summary(summary,epoch*batch_count+i)
        print('Epoch',epoch)
        print('Accuracy_train:',accuracy.eval(feed_dict={x_input:trainData_in,
                                                         y_desired:trainData_out}))
        print('Accuracy:',accuracy.eval(feed_dict={x_input:testData_in,
                                                   y_desired:testData_out}))
    print('Done')
    n=randint(0,len(testData_in)-1) # randint is inclusive at both ends
    try_input=testData_in[n]
    try_desired=testData_out[n]
    print(try_desired)
    print(y_output.eval(feed_dict={x_input:[try_input]}))
    # reshape instead of in-place resize(): the row is a view into the test
    # array and does not own its data, so resize() would raise an error
    plt.imshow(try_input.reshape(28,28),cmap='Greys_r')
    plt.show()
    saver=tf.train.Saver()
    save_path=saver.save(sess,r'c:/temp/saved_mnist_cnn/saved_mnist_cnn.ckp')
    print('Model saved to %s' % save_path)
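For later reuse, a hedged sketch of restoring this checkpoint in a fresh session (it assumes the .meta file that saver.save writes next to the checkpoint; the tensor names come from the name= arguments used when the graph was built):

import tensorflow as tf
tf.reset_default_graph()
with tf.Session() as sess:
    # import_meta_graph rebuilds the saved graph; restore() loads the weights
    saver=tf.train.import_meta_graph(r'c:/temp/saved_mnist_cnn/saved_mnist_cnn.ckp.meta')
    saver.restore(sess,r'c:/temp/saved_mnist_cnn/saved_mnist_cnn.ckp')
    graph=tf.get_default_graph()
    x_input=graph.get_tensor_by_name('x_input:0')
    y_output=graph.get_tensor_by_name('y_output:0')
    # e.g. probabilities for one 784-pixel image `img` (hypothetical input):
    # print(sess.run(y_output,feed_dict={x_input:[img]}))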
Run TensorBoard:
tensorboard --logdir=run1:"C:\temp\log_mnist_softmax_2layers",run2:"C:\temp\log_mnist_softmax_2layers_2"
?? Why does training fluctuate so much? How does TensorFlow compute the gradients, and how does it update the parameters?
How does this differ from the theory in http://neuralnetworksanddeeplearning.com/chap3.html ?
Next step: build the simplest possible network, one simple enough to work through by hand, and study these questions in depth; a first sketch follows.
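For reference: TensorFlow obtains gradients by reverse-mode automatic differentiation over the computation graph (i.e. backpropagation), and GradientDescentOptimizer then applies the plain update w <- w - eta * dC/dw, the same rule as in Nielsen's chapter 3. Below is a minimal, hand-checkable sketch along the lines planned above (a hypothetical example, not part of the script: one sigmoid neuron with cross-entropy cost, whose gradients have the closed forms dC/dw = (a-y)*x and dC/db = a-y):

import tensorflow as tf

# one input, one weight, one bias, sigmoid activation, cross-entropy cost
x=tf.placeholder(tf.float32)
y=tf.placeholder(tf.float32)
w=tf.Variable(2.0)
b=tf.Variable(2.0)
a=tf.nn.sigmoid(w*x+b)
cost=-(y*tf.log(a)+(1-y)*tf.log(1-a))
# gradients as TensorFlow computes them (reverse-mode autodiff)...
grad_w,grad_b=tf.gradients(cost,[w,b])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    g_w,g_b,a_val=sess.run([grad_w,grad_b,a],feed_dict={x:1.0,y:0.0})
    # ...versus the hand-derived formulas dC/dw=(a-y)*x and dC/db=a-y
    print(g_w,(a_val-0.0)*1.0) # the two numbers should match
    print(g_b,a_val-0.0)

If the numbers agree, the gradient computation matches the theory, and the fluctuation seen above is more plausibly explained on the data side: batches of only 10 samples give noisy gradient estimates, and the zero weight initialization noted earlier prevents the hidden layer from breaking symmetry.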