TensorFlow Step 4: A Multi-Layer Neural Network for MNIST Handwritten Digit Recognition

Training on 1,000 examples taken from the training set and then evaluating on the test examples shows overfitting, and both the loss value and the test accuracy fluctuate heavily during training.

# coding=utf-8
import os  
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # show only warnings and errors

"""
mnist_loader
~~~~~~~~~~~~
A library to load the MNIST image data.  For details of the data
structures that are returned, see the doc strings for ``load_data``
from tensorflow.python.ops.distributions.kullback_leibler import cross_entropy
from lib2to3.tests.data.infinite_recursion import sess_cert_st
"""
#### Libraries
# Standard library
import pickle
import gzip
# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = pickle.load(f,encoding='bytes')
    f.close()
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
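    # e.g. vectorized_result(3) -> array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])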
    e = np.zeros(10)
    e[j] = 1.0
    return e

import tensorflow as tf
import matplotlib.pyplot as plt
from random import randint

logs_path=r'c:/temp/log_mnist_softmax_2layers'
logs_path2=r'c:/temp/log_mnist_softmax_2layers_2'
batch_size=10
learning_rate=0.005 # the error becomes very large when this exceeds 0.05
training_epochs=30

training_data, validation_data, test_data = load_data()
trainData_in=training_data[0][:1000]
trainData_out=[vectorized_result(j) for j in training_data[1][:1000]]
validData_in=validation_data[0]
validData_out=[vectorized_result(j) for j in validation_data[1]]
testData_in=test_data[0]
testData_out=[vectorized_result(j) for j in test_data[1]]

x_input=tf.placeholder(tf.float32, [None,784], name='x_input')
y_desired=tf.placeholder(tf.float32,[None,10])

#########################################

w1=tf.Variable(tf.zeros([784,30]))
b1=tf.Variable(tf.zeros([30]))
y1=tf.nn.sigmoid(tf.matmul(x_input,w1)+b1)

w=tf.Variable(tf.zeros([30,10]))
b=tf.Variable(tf.zeros([10]))
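# Note: with w1 initialized to all zeros, every hidden unit receives the same
# gradient and stays identical to the others, so the 30-unit layer acts like a
# single unit; see the random-initialization sketch after the script.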

##########################################

y_output=tf.nn.softmax(tf.matmul(y1,w)+b,name='y_output')
lossFun_crossEntropy=-tf.reduce_mean(y_desired*tf.log(y_output))*1000.0
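# reduce_mean averages over all batch_size*10 entries, so the *1000.0 factor
# only rescales the plotted cost. tf.log(y_output) can hit log(0); clipping,
# e.g. tf.log(tf.clip_by_value(y_output,1e-10,1.0)), is a common safeguard.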

correct_prediction=tf.equal(tf.argmax(y_output,1),\
                             tf.argmax(y_desired,1)) # axis 1: one index per row
accuracy=tf.reduce_mean(tf.cast(correct_prediction,\
                                tf.float32)) # cast booleans to floats, then average

train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(lossFun_crossEntropy)

tf.summary.scalar('cost',lossFun_crossEntropy)
tf.summary.scalar('accuracy',accuracy)
summary_op=tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logs_writer=tf.summary.FileWriter(logs_path,graph=tf.get_default_graph())
    logs_writer2=tf.summary.FileWriter(logs_path2)
    for epoch in range(training_epochs):
        batch_count=int(len(trainData_in)/batch_size)
        for i in range(batch_count):
            batch_x=trainData_in[batch_size*i:batch_size*(i+1)]
            batch_y=trainData_out[batch_size*i:batch_size*(i+1)]
            _,summary=sess.run([train_step,summary_op],\
                               feed_dict={x_input:batch_x,\
                                          y_desired:batch_y})
            logs_writer.add_summary(summary,\
                                     epoch*batch_count+i)
#The loop above writes the training cost and accuracy to logs_path; the lines
#below write the test cost and accuracy to logs_path2 (summary_op only: the
#test set must never be fed to train_step).
            summary=sess.run(summary_op,\
                             feed_dict={x_input:testData_in,\
                                        y_desired:testData_out})
            logs_writer2.add_summary(summary,\
                                     epoch*batch_count+i)
        print('Epoch',epoch)
        print('Accuracy_train:',accuracy.eval\
              (feed_dict={x_input:trainData_in,
                          y_desired:trainData_out}))
        print('Accuracy:',accuracy.eval\
              (feed_dict={x_input:testData_in,
                          y_desired:testData_out}))
    print('Done')
            
    n=randint(0,len(testData_in)-1) # randint is inclusive at both ends
    try_input=testData_in[n] 
    try_desired=testData_out[n]  
    print(try_desired)
    print(y_output.eval(feed_dict={x_input:[try_input]}))
    plt.imshow(try_input.reshape(28,28),cmap='Greys_r') # reshape, since resize() fails on an array that does not own its data
    plt.show()
    
    saver=tf.train.Saver()
    save_path=saver.save(sess,r'c:/temp/saved_mnist_cnn/saved_mnist_cnn.ckp')
    print('Model saved to %s' % save_path)
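
The wild swings are probably tied to the all-zero weight initialization: with w1 = 0, all 30 hidden units remain identical throughout training. A minimal sketch of the usual remedy, keeping the same 784-30-10 layout (the stddev=0.1 value is my assumption, not from the original):

w1=tf.Variable(tf.truncated_normal([784,30],stddev=0.1)) # random init breaks the symmetry
b1=tf.Variable(tf.zeros([30]))
w=tf.Variable(tf.truncated_normal([30,10],stddev=0.1))
b=tf.Variable(tf.zeros([10]))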

Run TensorBoard:

tensorboard --logdir=run1:"C:\temp\log_mnist_softmax_2layers",run2:"C:\temp\log_mnist_softmax_2layers_2"

Why does the training fluctuate so much? How does TensorFlow compute the gradients, and how does it update the parameters? How does this differ from the theory at http://neuralnetworksanddeeplearning.com/chap3.html ?
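As for the update rule, minimize() is simply compute_gradients() followed by apply_gradients(): TensorFlow obtains the gradients by reverse-mode automatic differentiation (backpropagation) over the graph and applies plain SGD, v := v - learning_rate * gradient. A minimal sketch that makes this step explicit, assuming the loss and variables defined in the script above:

grads=tf.gradients(lossFun_crossEntropy,[w1,b1,w,b]) # reverse-mode autodiff (backprop)
manual_step=[v.assign_sub(learning_rate*g) # v <- v - learning_rate*grad
             for v,g in zip([w1,b1,w,b],grads)]

Running sess.run(manual_step,feed_dict=...) performs the same update as one run of train_step, so the optimizer matches the book's SGD; the fluctuation is more plausibly explained by the zero initialization and the tiny 1,000-example training set.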

As the next step, I plan to build the simplest possible network, simple enough to work through by hand, in order to study these questions in depth.
