TensorFlow MNIST Handwritten Digits (Part 3): Starting from Scratch

After working through the official tutorial, you will notice that the official data is already prepared for you. Now suppose we only have our own handwritten digit images, i.e. we are starting from zero with nothing but raw image files. How do we recognize them?

Thanks to this blogger: http://www.cnblogs.com/denny402/p/5684431.html

MNIST image data download: http://pan.baidu.com/s/1pLMV4Kz
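
The scripts below assume that each data split comes with a list file (train.txt / test.txt) containing one "relative/path label" pair per line, for example:

mnist/train/0/00001.png 0
mnist/train/7/00234.png 7

If you only have images sorted into per-digit folders, a minimal sketch like the following can generate such a list (the folder layout mnist/train/<digit>/*.png and the helper make_list_file are assumptions, not part of the original post):

import os

def make_list_file(root, split, out_txt):
    # root: dataset root directory; split: e.g. 'mnist/train'.
    # Writes one "relative/path label" line per image, taking the label from the folder name.
    split_dir = os.path.join(root, split)
    with open(out_txt, 'w') as f:
        for digit in sorted(os.listdir(split_dir)):
            class_dir = os.path.join(split_dir, digit)
            if not os.path.isdir(class_dir):
                continue
            for name in sorted(os.listdir(class_dir)):
                f.write('%s %d\n' % (os.path.join(split, digit, name), int(digit)))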


First, create the model file, which we name mnist_inference.py.

The code is as follows:

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 16:31:46 2017

@author: hjxu
"""

import tensorflow as tf

# Network-related parameters: input width, height, and number of channels
w = 32
h = 32
c = 1

# Forward pass of the network.
# A new parameter, train, distinguishes the training phase from the test phase.
# This program uses dropout, which improves model robustness and prevents overfitting; dropout is applied only during training.
def inference(input_tensor, train, regularizer):
    # Layer 1: convolutional layer, 5x5 filters, depth 6, no zero padding ('VALID'), stride 1.
    # Shape change: 32x32x1 -> 28x28x6
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable('weight',[5,5,c,6],initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable('bias',[6],initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor,conv1_weights,strides=[1,1,1,1],padding='VALID')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1,conv1_biases))

    # Layer 2: max-pooling layer, 2x2 window, zero padding ('SAME'), stride 2.
    # Shape change: 28x28x6 -> 14x14x6
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

    # Layer 3: convolutional layer, 5x5 filters, depth 16, no zero padding, stride 1.
    # Shape change: 14x14x6 -> 10x10x16
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable('weight',[5,5,6,16],initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable('bias',[16],initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1,conv2_weights,strides=[1,1,1,1],padding='VALID')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2,conv2_biases))

    # Layer 4: max-pooling layer, 2x2 window, zero padding, stride 2.
    # Shape change: 10x10x16 -> 5x5x16
    with tf.variable_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

    # Convert the output of pooling layer 4 into the input format of fully connected layer 5.
    # Layer 4 outputs a 5x5x16 tensor per image, while the fully connected layer expects a vector,
    # so each image's 5x5x16 tensor is flattened into a vector of length 5x5x16 = 400.
    # For example, with 64 images per batch, the (64,5,5,16) pooling output becomes (64,400), with nodes = 400.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1]*pool_shape[2]*pool_shape[3]
    reshaped = tf.reshape(pool2,[-1,nodes])

    # Layer 5: fully connected layer, nodes = 5x5x16 = 400, mapping 400 -> 120.
    # Shape change: with a batch of 64 samples, 64x400 -> 64x120.
    # During training, dropout randomly sets part of the activations to zero, which helps avoid overfitting,
    # in the same spirit as 'simpler models overfit less' and as regularization, which limits the weights
    # so the model cannot fit arbitrary noise in the training data.
    # The training script below does not actually use dropout (it passes train=False), because training
    # and testing share one graph; feel free to experiment with it.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable('weight',[nodes,120],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc1_weights))
        fc1_biases = tf.get_variable('bias',[120],initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped,fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1,0.5)

    # Layer 6: fully connected layer, mapping 120 -> 84.
    # Shape change: with a batch of 64 samples, 64x120 -> 64x84.
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable('weight',[120,84],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc2_weights))
        fc2_biases = tf.get_variable('bias',[84],initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc2 = tf.nn.relu(tf.matmul(fc1,fc2_weights) + fc2_biases)
        if train:
            fc2 = tf.nn.dropout(fc2,0.5)

    # Layer 7: fully connected layer (an approximate representation of LeNet-5's output layer), 84 -> 10.
    # Shape change: with a batch of 64 samples, 64x84 -> 64x10. Applying softmax to the 64x10 matrix
    # yields, for each of the 64 images, the probability of each digit class, i.e. the final classification.
    with tf.variable_scope('layer7-fc3'):
        fc3_weights = tf.get_variable('weight',[84,10],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc3_weights))
        fc3_biases = tf.get_variable('bias',[10],initializer=tf.truncated_normal_initializer(stddev=0.1))
        logit = tf.matmul(fc2,fc3_weights) + fc3_biases
        tf.add_to_collection('logits',logit)
    return logit
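
As a quick sanity check of the shape bookkeeping above (a minimal sketch, assuming TensorFlow 1.x and mnist_inference.py on the Python path), you can build the graph once and print the logit shape:

import tensorflow as tf
from mnist_inference import inference

x = tf.placeholder(tf.float32, [None, 32, 32, 1], name='x')
logit = inference(x, False, None)  # train=False, no regularizer
print(logit.get_shape())           # expect (?, 10)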

With the network structure defined, we now write a training script named mnist_train.py:

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 16:32:54 2017

@author: hjxu
"""

from __future__ import print_function

import os
import numpy as np

import tensorflow as tf

# Load the forward-pass function defined in mnist_inference.py
from mnist_inference import inference
from skimage import io, transform


w = 32
h = 32
c = 1


### load_file returns three values: the token before the space on each line of the txt file (the image path), each line's label, and the total line count ###
def load_file(examples_list_file):
    lines = np.genfromtxt(examples_list_file, delimiter=" ", dtype=[('col1', 'S120'), ('col2', 'i8')])
    examples = []
    labels = []
    for example, label in lines:
        examples.append(example)
        labels.append(label)
    return np.asarray(examples), np.asarray(labels), len(lines)    


### An example that exercises load_file ###
def test_load_file():
    train_file = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/mnist_data/train/train.txt'
    examples, labels, examples_num = load_file(train_file)
    for i in range(examples_num):
        print('No', i, 'path', examples[i], 'label is', labels[i])



def read_img(img_folder,path_txt):
    # The list file stores paths relative to img_folder, so join them before reading.
    images, labels, examples_num = load_file(path_txt)
    img_result = []
    label_result = []
    for i in range(examples_num):
        image = io.imread(os.path.join(img_folder, images[i]))
        image = transform.resize(image, (w, h, c))  # resize also scales pixel values to [0, 1]
        img_result.append(image)
        label_result.append(labels[i])
    return np.asarray(img_result,dtype=np.float32),np.asarray(label_result,dtype=np.int32)
        
img_folder = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/'  # root directory containing the images
train_path_txt = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/mnist/train/train.txt'  # absolute path of the train image list
test_path_txt = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/mnist/test/test.txt'
logs_train_dir = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/ckpt/'  # folder where checkpoints and logs are saved

train_data,train_label = read_img(img_folder,train_path_txt)
test_data,test_label = read_img(img_folder,test_path_txt)

# Shuffle the training and test data
train_image_num = len(train_data)
train_image_index = np.arange(train_image_num)
np.random.shuffle(train_image_index)
train_data = train_data[train_image_index]
train_label = train_label[train_image_index]

test_image_num = len(test_data)
test_image_index = np.arange(test_image_num)
np.random.shuffle(test_image_index)
test_data = test_data[test_image_index]
test_label = test_label[test_image_index]


# Build the CNN
x = tf.placeholder(tf.float32,[None,w,h,c],name='x')
y_ = tf.placeholder(tf.int32,[None],name='y_')


regularizer = tf.contrib.layers.l2_regularizer(0.001)
y = inference(x,False,regularizer)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=y_)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
train_op = tf.train.AdamOptimizer(0.001).minimize(loss)
correct_prediction = tf.equal(tf.cast(tf.argmax(y,1),tf.int32),y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

# Yield batch_size samples at a time for training or testing
def get_batch(data,label,batch_size):
    for start_index in range(0,len(data)-batch_size+1,batch_size):
        slice_index = slice(start_index,start_index+batch_size)
        yield data[slice_index],label[slice_index]
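# Note: get_batch yields only full batches; any samples left over after the last
# complete batch of size batch_size are skipped in that pass.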

# Create the Session
with tf.Session() as sess:
    # Initialize all variables (weights, biases, etc.)
    sess.run(tf.global_variables_initializer())

    # Train over the entire training set train_num times (here 40 passes), 64 samples per batch.
    # train_num can be set larger.
    train_num = 40
    batch_size = 64
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    for i in range(train_num):

        train_loss,train_acc,batch_num = 0, 0, 0
        for train_data_batch,train_label_batch in get_batch(train_data,train_label,batch_size):
            _,err,acc = sess.run([train_op,loss,accuracy],feed_dict={x:train_data_batch,y_:train_label_batch})
            train_loss += err
            train_acc += acc
            batch_num += 1
        print("train loss:",train_loss/batch_num)
        print("train acc:",train_acc/batch_num)

        test_loss,test_acc,batch_num = 0, 0, 0
        for test_data_batch,test_label_batch in get_batch(test_data,test_label,batch_size):
            err,acc = sess.run([loss,accuracy],feed_dict={x:test_data_batch,y_:test_label_batch})
            test_loss += err
            test_acc += acc
            batch_num += 1
        print("test loss:",test_loss/batch_num)
        print("test acc:",test_acc/batch_num)

        if i % 20 == 0 or (i + 1) == train_num:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=i+1)

After training, checkpoint files will appear in the ckpt folder. Next we need to test the model on a single image.
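
With this save pattern (Saver.save with global_step = i + 1, saved every 20 epochs and at the end), the ckpt folder will typically contain a checkpoint file plus triples of files such as:

model.ckpt-21.data-00000-of-00001
model.ckpt-21.index
model.ckpt-21.meta

The step-21 files come from epoch i = 20, which is why the evaluation script below loads model.ckpt-21.meta.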

We create a script named evaluation_one_image.py:

# -*- coding: utf-8 -*-
from __future__ import print_function
import random
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt

w = 32
h = 32
c = 1
############## Test one specified image ##############
#image_root = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/mnist/test/6/00011.png'
#im = Image.open(image_root)
#plt.imshow(im)
#plt.show()
#im = im.resize((32,32))

########## Randomly pick one image from the txt file ##########
def load_file(examples_list_file):
    lines = np.genfromtxt(examples_list_file, delimiter=" ", dtype=[('col1', 'S120'), ('col2', 'i8')])
    examples = []
    labels = []
    for example, label in lines:
        examples.append(example)
        labels.append(label)
    return np.asarray(examples), np.asarray(labels), len(lines)    

def get_one_img(examples_list_file):
    img, label, num = load_file(examples_list_file)
    i = random.randint(0, num - 1)  # valid indices run from 0 to num-1
    print(img[i])
    return img[i]

examples_list_file = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/mnist/train/train.txt'

image_root = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/'+str(get_one_img(examples_list_file))
im = Image.open(image_root)
plt.imshow(im)
plt.show()
im = im.convert('L')  # ensure a single-channel grayscale image (an added safeguard)
im = im.resize((32, 32))
im = np.array(im).astype(np.float32) / 255.0  # scale to [0, 1] to match transform.resize at training time

batch_xs = np.reshape(im, [-1, 32, 32, 1])
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/ckpt/model.ckpt-21.meta')
    new_saver.restore(sess, tf.train.latest_checkpoint('/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/ckpt/'))
    # tf.get_collection() returns a list; we only need its first element here
    logit = tf.nn.softmax(tf.get_collection('logits')[0])
    graph = tf.get_default_graph()

    # logit depends on a placeholder, so sess.run needs the actual sample to feed it;
    # fetch the input placeholder from the restored graph by its name 'x'
    input_x = graph.get_operation_by_name('x').outputs[0]

    out = sess.run(logit, feed_dict={input_x: batch_xs})

    print(np.argmax(out))

This script can test one specified image, or pick an image at random from the list file.
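
If you would rather not go through import_meta_graph, an alternative (a minimal sketch, not the original post's method) is to rebuild the graph with inference and restore only the variables by name. Run it as a separate script so variable names do not collide; the image path and checkpoint directory below are assumed to match the ones used earlier:

import numpy as np
import tensorflow as tf
from PIL import Image
from mnist_inference import inference

# Image to classify (path assumed; substitute your own)
image_root = '/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/mnist/test/6/00011.png'
im = Image.open(image_root).convert('L').resize((32, 32))
batch_xs = np.reshape(np.array(im, dtype=np.float32) / 255.0, [1, 32, 32, 1])

x = tf.placeholder(tf.float32, [1, 32, 32, 1], name='x')
logit = tf.nn.softmax(inference(x, False, None))

with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('/home/hjxu/PycharmProjects/tf_examples/mnist_to_jjh/ckpt/'))
    out = sess.run(logit, feed_dict={x: batch_xs})
    print(np.argmax(out))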
