tensorflow之lenet训练手写字及应用

我的目标是用tensorflow实现视频质量诊断,但是馒头还是需要一个一个吃,先从工程应用的角度实现用python训练手写字,并在C#中调用识别自己写的手写字。

思路如下:

使用lenet网络训练完数据后,保存为pb,再通过tensorflowsharp调用pb实现对数字的识别。

经过一天的尝试,终于可以把训练好的手写字识别模型保存为pb,并成功调用pb实现数字识别。

""" Convolutional Neural Network.

Build and train a convolutional neural network with TensorFlow.
This example is using the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/)

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import division, print_function, absolute_import

import tensorflow as tf
import cv2 as cv
import numpy as np
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data sets from /tmp/data/ with one_hot=True; the labels are
# later fed to the [None, num_classes] placeholder Y.
# NOTE(review): presumably the files are downloaded if missing - confirm.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Training Parameters
learning_rate = 0.001
num_steps =200 # number of training iterations (one mini-batch each)
batch_size = 128
display_step = 10  # print loss/accuracy every `display_step` steps

# Network Parameters
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
# NOTE(review): `dropout` is not referenced by the training loop in this
# file, which feeds the literal 0.8 to keep_prob - confirm which is intended.
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
# The explicit node names matter: the frozen-graph loader looks tensors up as
# 'pic:0' and 'prob:0'.
X = tf.placeholder(tf.float32, [None, num_input],name="pic")
# NOTE(review): 'lable' looks like a misspelling of 'label'; it is a graph
# node name, so renaming it would change the exported graph.
Y = tf.placeholder(tf.float32, [None, num_classes],name='lable')
keep_prob = tf.placeholder(tf.float32,name='prob') # dropout (keep probability)


# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Convolve ``x`` with ``W``, add the bias ``b`` and apply ReLU.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Convolution filter tensor.
        b: Bias tensor added via ``tf.nn.bias_add``.
        strides: Stride applied to both middle dimensions (default 1).

    Returns:
        The ReLU-activated output tensor ('SAME' padding is used).
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))


def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window and a matching stride of k.

    Args:
        x: Input tensor passed to ``tf.nn.max_pool``.
        k: Window size and stride for the two middle dimensions (default 2).

    Returns:
        The pooled tensor ('SAME' padding is used).
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window),
                          padding='SAME')


# Create model
def conv_net(x, weights, biases, dropout):
    """Build the two-conv-layer network and return the class logits.

    Args:
        x: Flat input batch; it is reshaped to [-1, 28, 28, 1] first.
        weights: Dict with the keys 'wc1', 'wc2', 'wd1' and 'out'.
        biases: Dict with the keys 'bc1', 'bc2', 'bd1' and 'out'.
        dropout: Forwarded as the second positional argument of
            ``tf.nn.dropout`` (the keep probability in the TF 1.x API).

    Returns:
        The un-normalised output tensor (no softmax is applied here).
    """
    # 1-D 784-feature vectors -> 4-D [batch, height, width, channel].
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two convolution stages, each followed by 2x2 max pooling.
    pooled1 = maxpool2d(conv2d(images, weights['wc1'], biases['bc1']), k=2)
    pooled2 = maxpool2d(conv2d(pooled1, weights['wc2'], biases['bc2']), k=2)

    # Flatten to the row count of the dense weight matrix so the matmul fits.
    dense_w = weights['wd1']
    flat_size = dense_w.get_shape().as_list()[0]
    flat = tf.reshape(pooled2, [-1, flat_size])

    # Fully connected layer with ReLU, then dropout.
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, dense_w), biases['bd1']))
    hidden = tf.nn.dropout(hidden, dropout)

    # Output layer: class prediction logits.
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])

# Store layers weight & bias
# Every variable is initialised from tf.random_normal with the shape given.
# The dict keys are the ones conv_net looks up.
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected, 7*7*64 inputs, 1024 outputs
    # (28x28 input halved by each of the two k=2 'SAME' max-pools -> 7x7,
    # with 64 channels from the second convolution)
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, num_classes]))
}

# One bias vector per layer, sized to that layer's output count.
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([num_classes]))
}

# Construct model
# The keep_prob placeholder is passed as conv_net's `dropout` argument, so the
# keep probability can be chosen per sess.run call.
logits = conv_net(X, weights, biases, keep_prob)
# Softmax over the logits. The explicit name "op_to_store" is the output node
# name used when the graph is frozen to modellenet.pb and when that file is
# loaded again (as 'op_to_store:0').
prediction = tf.nn.softmax(logits,name="op_to_store")

# Define loss and optimizer
# Loss is the mean softmax cross-entropy between the raw logits and labels Y.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)


# Evaluate model
# A sample counts as correct when the argmax of the softmax output equals the
# argmax of Y along axis 1; accuracy is the mean of those matches.
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training: run the optimisation loop, freeze the trained graph to
# modellenet.pb, report test accuracy and classify one image from disk.
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)
    for step in range(1, num_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Run optimization op (backprop).
        # NOTE(review): the keep probability is the literal 0.8 here, not the
        # module-level `dropout` constant (0.75) - confirm which is intended.
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy with dropout disabled.
            loss, acc = sess.run(
                [loss_op, accuracy],
                feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0})
            print("Step {}, Minibatch Loss= {:.4f}, "
                  "Training Accuracy= {:.3f}".format(step, loss, acc))

    print("Optimization Finished!")

    # Freeze the graph: variables become constants so that the .pb file is
    # self-contained. 'op_to_store' is the softmax output node.
    constant_graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, ['op_to_store'])
    # tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
    with tf.gfile.GFile('modellenet.pb', mode='wb') as f:
        f.write(constant_graph.SerializeToString())

    # Calculate accuracy for 256 MNIST test images
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={X: mnist.test.images[:256],Y: mnist.test.labels[:256],keep_prob: 1.0}))

    # Classify a single image from disk (flag 0 = load as grayscale).
    picpath = 'C:/Users/shenwei/Desktop/test/tt/3.jpg'
    image = cv.imread(picpath, 0)
    if image is None:
        # cv.imread returns None instead of raising when the file is
        # missing or unreadable; fail here with a clear message.
        raise IOError("Cannot read image: " + picpath)
    if image.shape != (28, 28):
        # The network reshapes its input to 28x28, so bring any other
        # resolution to that size before flattening.
        image = cv.resize(image, (28, 28), interpolation=cv.INTER_AREA)
    # Flatten to a single row and scale the 0-255 pixel values to 0-1.
    pixels = np.asarray(image, dtype=np.float32).reshape(1, -1) / 255
    print(sess.run(prediction, feed_dict={X: pixels, keep_prob: 1.0}))

训练出的pb文件如下:

 

 再写了一个python调用pb的demo

# -*-coding:utf-8 -*-
import tensorflow as tf
import os
import numpy as np
import sys
import io
import cv2 as cv
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')


#coding=utf-8



#-*- coding:utf-8 -*-

import tensorflow as tf


# Load one grayscale image, run it through the frozen graph stored in
# modellenet.pb and print the class probabilities and the predicted digit.
picpath = 'C:/Users/shenwei/Desktop/test/tt/3.jpg'
image = cv.imread(picpath, 0)  # flag 0 = load as grayscale
if image is None:
    # cv.imread returns None instead of raising when the file is missing
    # or unreadable; fail here with a clear message.
    raise IOError("Cannot read image: " + picpath)
if image.shape != (28, 28):
    # The 'pic' input is fed one row of 784 values (28*28 pixels), so bring
    # any other resolution to that size before flattening.
    image = cv.resize(image, (28, 28), interpolation=cv.INTER_AREA)
# Flatten to a single row and scale the 0-255 pixel values to 0-1.
data3 = np.asarray(image, dtype=np.float32).reshape(1, -1) / 255

# Read the serialized GraphDef (tf.gfile.GFile replaces the deprecated
# tf.gfile.FastGFile) and import it into a dedicated graph.
graph = tf.Graph()
with tf.gfile.GFile('modellenet.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
with graph.as_default():
    tf.import_graph_def(graph_def, name='')  # import the computation graph

# No variable initialisation is run here: the file was written by
# convert_variables_to_constants, so the weights are stored as constants.
with tf.Session(graph=graph) as sess:
    input_x = graph.get_tensor_by_name('pic:0')
    prob = graph.get_tensor_by_name('prob:0')
    op = graph.get_tensor_by_name('op_to_store:0')
    # prob=1 keeps every unit, i.e. dropout is disabled for inference.
    ret = sess.run(op, feed_dict={input_x: data3, prob: 1})
    print(ret)
    # argmax on the NumPy result; no extra graph op is needed for this.
    print(np.argmax(ret, 1))
# The blog author reports the predicted digit 3 for this sample image.


 输出结果果然是3

图片长这样

**********************************由于本人开发环境是C#环境,还需要写一个C#的demo进行测试************

C#调用opencv太过痛苦,暂时不再进行,下面我想尝试一下新的挑战

现在我们的图片都是28*28的灰度图。我想改成:训练时仍然使用28*28的图片,保存为pb之后,可以对任意分辨率的图片进行识别。

目标:识别任何分辨率的图片

尝试了一下把训练模型修改为任意分辨率,但是最后失败了。感觉难度很大,目前还是没有思路。但是我决定换一个思路来解决问题:通过opencv把任意分辨率的图片转换成和训练模型输入一样的图片(28*28的灰度图),再进行识别。尽管识别率有所降低,但是还是可以识别的。

对于分辨率的修改,参考我的这一篇文章。

https://blog.csdn.net/g0415shenw/article/details/87356832

看一下效果:

识别出来的结果:

成功识别出结果为1

至此,手写字的识别从训练到应用基本上告一段落。下面开始更加有难度的挑战,对图像进行分类。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值