**手写数字识别我分三个部分说明:**
- 步骤简要说明(看懂大致流程);
- 代码实现(完整代码);
- 说明(使用说明)
步骤简要说明:
前向传播:搭建模型。前面这张图只是搭建神经网络的前向传播,也就是神经网络的主体框架,是最重要的部分。
反向传播:优化模型参数。这应该是老套路,通过梯度下降法或者其他方法使得损失函数变得越来越小,从而优化前向传播的参数权重W、偏置B。其中有些步骤我把它拎出来大致简述下:
- 滑动平均(MovingAverage):使下一次参数更新考虑到前面数据的影响。
- 正则化项(regularize):抑制某些参数(对模型影响很大的参数)对模型的影响或贡献,防止过拟合。
- 指数学习率衰减(exponential_decay):自动调节学习率,一是加快训练速度;二是防止学习率过大从而导致的待优化参数取不到最优值。
- 交叉熵(cross_entropy):分类任务常用的损失函数,用来衡量预测的概率分布与真实标签之间的差距。
测试程序:在建立好的模型上测试新的数据集,观察其准确率是多少。这里知识点无非就是模型的加载。
应用程序:将自己的图片放入模型看看预测值,即是整个神经网络的目的。我是通过OpenCV进行处理图片的,也可以利用PIL。我这里有三个版本。
- app1.0是输入要预测图片的个数,然后一张一张的输入图片地址,进行预测,最后将结果利用可视化函数显示。
- app2.0在1.0的基础上加了批量化处理,即是将指定图片自动读入模型,解放双手。
- app3.0在2.0的基础上优化了阈值的取法,通过频率直方图观察图片像素值频率的分布,从而找到合适的阈值(这里还是有点麻烦,后面再完善);添加了准确率显示,不用一个一个地用眼睛去数啦。
**代码实现**
- 前向传播部分(forward.py)
import tensorflow as tf
# Side length of the square input image (used for both height and width).
IMAGE_SIZE = 28
# Number of channels in the input image.
NUM_CHANNELS = 1
# Side length of the first convolution filter.
CONV1_SIZE = 5
# Number of filters (output channels) in the first convolution layer.
CONV1_KERNEL_NUM = 32
# Side length of the second convolution filter.
CONV2_SIZE = 5
# Number of filters (output channels) in the second convolution layer.
CONV2_KERNEL_NUM = 64
# Width of the first fully connected layer.
FC_NODE = 512
# Width of the output layer (one logit per class).
OUTPUT_NODE = 10
def getWeight(shape,regularizer):
    """Create a weight variable and optionally register its L2 penalty.

    Args:
        shape: Shape of the variable to create.
        regularizer: Scale passed to the L2 regularizer, or None to skip
            regularization entirely.

    Returns:
        A tf.Variable initialised from a truncated normal with stddev 0.1.
    """
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    # Compare against None by identity; "!= None" relies on the operand's
    # own comparison operator.
    if regularizer is not None:
        # The penalty is collected under "losses" so the training code can
        # sum it into the total loss.
        penalty = tf.contrib.layers.l2_regularizer(regularizer)(w)
        tf.add_to_collection("losses", penalty)
    return w
def getBias(shape):
    """Return a bias variable of the given shape, initialised to zeros."""
    return tf.Variable(tf.zeros(shape))
def conv2d(x,w):
    """Convolve x with filter w using a stride of 1 and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding="SAME")
def max_pool_2x2(x):
    """Apply max pooling with a 2x2 window, stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding="SAME")
def forward(x,train,regularizer):
    """Build the forward pass: two conv/ReLU/pool stages and two dense layers.

    Args:
        x: Input tensor of shape [batch, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS].
            The batch dimension may be static or unknown (None).
        train: When truthy, dropout is applied to the first dense layer.
        regularizer: L2 scale forwarded to getWeight, or None for no penalty.

    Returns:
        The logits tensor of shape [batch, OUTPUT_NODE] (no softmax applied).
    """
    # NOTE: variables are created in a fixed order (conv1, conv2, fc1, fc2).
    # Keep this order; checkpoints restore variables by their auto-generated
    # names, which depend on creation order.
    conv1_w = getWeight([CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_KERNEL_NUM], regularizer)
    conv1_b = getBias([CONV1_KERNEL_NUM])
    conv1 = conv2d(x, conv1_w)
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_b))
    pool1 = max_pool_2x2(relu1)

    conv2_w = getWeight([CONV2_SIZE, CONV2_SIZE, CONV1_KERNEL_NUM, CONV2_KERNEL_NUM], regularizer)
    conv2_b = getBias([CONV2_KERNEL_NUM])
    conv2 = conv2d(pool1, conv2_w)
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_b))
    pool2 = max_pool_2x2(relu2)

    # Flatten the feature maps for the dense layers. Using -1 for the batch
    # dimension (instead of the static pool_shape[0]) keeps this working when
    # the caller's placeholder leaves the batch size unspecified.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [-1, nodes])

    fc1_w = getWeight([nodes, FC_NODE], regularizer)
    fc1_b = getBias([FC_NODE])
    fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_w) + fc1_b)
    if train:
        # Second argument is the keep probability in the TF 1.x API.
        fc1 = tf.nn.dropout(fc1, 0.5)

    fc2_w = getWeight([FC_NODE, OUTPUT_NODE], regularizer)
    fc2_b = getBias([OUTPUT_NODE])
    y = tf.matmul(fc1, fc2_w) + fc2_b
    return y
- 反向传播部分(backward.py)
import tensorflow as tf
import numpy as np
import os
import forward
from tensorflow.examples.tutorials.mnist import input_data
# Number of training examples fed per step.
BATCH_SIZE = 100
# Scale of the L2 weight penalty passed to forward.forward.
REGULARIZER = 0.0001
# Initial learning rate for exponential decay.
LEARNING_RATE_BASE = 0.005
# Decay rate applied to the learning rate.
LEARNING_RATE_DECAY = 0.99
# Decay used by the exponential moving average of the trainable variables.
MOVING_AVERAGE_DECAY = 0.99
# Number of training iterations to run.
STEP = 50000
# Directory where checkpoints are written and looked up.
MODEL_SAVE_PATH = "./model/"
# Checkpoint file name prefix inside MODEL_SAVE_PATH.
MODEL_NAME = "mnist_model"
def backward(mnist):
    """Train the network on MNIST, checkpointing every 100 iterations.

    Builds the training graph (cross-entropy plus collected L2 penalties,
    exponentially decayed learning rate, gradient descent, moving averages
    of the trainable variables), resumes from the latest checkpoint in
    MODEL_SAVE_PATH when one exists, then runs STEP iterations.

    Args:
        mnist: Data set object exposing ``train.num_examples`` and
            ``train.next_batch(batch_size)`` with one-hot labels.
    """
    x = tf.placeholder(tf.float32, [
        BATCH_SIZE,
        forward.IMAGE_SIZE,
        forward.IMAGE_SIZE,
        forward.NUM_CHANNELS])
    y_ = tf.placeholder(tf.float32, [None, forward.OUTPUT_NODE])
    y = forward.forward(x, True, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples/BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Labels arrive one-hot; the sparse loss wants class indices.
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cem = tf.reduce_mean(ce)
    # Add the L2 penalties that getWeight registered under "losses".
    loss = cem + tf.add_n(tf.get_collection("losses"))

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # Group the parameter update and the moving-average update in one op.
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name="train")

    saver = tf.train.Saver()

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Resume from the most recent checkpoint if there is one.
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for i in range(STEP):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            reshape_xs = np.reshape(xs, (
                BATCH_SIZE,
                forward.IMAGE_SIZE,
                forward.IMAGE_SIZE,
                forward.NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshape_xs, y_: ys})
            if i % 100 == 0:
                # Call form of print: valid on both Python 2 and Python 3,
                # unlike the print statement.
                print("After %d step(s): the loss is %f" % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
def main():
    """Read the MNIST data sets from ./data/ (one-hot labels) and train."""
    backward(input_data.read_data_sets("./data/", one_hot=True))


if __name__ == "__main__":
    main()
- 测试部分(test.py):
import tensorflow as tf
import time
import forward
import backward
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
# Seconds to sleep between two consecutive evaluations of the latest checkpoint.
TEST_INTERVAL_TIME = 4
def test(mnist):
    """Repeatedly evaluate the latest checkpoint on the MNIST test set.

    The moving-average values of the variables are restored from the newest
    checkpoint in backward.MODEL_SAVE_PATH, accuracy over the whole test set
    is printed, and the loop sleeps TEST_INTERVAL_TIME seconds before trying
    again. The function returns as soon as no checkpoint can be found.

    Args:
        mnist: Data set object exposing ``test.num_examples``,
            ``test.images`` and ``test.labels`` (one-hot).
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [
            mnist.test.num_examples,
            forward.IMAGE_SIZE,
            forward.IMAGE_SIZE,
            forward.NUM_CHANNELS])
        y_ = tf.placeholder(tf.float32, [None, forward.OUTPUT_NODE])
        y = forward.forward(x, False, None)

        # Map each variable to its moving-average shadow in the checkpoint.
        averages = tf.train.ExponentialMovingAverage(backward.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        hits = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(backward.MODEL_SAVE_PATH)
                if not (ckpt and ckpt.model_checkpoint_path):
                    print ("Not Found Model")
                    return

                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step count is the suffix after the last "-" in the
                # checkpoint file name.
                checkpoint_file = ckpt.model_checkpoint_path.split("/")[-1]
                global_step = checkpoint_file.split("-")[-1]

                test_images = np.reshape(mnist.test.images, (
                    mnist.test.num_examples,
                    forward.IMAGE_SIZE,
                    forward.IMAGE_SIZE,
                    forward.NUM_CHANNELS))
                feed = {x: test_images, y_: mnist.test.labels}
                accuracy_score = sess.run(accuracy, feed_dict=feed)
                print ("After %s training step(s), the accuracy is %g"%(global_step,accuracy_score))
            time.sleep(TEST_INTERVAL_TIME)
def main():
    """Read the MNIST data sets from ./data/ (one-hot labels) and evaluate."""
    test(input_data.read_data_sets("./data/", one_hot=True))


if __name__ == "__main__":
    main()
- 应用部分(app2.0.py):
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import forward
import backward
def uniformFormat(path):
    """Load an image file and convert it into the layout the model is fed.

    The image is thresholded at 200 (per channel), converted to grayscale,
    resized to IMAGE_SIZE x IMAGE_SIZE and inverted (255 - pixel).

    Args:
        path: Path of the image file to read.

    Returns:
        A tuple ``(new_img, re_img)``: ``new_img`` is a flat list of the
        inverted pixel values in row-major order, ``re_img`` is the resized
        grayscale image before inversion.

    Raises:
        IOError: If OpenCV cannot read an image from ``path``.
    """
    or_img = cv2.imread(path)
    if or_img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Cannot read image: %s" % path)

    threshold = 200
    _, img0 = cv2.threshold(or_img, threshold, 255, cv2.THRESH_BINARY)
    img = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
    size = forward.IMAGE_SIZE
    re_img = cv2.resize(img, (size, size))

    # Invert and flatten in one vectorised step; reshape(-1) walks the array
    # row by row, matching the order of the former nested i/j loop.
    # NOTE(review): values stay in the 0-255 range here, while the MNIST
    # training inputs are presumably scaled to [0, 1] - confirm whether a
    # division by 255 is intended.
    new_img = (255.0 - re_img).reshape(-1).tolist()
    return new_img, re_img
def estimateValue(image):
    """Predict the digit shown in a single preprocessed image.

    Args:
        image: Sequence of IMAGE_SIZE * IMAGE_SIZE * NUM_CHANNELS pixel
            values; it is reshaped to a batch of one image.

    Returns:
        The result of ``tf.argmax`` over the logits (a one-element array
        holding the predicted class index), or the string
        ``"Not found Model!"`` when no checkpoint exists in
        backward.MODEL_SAVE_PATH.
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [1, forward.IMAGE_SIZE, forward.IMAGE_SIZE, forward.NUM_CHANNELS])
        y = forward.forward(x, False, None)
        value = tf.argmax(y, 1)

        # Restore the moving-average values of the variables.
        ema = tf.train.ExponentialMovingAverage(backward.MOVING_AVERAGE_DECAY)
        variables_restore = ema.variables_to_restore()
        saver = tf.train.Saver(variables_restore)

        ckpt = tf.train.get_checkpoint_state(backward.MODEL_SAVE_PATH)
        if not (ckpt and ckpt.model_checkpoint_path):
            # Kept as a string return for backward compatibility with callers.
            return "Not found Model!"

        with tf.Session() as sess:
            saver.restore(sess, ckpt.model_checkpoint_path)
            re_image = np.reshape(image, [1, forward.IMAGE_SIZE, forward.IMAGE_SIZE, forward.NUM_CHANNELS])
            # One run evaluates the logits and their argmax together; there
            # is no need to fetch y first and feed it back in.
            est_value = sess.run(value, feed_dict={x: re_image})
            return est_value
def application( ):
    """Classify every file located directly inside ./img.

    Returns:
        A dict mapping ``"<index>_<predicted digit>"`` to the resized
        grayscale image returned by uniformFormat. Files that cannot be
        processed are reported and skipped; the dict is empty when ./img
        does not exist or holds no files.
    """
    image_result_dict = dict()
    # Only the top-level entry of the walk is needed. next() with a default
    # also tolerates a missing folder, where list(os.walk(...))[0] raised
    # IndexError.
    _, _, all_img = next(os.walk("./img"), (None, [], []))

    for i, name in enumerate(all_img):
        path = os.path.join("img", name)
        if not os.path.exists(path):
            # Leftover from the interactive version, where the name was
            # typed in by the user and "exit" ended the session.
            if name == "exit":
                break
            print ("File Path is Wrong")
            continue

        try:
            image, re_image = uniformFormat(path)
            value = estimateValue(image)
        except Exception as err:
            # Best effort: report the failing file instead of silently
            # swallowing the error, then move on to the next one.
            print("Skipping %s: %s" % (path, err))
            continue

        if isinstance(value, str):
            # estimateValue returns a message string when no trained model
            # is available; no other image can be classified either.
            print(value)
            break

        image_result_dict[str(i) + "_" + str(value[0])] = re_image
        print (name,"estimated value is :", value[0])
    return image_result_dict
def matplot(image_result_dict):
    """Show every classified image in a two-row grid, titled with its key.

    Args:
        image_result_dict: Mapping of title to image, as returned by
            application().
    """
    # Floor division keeps the column count an integer on Python 3 as well;
    # plt.subplot does not accept a float grid size.
    rows = 2
    cols = len(image_result_dict) // 2 + 1

    plt.figure(figsize=(10, 10), dpi=80)
    for idx, (title, image) in enumerate(image_result_dict.items(), 1):
        plt.subplot(rows, cols, idx)
        plt.imshow(image, "gray")
        plt.title(str(title))
    plt.show()
if __name__ == "__main__":
    # Classify the images under ./img, then display them with their labels.
    image_result_dict = application()
    matplot(image_result_dict)
说明
- 学习资源在这里哦(免费打广告啦)
tensorflow:https://www.icourse163.org/learn/PKU-1002536002?tid=1206591210#/learn/content
opencv:https://github.com/ex2tron/OpenCV-Python-Tutorial
神经网络原理:https://edu.aliyun.com/roadmap/ai?spm=5176.13345299.1392477.4.7001f153JPsPdf
- 因为要批处理,可能需要修改你图片的地址。如果不想修改代码,可以在运行脚本的目录下新建一个名为 img 的文件夹,里面是这样的:
img文件夹内直接放图片。
还等什么呢!快去跑一下吧,享受下神经网络带来的乐趣。
——————————————————————————
写代码的时候发现前向传播的那个解释图把激活层漏掉了,激活层应分别补充在前三层网络之后。代码里是写上了的哟,所以代码是没问题的哈
如有问题,欢迎各位大佬指正!