CNN实现手写字体识别代码详解

最新推荐文章于 2024-08-02 12:58:05 发布

stanpcf

最新推荐文章于 2024-08-02 12:58:05 发布

阅读量7.5k

点赞数 4

分类专栏：深度学习 文章标签：cnn deeplearn tensorflow mnist

本文链接：https://blog.csdn.net/stan_pcf/article/details/57631997

版权

深度学习专栏收录该内容

1 篇文章 0 订阅

订阅专栏

按照tensorflow 官方文档实现，并对代码进行了详解

#!/usr/bin/env python
#-*- coding: utf-8 -*-

# File Name: mnist_beginners/mnist_pros.py
# Author: pcf
# Created Time: 2017-02-25

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


# 创建一个多层卷积网络

def weight_variable(shape):
    """Create a trainable weight tensor of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a trainable bias tensor of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, w):
    """Convolve `x` with the kernel `w` using stride 1 and 'SAME' padding.

    With a stride of 1 in every dimension and 'SAME' padding, the output
    keeps the spatial size of the input.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding="SAME")

def max_pool_2x2(x):
    """Downsample `x` with 2x2 max pooling at a stride of 2.

    `ksize` and `strides` are both given per input dimension, in
    [batch, height, width, channels] order (assuming TensorFlow's default
    layout). The 1s mean no pooling across the batch or channel dimensions;
    the 2s pool over non-overlapping 2x2 spatial windows, so with 'SAME'
    padding the height and width are halved (rounded up).
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


# Read the MNIST data sets, using 'MNIST_data' as the data directory, with
# labels encoded as one-hot vectors.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# The training script below calls .eval()/.run() without passing a session,
# which relies on this InteractiveSession being the default session.
sess = tf.InteractiveSession()

# Inputs fed at run time: flattened images (784 values each) and one-hot
# labels (10 classes). The leading None leaves the batch size unspecified.
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])

# NOTE(review): W and b are not used by any live code visible in this file;
# they look like leftovers from a plain softmax-regression model -- confirm
# before removing.
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

# First convolutional layer.
# The kernel shape is [height, width, in_channels, out_channels]: 5x5
# patches, 1 input channel and 32 output channels, i.e. 32 filters of shape
# (5, 5, 1) to learn. The input-channel count must match the depth of the
# incoming tensor: it is 1 here because x_image has a single (grayscale)
# channel; an RGB image would need 3.
w_conv1 = weight_variable([5, 5, 1, 32])

b_conv1 = bias_variable([32])

# Reshape each flat 784-vector into a 28x28 image with 1 channel, giving a
# 4-D tensor [batch, height, width, channels]. The -1 lets TensorFlow infer
# the batch size from the total number of elements.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# ReLU activation. Compared with sigmoid it yields sparse activations, helps
# against vanishing gradients and usually trains faster; its drawback is that
# units can "die" during training, especially with a high learning rate.
# The stride-1 'SAME' convolution keeps the 28x28 size: 32 feature maps.
h_conv1 = tf.nn.relu(conv2d(x_image, w_conv1)+ b_conv1)

# 2x2 max pooling with stride 2 (not a strided convolution) halves the
# spatial size: the 32 feature maps of 28x28 become 32 feature maps of 14x14,
# i.e. the tensor shape is [-1, 14, 14, 32].
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer.
# Weight sharing in a CNN happens within a layer, not across layers, so this
# layer has its own kernels to learn: 64 filters of shape (5, 5, 32), where
# 32 matches the number of channels coming out of h_pool1.
w_conv2 = weight_variable([5, 5, 32, 64])   
b_conv2 = bias_variable([64])

# The input is 14x14 after the first pooling, so this stride-1 'SAME'
# convolution produces 64 feature maps of 14x14.
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)

# Pooling halves the spatial size again: 64 feature maps of 7x7.
h_pool2 = max_pool_2x2(h_conv2) 

# Densely connected layer.
# The second convolutional stage (convolution + ReLU + pooling, treated here
# as one layer) outputs 64 feature maps of 7x7, i.e. 7*7*64 = 3136 values per
# image. Treating every value as a feature, they are fully connected to 1024
# hidden units, so w_fc1 holds 3136 * 1024 weights. The 1024 is a free design
# choice unrelated to the earlier layers (1000 would work as well).
# For comparison, thanks to weight sharing the second convolutional layer
# only needs 5*5*32*64 = 51200 kernel weights, whereas this dense layer needs
# 3136 * 1024 = 3211264.
w_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])    # flatten the feature maps
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1)+b_fc1)

# Dropout is applied before the output layer to reduce overfitting.
# keep_prob is a placeholder so that it can be fed a different value at run
# time (the script below feeds 0.5 when training and 1.0 when evaluating).
keep_prob = tf.placeholder('float')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer: a fully connected map from the 1024 hidden features to 10
# unnormalised class scores (logits); softmax turns them into probabilities.
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)

# Cross-entropy summed over the batch. It is computed from the logits rather
# than as -sum(y_ * log(y_conv)): once a softmax output underflows to 0, the
# explicit form evaluates 0 * log(0) = NaN and training cannot recover. The
# fused op is the numerically stable equivalent.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# A prediction is correct when the most probable class equals the label's
# class; accuracy is the fraction of correct predictions in the fed batch.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Train for 20000 mini-batches of 50 images, reporting the accuracy on the
# current batch every 100 steps, then evaluate on the full test set.
sess.run(tf.global_variables_initializer())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Dropout is disabled (keep_prob=1.0) when measuring accuracy.
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # The optimizer step must run on every batch, not only on the reporting
    # steps, so it sits outside the `if` above.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

print("test accuracy %g" % accuracy.eval(feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))