介绍使用卷积神经网络来实现手写数字集的识别。
主要采用面向对象的编程方法实现,代码可以直接运行,分别实现了训练模型、保存模型,以及运用保存好的模型测试单张图片三个功能。
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import cv2
class Model:
    """CNN model for MNIST handwritten-digit recognition (TensorFlow 1.x).

    Provides plain training (``train``), training with checkpointing
    (``train_and_save``), and single-image inference from a saved
    checkpoint (``test_picture``).
    """

    def __init__(self, learning_rate, batch_size, iterations, classes):
        """
        :param learning_rate: learning rate used by the Adam optimizer
        :param batch_size: number of samples drawn per training step
        :param iterations: total number of training steps
        :param classes: number of output classes (10 for MNIST)
        """
        # NOTE: fixed the original attribute typo ``learning_rage``; the
        # attribute is only read inside this class, so the fix is safe.
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.iterations = iterations
        self.classes = classes
        # Histories of [step, value] pairs, appended every 20 steps.
        self.loss = []
        self.accuracy = []

    def weight_variable(self, shape, name):
        """Return a weight Variable drawn from a truncated normal (stddev 0.1).

        :param shape: shape of the weight tensor
        :param name: graph name for the initializer op
        :return: a ``tf.Variable`` of the given shape
        """
        initial = tf.truncated_normal(shape, stddev=0.1, name=name)
        return tf.Variable(initial)

    def bias_variable(self, shape, name):
        """Return a bias Variable initialized to zeros.

        :param shape: shape of the bias tensor
        :param name: graph name for the constant initializer
        :return: a ``tf.Variable`` of the given shape
        """
        initial = tf.constant(0., shape=shape, name=name)
        return tf.Variable(initial)

    def conv2d(self, x, W):
        """2-D convolution, stride 1, SAME padding (spatial size preserved)."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    def max_pool_2x2(self, x):
        """2x2 max pooling, stride 2, SAME padding (halves width and height)."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding="SAME")

    def buile_model(self, x, keep_prob):
        """Build the CNN graph: two conv+pool stages, then two dense layers.

        (Method name kept as ``buile_model`` — original typo — so existing
        callers keep working.)

        :param x: flattened input images, shape ``[None, 784]``
        :param keep_prob: dropout keep-probability placeholder
        :return: unscaled logits, shape ``[None, self.classes]``
        """
        # Reshape to NHWC; -1 infers the batch size, 1 channel (grayscale).
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        # Conv layer 1: 32 filters of 3x3x1, then 2x2 max-pool -> 14x14x32.
        W_conv1 = self.weight_variable([3, 3, 1, 32], name="W_conv1")
        b_conv1 = self.bias_variable([32], name="b_conv1")
        h_conv1 = tf.nn.relu(self.conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = self.max_pool_2x2(h_conv1)
        # Conv layer 2: 64 filters of 3x3x32, then 2x2 max-pool -> 7x7x64.
        W_conv2 = self.weight_variable([3, 3, 32, 64], name="W_conv2")
        b_conv2 = self.bias_variable([64], name="b_conv2")
        h_conv2 = tf.nn.relu(self.conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = self.max_pool_2x2(h_conv2)
        # Dense layer 1: flatten 7*7*64 features -> 1024 units with dropout.
        W_fc1 = self.weight_variable([7 * 7 * 64, 1024], name="W_fc1")
        b_fc1 = self.bias_variable([1024], name="b_fc1")
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        # Dense layer 2 (output): raw logits; softmax is applied by the loss.
        W_fc2 = self.weight_variable([1024, self.classes], name="W_fc2")
        b_fc2 = self.bias_variable([self.classes], name="b_fc2")
        y_prediction = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        return y_prediction

    def _run_training(self, save_path=None):
        """Build the graph, train, print test accuracy; optionally checkpoint.

        Shared implementation behind ``train`` and ``train_and_save``.

        :param save_path: if given, save the trained variables there and
            register ``y_prediction`` in the "y_prediction" collection.
        """
        # Placeholders fed at run time with batches of data.
        x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
        y = tf.placeholder(tf.float32, shape=[None, 10])
        keep_prob = tf.placeholder(tf.float32)
        # Load the MNIST dataset (downloads into MNIST_data if absent).
        self.mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
        # BUG FIX: the original train() called buile_model(x, y, keep_prob),
        # passing one argument too many, which raised a TypeError.
        y_prediction = self.buile_model(x, keep_prob)
        # Softmax cross-entropy loss over the logits.
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                    logits=y_prediction))
        # Adam optimizer performs the backward pass / parameter updates.
        train_step = tf.train.AdamOptimizer(
            self.learning_rate).minimize(cross_entropy)
        # Fraction of predictions matching the one-hot labels.
        correct_prediction = tf.equal(tf.argmax(y_prediction, 1),
                                      tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        saver = None
        if save_path is not None:
            # Register the output tensor for retrieval after restore; this
            # must happen BEFORE the Saver writes the meta graph (the
            # original added it after saver.save, so it was never saved).
            tf.add_to_collection("y_prediction", y_prediction)
            saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.iterations):
                batch = self.mnist.train.next_batch(self.batch_size)
                # Every 20 steps: evaluate on the batch (dropout off) and log.
                if i % 20 == 0:
                    eval_feed = {x: batch[0], y: batch[1], keep_prob: 1.0}
                    train_accuracy = accuracy.eval(feed_dict=eval_feed)
                    loss = cross_entropy.eval(feed_dict=eval_feed)
                    print("step %d, loss is %.4f, training accuracy %.4f"
                          % (i, loss, train_accuracy))
                    self.accuracy.append([i, train_accuracy])
                    self.loss.append([i, loss])
                # Training step keeps dropout active (keep_prob 0.5).
                train_step.run(feed_dict={x: batch[0], y: batch[1],
                                          keep_prob: 0.5})
            # BUG FIX: evaluate the test set with dropout disabled — the
            # original fed keep_prob=0.7, which randomly perturbs the
            # measured accuracy.
            print("Testing Accuracy:",
                  sess.run(accuracy, feed_dict={x: self.mnist.test.images,
                                                y: self.mnist.test.labels,
                                                keep_prob: 1.0}))
            if saver is not None:
                saver.save(sess, save_path)

    def train(self):
        """Train the network on MNIST without saving a checkpoint."""
        self._run_training()

    def train_and_save(self):
        """Train the network and save a checkpoint to ./model/save_net.ckpt."""
        self._run_training(save_path="./model/save_net.ckpt")

    def get_accuracy(self):
        """Return the recorded [step, accuracy] pairs."""
        return self.accuracy

    def get_loss(self):
        """Return the recorded [step, loss] pairs."""
        return self.loss

    def test_picture(self, picture_path=None):
        """Classify one grayscale digit image with the saved checkpoint.

        :param picture_path: path of the image to classify; defaults to the
            original hard-coded demo image (NOTE: machine-specific absolute
            path — pass an explicit path on other machines).
        """
        if picture_path is None:
            # The original built a random pictures/mnist_test_*.jpg path and
            # then unconditionally overwrote it with this absolute path, so
            # this default preserves the effective behavior.
            picture_path = ("/home/yuhufei/PycharmProjects/Project/Project/"
                            "Test1_基础版本/demo/actual_pictures/eight.jpg")
        x = tf.placeholder('float', [None, 784])
        keep_prob = tf.placeholder(tf.float32)
        y_predict = self.buile_model(x, keep_prob)
        im = cv2.imread(picture_path, cv2.IMREAD_GRAYSCALE).astype(np.float32)
        im = cv2.resize(im, (28, 28), interpolation=cv2.INTER_CUBIC)
        # Normalize pixel values from [0, 255] to [-0.5, 0.5].
        img_gray = (im - (255 / 2.0)) / 255
        x_img = np.reshape(img_gray, [-1, 784])
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, "./model/save_net.ckpt")
            # BUG FIX: inference must run with dropout disabled — the
            # original fed keep_prob=0.5, making predictions stochastic.
            output = sess.run(y_predict, feed_dict={x: x_img, keep_prob: 1.0})
            predic_result = np.argmax(output)
            print('the predict is : ', predic_result)
            cv2.imshow('out', img_gray)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
# Run the model: train the CNN on MNIST with the configured hyperparameters.
# Guarded so importing this module does not trigger a training run.
if __name__ == "__main__":
    model = Model(learning_rate=0.0001, iterations=5000, batch_size=32,
                  classes=10)
    model.train()