精简代码、模型持久化、测试结果输出
到目前为止,我们已经完成了导入数据、构建网络、训练网络、加速训练这些深度学习中最为关键的步骤,我们也在一定程度上解决了我们这个图像复原问题,可以说我们已经入门深度学习了!Congratulations!
但是,我们的所有代码都在一个 .py 文件中,这样又臭又长的程序十分不利于交流和维护,因此我们需要精简我们的代码。具体做法是,将程序分为导入数据、构建网络、训练网络、测试网络四个部分。
这里特别说明一下:我在原有代码的基础上增加了模型的保存和读取以及测试结果的保存两个步骤,这两个步骤很容易实现,但对于我们测试网络十分有用。具体实现如下:
模型保存与读取:
# Saving the model
...
# Checkpoint file that receives the trained parameters.
model_save_path = 'E:\\MNIST_data\\models\\conv_1.ckpt'
# tf.train.Saver is the class used to both write and read checkpoints.
saver = tf.train.Saver()
with tf.Session() as sess:
    for step in range(100000):  # train for 100,000 steps
        feed = {x: train_images_batch, y_label: train_labels_batch}
        sess.run(train_op, feed_dict=feed)  # one training step
        if step % 100 == 0:
            # Write a checkpoint once every 100 parameter updates.
            saver.save(sess, model_save_path)
# Restoring the model
...
# Checkpoint file the model is read from.
model_save_path = 'E:\\MNIST_data\\models\\conv_1.ckpt'
# The same tf.train.Saver class is used for saving and for restoring.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Load the model stored at model_save_path into this session.
    saver.restore(sess, save_path=model_save_path)
测试结果的保存:
...
from PIL import Image  # required to write the arrays to disk as image files
batch_size = 10  # fixed: was misspelled `batch_aize`, leaving batch_size undefined below
model_save_path = 'E:\\MNIST_data\\models\\conv_1.ckpt'  # where the trained model is stored
test_input_save_path = 'E:\\MNIST_data\\result\\test_input\\'  # folder for the test inputs
test_output_save_path = 'E:\\MNIST_data\\result\\test_output\\'  # folder for the network outputs
test_label_save_path = 'E:\\MNIST_data\\result\\test_label\\'  # folder for the ground-truth images
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, model_save_path)  # load the trained model for testing
    ...
    # Fetch one mini-batch of test data and add the trailing channel axis.
    test_images_batch, test_labels_batch = sess.run([Test_Images_Batch, Test_Labels_Batch])
    test_images_batch = np.reshape(test_images_batch, [batch_size, img_W, img_H, 1])
    test_labels_batch = np.reshape(test_labels_batch, [batch_size, img_W, img_H, 1])
    y_pred = sess.run(y_conv, feed_dict={x: test_images_batch})
    # Clamp to the valid 8-bit range before the uint8 cast below: negative
    # pixels become 0 and pixels above 255 become 255 instead of wrapping.
    y_pred = np.clip(y_pred, 0, 255)
    for i in range(batch_size):  # write every sample of the batch to disk
        # Network input.
        img_i = test_images_batch[i, :, :, :]
        img_i = np.reshape(img_i, [28, 28])
        img_i = Image.fromarray(img_i.astype('uint8')).convert('L')  # array -> greyscale image
        # Network output.
        img_o = y_pred[i, :, :, :]
        img_o = np.reshape(img_o, [28, 28])
        img_o = Image.fromarray(img_o.astype('uint8')).convert('L')
        # Ground truth.
        y_real = test_labels_batch[i, :, :, :]
        y_real = np.reshape(y_real, [28, 28])
        y_real = Image.fromarray(y_real.astype('uint8')).convert('L')
        img_i.save(test_input_save_path + '%d.bmp' % (i))  # save the images locally
        img_o.save(test_output_save_path + '%d.bmp' % (i))
        y_real.save(test_label_save_path + '%d.bmp' % (i))  # fixed: closing parenthesis was missing
下面贴出精简之后的代码:
file name: input_data.py
import tensorflow as tf
from PIL import Image
import os
img_W = 28 # image width
img_H = 28 # image height
batch_size = 10 # number of samples in each mini-batch
min_after_dequeue = 1000 # passed to shuffle_batch: minimum number of elements left in the queue after a dequeue
capacity = min_after_dequeue + 3*batch_size # passed to shuffle_batch: maximum number of elements in the queue
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``.

    Produces a bytes-typed feature, used to store image pixel data. Choose
    the feature type that fits the attributes your own problem needs to store.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
# Pack the images in image_path and label_path, pair by pair, into TFRecord_path.
def generate_TFRecordfile(image_path, label_path, TFRecord_path):
    """Write every (image, label) pair to a single TFRecord file.

    Files are paired by position after sorting both directory listings by
    name, so an image and its label must sort to the same position (for
    example by sharing a file name).

    Args:
        image_path: directory containing the input images.
        label_path: directory containing the corresponding label images.
        TFRecord_path: path of the TFRecord file to create.

    Raises:
        ValueError: if the two directories hold a different number of files.
    """
    # os.listdir returns entries in arbitrary order; sort both listings so the
    # index-based pairing below is deterministic.
    images = [os.path.join(image_path, name) for name in sorted(os.listdir(image_path))]
    labels = [os.path.join(label_path, name) for name in sorted(os.listdir(label_path))]
    num_examples = len(images)  # number of image/label pairs to write
    if len(labels) != num_examples:
        # Fail before writing anything rather than part-way through the file.
        raise ValueError('Found %d images in %s but %d labels in %s'
                         % (num_examples, image_path, len(labels), label_path))
    print('There are %d images\n'%(num_examples))
    writer = tf.python_io.TFRecordWriter(TFRecord_path)  # writer for the TFRecord file
    try:
        for image_file, label_file in zip(images, labels):
            # Store the raw pixel bytes, matching the bytes-typed features.
            with Image.open(image_file) as image:
                image_bytes = image.tobytes()
            with Image.open(label_file) as label:
                label_bytes = label.tobytes()
            # Bundle both entries of the pair into one Example protocol buffer.
            example = tf.train.Example(features=tf.train.Features(feature={
                'image': _bytes_feature(image_bytes),
                'label': _bytes_feature(label_bytes)}))
            writer.write(example.SerializeToString())  # append the example to the file
    finally:
        # Always release the file handle, even if an image fails to load.
        writer.close()
    print('TFRecord file was generated successfully\n')
def get_batch(TFRecord_path):
    """Build the queue-based input pipeline for a TFRecord file.

    Reads examples from the file(s) matching ``TFRecord_path``, decodes the
    'image' and 'label' byte strings into uint8 tensors of shape
    [img_W, img_H], and groups them into shuffled mini-batches.

    Returns:
        (Image_Batch, Label_Batch): the two batched tensors produced by
        tf.train.shuffle_batch.
    """
    # Reader that pulls serialized examples out of the TFRecord file.
    record_reader = tf.TFRecordReader()
    # Resolve the list of matching files and queue their names in random
    # order; num_epochs=None places no limit on the number of passes.
    matched_files = tf.train.match_filenames_once(TFRecord_path)
    filename_queue = tf.train.string_input_producer(
        matched_files, num_epochs=None, shuffle=True)

    # Read one serialized example and parse its two string features.
    _, serialized = record_reader.read(filename_queue)
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    # tf.decode_raw yields a flat pixel vector of length img_W*img_H, which
    # is reshaped to a 2-D image.
    sample_shape = [img_W, img_H]
    image = tf.reshape(tf.decode_raw(parsed['image'], tf.uint8), shape=sample_shape)
    label = tf.reshape(tf.decode_raw(parsed['label'], tf.uint8), shape=sample_shape)

    # (Optional) image preprocessing could be inserted here.

    # Randomly combine samples into the mini-batches used for stochastic
    # gradient descent.
    Image_Batch, Label_Batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=5,
        min_after_dequeue=min_after_dequeue,
        capacity=capacity)
    return Image_Batch, Label_Batch
file name: model.py
import tensorflow as tf
batch_size = 10 # number of samples per mini-batch
# Helper that creates a weight variable.
def weight_variable(shape):
    """Return a tf.Variable of ``shape`` drawn from a truncated normal.

    The initial values come from tf.truncated_normal with stddev 0.1, i.e.
    random values restricted to the range [mu - 2*sigma, mu + 2*sigma].
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
# Helper that creates a bias variable.
def bias_variable(shape):
    """Return a tf.Variable of ``shape`` with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Helper that builds a convolution layer.
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
# Helper that builds a pooling layer.
def max_pool_2x2(x):
    """Apply max pooling to ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Forward pass of the network.
def inference(x):
    """Build the forward pass: two conv/pool stages followed by two upsampling stages.

    The transposed convolutions reuse the convolution filters (W_conv2, then
    W_conv1), so the decoder adds no variables of its own.

    Args:
        x: float tensor of shape [batch, 28, 28, 1]. The spatial sizes 14 and
            28 in the upsampling output shapes below assume a 28x28 input.

    Returns:
        The network output, a tensor of shape [batch, 28, 28, 1].
    """
    # First convolution layer: 5x5 filters, 1 -> 32 channels.
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
    # First pooling layer.
    h_pool1 = max_pool_2x2(h_conv1)
    # Second convolution layer: 5x5 filters, 32 -> 64 channels.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    # Second pooling layer.
    h_pool2 = max_pool_2x2(h_conv2)

    # Take the batch size from the input at run time instead of a hard-coded
    # constant, so the same graph accepts mini-batches of any size.
    dynamic_batch = tf.shape(x)[0]

    # Upsampling layer 1 (shares its filter with the second convolution).
    W_de_conv1 = W_conv2
    h_de_conv1 = tf.nn.conv2d_transpose(
        h_pool2, W_de_conv1,
        output_shape=tf.stack([dynamic_batch, 14, 14, 32]),
        strides=[1, 2, 2, 1], padding="SAME")
    # Upsampling layer 2 (shares its filter with the first convolution).
    W_de_conv2 = W_conv1
    h_de_conv2 = tf.nn.conv2d_transpose(
        h_de_conv1, W_de_conv2,
        output_shape=tf.stack([dynamic_batch, 28, 28, 1]),
        strides=[1, 2, 2, 1], padding="SAME")
    # Output of the network.
    return h_de_conv2
file name: train.py
import time
time_start = time.time()  # time.time() returns the seconds elapsed since 1970-01-01 (the epoch)
import tensorflow as tf
import numpy as np
import input_data  # data-loading operations
import model  # network definition

img_W = 28  # image width
img_H = 28  # image height
batch_size = 10  # number of samples in each mini-batch
train_image_path = 'E:\\MNIST_data\\train_images\\'  # training input images
train_label_path = 'E:\\MNIST_data\\train_labels\\'  # training target images
Train_TFRecord_path = 'E:\\MNIST_data\\tfrecord\\train_data_set.tfrecord'  # TFRecord file to generate
test_image_path = 'E:\\MNIST_data\\test_images\\'  # test input images
test_label_path = 'E:\\MNIST_data\\test_labels\\'  # test target images
Test_TFRecord_path = 'E:\\MNIST_data\\tfrecord\\test_data_set.tfrecord'  # TFRecord file to generate
model_save_path = 'E:\\MNIST_data\\models\\conv_1.ckpt'  # where the model is saved

# Generate the TFRecord files for both data sets.
print('please wait for generating the TFRecord file of training sets...')
input_data.generate_TFRecordfile(train_image_path, train_label_path, Train_TFRecord_path)
print('please wait for generating the TFRecord file of test sets...')
input_data.generate_TFRecordfile(test_image_path, test_label_path, Test_TFRecord_path)

# Multi-threaded readers that turn the TFRecord files into mini-batches.
Train_Images_Batch, Train_Labels_Batch = input_data.get_batch(Train_TFRecord_path)
Test_Images_Batch, Test_Labels_Batch = input_data.get_batch(Test_TFRecord_path)

# Placeholders through which the mini-batches are fed to the network.
x = tf.placeholder(tf.float32, shape=[None, img_W, img_H, 1], name='images')
y_label = tf.placeholder(tf.float32, shape=[None, img_W, img_H, 1], name='labels')

y_conv = model.inference(x)
loss = tf.reduce_mean(tf.square(y_conv - y_label))  # cost function: mean squared error
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)  # Adam optimiser, learning rate 1e-4
init_op = (tf.local_variables_initializer(), tf.global_variables_initializer())  # initialisation ops

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()  # coordinates the shutdown of the reader threads
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start the reader threads
    try:
        for step in range(100):
            if coord.should_stop():  # becomes True once the end-of-data marker was read
                break
            train_images_batch, train_labels_batch = sess.run([Train_Images_Batch, Train_Labels_Batch])
            # Add the trailing channel axis expected by the placeholders.
            train_images_batch = np.reshape(train_images_batch, [batch_size, img_W, img_H, 1])
            train_labels_batch = np.reshape(train_labels_batch, [batch_size, img_W, img_H, 1])
            # Feed the mini-batch to train_op to update the network.
            sess.run(train_op, feed_dict={x: train_images_batch, y_label: train_labels_batch})
            if step % 100 == 0:
                test_images_batch, test_labels_batch = sess.run([Test_Images_Batch, Test_Labels_Batch])
                test_images_batch = np.reshape(test_images_batch, [batch_size, img_W, img_H, 1])
                test_labels_batch = np.reshape(test_labels_batch, [batch_size, img_W, img_H, 1])
                train_loss = sess.run(loss, feed_dict={x: train_images_batch, y_label: train_labels_batch})
                test_loss = sess.run(loss, feed_dict={x: test_images_batch, y_label: test_labels_batch})
                # The losses are floats: %f keeps the fractional part that %d would truncate.
                print('step %d: loss on training set batch:%f loss on testing set batch:%f' % (step, train_loss, test_loss))
                saver.save(sess, model_save_path)
    except tf.errors.OutOfRangeError:  # end-of-data marker of the file-name queue
        print('epoch limit reached')
    finally:
        # Runs on every exit path: ask the reader threads to stop, then wait for them.
        coord.request_stop()
        coord.join(threads)
    saver.save(sess, model_save_path)  # save the final model
time_end = time.time()  # seconds since the epoch, see time_start
print('\nTrain Finished\ntotal run time is : %f s \nThe network was saved in %s' %(time_end-time_start,model_save_path))
file name: test.py
import time
time_start = time.time()  # time.time() returns the seconds elapsed since 1970-01-01 (the epoch)
import tensorflow as tf
from PIL import Image  # used below to turn arrays into image files
import matplotlib.pyplot as plt
import numpy as np
import input_data  # data-loading operations
import model  # network definition

tf.reset_default_graph()

test_images_path = 'E:\\MNIST_data\\test_images\\'
test_labels_path = 'E:\\MNIST_data\\test_labels\\'
test_TFRecord_path = 'E:\\MNIST_data\\tfrecord\\test_dataSet.tfrecord'
model_save_path = 'E:\\MNIST_data\\models\\conv_1.ckpt'
test_input_save_path = 'E:\\MNIST_data\\result\\test_input\\'
test_output_save_path = 'E:\\MNIST_data\\result\\test_output\\'
test_label_save_path = 'E:\\MNIST_data\\result\\test_label\\'
img_W = 28  # image width
img_H = 28  # image height
batch_size = 10  # number of samples in each mini-batch


def _to_image(sample):
    """Convert one [img_W, img_H, 1] sample to an 8-bit greyscale PIL image."""
    pixels = np.reshape(sample, [img_W, img_H])
    return Image.fromarray(pixels.astype('uint8')).convert('L')


print('please wait for generating the TFRecord file of test sets...')
input_data.generate_TFRecordfile(test_images_path, test_labels_path, test_TFRecord_path)
# Build the mini-batch tensors from the contents of the TFRecord file.
Test_Images_Batch, Test_Labels_Batch = input_data.get_batch(test_TFRecord_path)

x = tf.placeholder(tf.float32, shape=[None, img_W, img_H, 1])
y_conv = model.inference(x)
saver = tf.train.Saver()
init_op = (tf.local_variables_initializer(), tf.global_variables_initializer())  # initialisation ops

with tf.Session() as sess:
    # Initialise the variables, then load the trained network from the checkpoint.
    sess.run(init_op)
    saver.restore(sess, model_save_path)
    coord = tf.train.Coordinator()  # coordinates the shutdown of the reader threads
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start the reader threads
    try:
        test_images_batch, test_labels_batch = sess.run([Test_Images_Batch, Test_Labels_Batch])
        # Add the trailing channel axis expected by the placeholder.
        test_images_batch = np.reshape(test_images_batch, [batch_size, img_W, img_H, 1])
        test_labels_batch = np.reshape(test_labels_batch, [batch_size, img_W, img_H, 1])
        # Inference only: run the mini-batch through the network.
        y_pred = sess.run(y_conv, feed_dict={x: test_images_batch})
    finally:
        # Stop and join the reader threads even if fetching or inference failed.
        coord.request_stop()
        coord.join(threads)

# Clamp to the valid 8-bit range before the uint8 cast in _to_image: negative
# pixels become 0 and pixels above 255 become 255 instead of wrapping.
y_pred = np.clip(y_pred, 0, 255)

for i in range(batch_size):  # write every sample of the batch to disk
    _to_image(test_images_batch[i]).save(test_input_save_path + '%d.bmp' % (i))
    _to_image(y_pred[i]).save(test_output_save_path + '%d.bmp' % (i))
    _to_image(test_labels_batch[i]).save(test_label_save_path + '%d.bmp' % (i))

# Show one input / ground truth / output triple for a quick visual check.
num_img = 1
plt.subplot(131)
plt.imshow(_to_image(test_images_batch[num_img]))
plt.subplot(132)
plt.imshow(_to_image(test_labels_batch[num_img]))
plt.subplot(133)
plt.imshow(_to_image(y_pred[num_img]))
plt.show()

time_end = time.time()  # seconds since the epoch, see time_start
print('\nTest Finished\nTotal run time is : %f s \nResults were saved in: %s' %(time_end-time_start,test_output_save_path))
四个.py 文件都存放于我们之前新建好的文件夹中:
这样一来,在我们导入训练数据时,我们只需关心input_data.py中的内容,构建模型时只需在model.py中做改动,然后在train.py中训练网络,训练好网络后再运行test.py测试网络的性能。如此,整个流程的思路就清晰了很多。
下一节,我们将介绍一个神器,TensorBoard,它可以实时监控我们的训练过程。