制作数据集
- tfrecords 文件:
(1)tfrecords
: 是一种二进制文件,可先将图片和标签制作成该格式的文件。使用 tfrecords 进行数据读取,会提高内存利用率。
(2)tf.train.Example
: 用来存储训练数据。训练数据的特征用键值对的形式表示。
如:'img_raw': 值,'label': 值
值的类型是 BytesList / FloatList / Int64List 之一
(3)SerializeToString( )
: 把数据序列化成字符串存储。
代码文件:
mnist_generateds.py
mnist_forward.py
mnist_backward.py
mnist_test.py
mnist_app.py
(1) 数据集生成读取文件 mnist_generateds.py
#coding:utf-8 import tensorflow as tf import numpy as np from PIL import Image import os image_train_path='./mnist_data_jpg/mnist_train_jpg_60000/' label_train_path='./mnist_data_jpg/mnist_train_jpg_60000.txt' tfRecord_train='./data/mnist_train.tfrecords' image_test_path='./mnist_data_jpg/mnist_test_jpg_10000/' label_test_path='./mnist_data_jpg/mnist_test_jpg_10000.txt' tfRecord_test='./data/mnist_test.tfrecords' data_path='./data' resize_height = 28 resize_width = 28 def write_tfRecord(tfRecordName, image_path, label_path): writer = tf.python_io.TFRecordWriter(tfRecordName) num_pic = 0 f = open(label_path, 'r') contents = f.readlines() f.close() for content in contents: value = content.split() img_path = image_path + value[0] img = Image.open(img_path) img_raw = img.tobytes() labels = [0] * 10 labels[int(value[1])] = 1 example = tf.train.Example(features=tf.train.Features(feature={ 'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])), 'label': tf.train.Feature(int64_list=tf.train.Int64List(value=labels)) })) writer.write(example.SerializeToString()) num_pic += 1 print ("the number of picture:", num_pic) writer.close() print("write tfrecord successful") def generate_tfRecord(): isExists = os.path.exists(data_path) if not isExists: os.makedirs(data_path) print 'The directory was created successfully' else: print 'directory already exists' write_tfRecord(tfRecord_train, image_train_path, label_train_path) write_tfRecord(tfRecord_test, image_test_path, label_test_path) def read_tfRecord(tfRecord_path): filename_queue = tf.train.string_input_producer([tfRecord_path], shuffle=True) reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example(serialized_example, features={ 'label': tf.FixedLenFeature([10], tf.int64), 'img_raw': tf.FixedLenFeature([], tf.string) }) img = tf.decode_raw(features['img_raw'], tf.uint8) img.set_shape([784]) img = tf.cast(img, tf.float32) * (1. 
/ 255) label = tf.cast(features['label'], tf.float32) return img, label def get_tfrecord(num, isTrain=True): if isTrain: tfRecord_path = tfRecord_train else: tfRecord_path = tfRecord_test img, label = read_tfRecord(tfRecord_path) img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size = num, num_threads = 2, capacity = 1000, min_after_dequeue = 700) return img_batch, label_batch def main(): generate_tfRecord() if __name__ == '__main__': main()
1)
filename_queue = tf.train.string_input_producer([tfRecord_path])
# 该函数会生成一个先入先出的队列,文件阅读器会使用它来读取数据。 tf.train.string_input_producer( string_tensor, # 存储图像和标签信息的 TFRecord 文件名列表 num_epochs=None, # 循环读取的轮数(可选) shuffle=True, # 布尔值(可选),如果为 True,则在每轮随机打乱读取顺序 seed=None, # 随机读取时设置的种子(可选) capacity=32, # 设置队列容量 shared_name=None, # (可选) 如果设置,该队列将在多个会话中以给定名称共享。所有具有此队列的设备都可以通过 shared_name 访问它。在分布式设置中使用这种方法意味着每个名称只能被访问此操作的其中一个会话看到。 name=None, # 操作的名称(可选) cancel_op=None # 取消队列 )
2)
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,features={ 'img_raw': tf.FixedLenFeature([ ], tf.string) , 'label': tf.FixedLenFeature([10], tf.int64)}) # 把读出的每个样本保存在 serialized_example 中进行解序列化, # 标签和图片的键名应该和制作 tfrecords 的键名相同,其中标签给出几分类。
# 该函数可以将 tf.train.Example 协议内存块(protocol buffer)解析为张量。
tf.parse_single_example(
    serialized,          # 一个标量字符串张量,即序列化后的单个 Example
    features,            # 一个字典,把特征键名映射到 FixedLenFeature 或 VarLenFeature 值;键名须与协议内存块中储存的键名一致
    name=None,           # 操作的名称(可选)
    example_names=None   # 一个标量字符串张量,表示该样本的名称(可选)
)
3)
img_batch, label_batch = tf.train.shuffle_batch()
# 这个函数随机读取一个 batch 的数据。 tf.train.shuffle_batch( tensors, # 待乱序处理的列表中的样本(图像和标签) batch_size, # 从队列中提取的新批量大小 capacity, # 队列中元素的最大数量 min_after_dequeue, # 出队后队列中的最小数量元素,用于确保元素的混合级别 num_threads=1, # 排列 tensors 的线程数 seed=None, # 用于队列内的随机洗牌 enqueue_many=False, # tensor 中的每个张量是否是一个例子 shapes=None, # 每个示例的形状 allow_smaller_final_batch=False, # (可选)布尔值。 如果为 True,则在队列中剩余数量不足时允许最终批次更小。 shared_name=None, # (可选)如果设置,该队列将在多个会话中以给定名称共享。 name=None # 操作的名称(可选) )
(2) mnist_forward.py
"""Forward pass of a two-layer fully connected network for MNIST."""
import tensorflow as tf

INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500


def get_weight(shape, regularizer):
    """Create a weight variable and optionally register its L2 penalty.

    Args:
        shape: Shape of the weight variable.
        regularizer: L2 regularization scale, or ``None`` to skip regularization.

    Returns:
        The weight variable, initialized from a truncated normal (stddev 0.1).
    """
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    # Identity test: only ``None`` disables regularization.
    if regularizer is not None:
        # The penalty is collected under 'losses' for whoever builds the loss.
        tf.add_to_collection(
            'losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w


def get_bias(shape):
    """Create a bias variable of the given shape, initialized to zeros."""
    b = tf.Variable(tf.zeros(shape))
    return b


def forward(x, regularizer):
    """Compute the network output for input ``x``.

    Args:
        x: Input tensor whose last dimension is ``INPUT_NODE``.
        regularizer: L2 regularization scale passed to ``get_weight``, or ``None``.

    Returns:
        The output of the second layer; no activation is applied to it.
    """
    w1 = get_weight([INPUT_NODE, LAYER1_NODE], regularizer)
    b1 = get_bias([LAYER1_NODE])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)

    w2 = get_weight([LAYER1_NODE, OUTPUT_NODE], regularizer)
    b2 = get_bias([OUTPUT_NODE])
    y = tf.matmul(y1, w2) + b2
    return y
(3) 反向传播文件修改图片标签获取的接口 mnist_backward.py
"""Train the MNIST network on batches read from the training TFRecord file."""
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import mnist_forward
import mnist_generateds  # 1: TFRecord-based input pipeline

BATCH_SIZE = 200
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
REGULARIZER = 0.0001
STEPS = 50000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "./model/"
MODEL_NAME = "mnist_model"
train_num_examples = 60000  # 2: used to derive the learning-rate decay interval


def backward():
    """Build the training graph and run ``STEPS`` training iterations.

    A checkpoint found under ``MODEL_SAVE_PATH`` is restored before training,
    and a new checkpoint is written every 1000 iterations.
    """
    x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
    y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
    y = mnist_forward.forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)

    # Loss = mean cross entropy + everything collected under 'losses'.
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cem = tf.reduce_mean(ce)
    loss = cem + tf.add_n(tf.get_collection('losses'))

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        train_num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # Group the gradient step and the moving-average update into one op.
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    # 3: batch tensors fed by the TFRecord input queue
    img_batch, label_batch = mnist_generateds.get_tfrecord(BATCH_SIZE, isTrain=True)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Resume from the latest checkpoint when one exists.
        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        coord = tf.train.Coordinator()  # 4
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # 5
        try:
            for i in range(STEPS):
                xs, ys = sess.run([img_batch, label_batch])  # 6
                _, loss_value, step = sess.run(
                    [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
                if i % 1000 == 0:
                    print("After %d training step(s), loss on training batch is %g."
                          % (step, loss_value))
                    saver.save(sess,
                               os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                               global_step=global_step)
        finally:
            # Stop the queue-runner threads even if training raised.
            coord.request_stop()  # 7
            coord.join(threads)  # 8


def main():
    backward()  # 9


if __name__ == '__main__':
    main()
1) 关键操作:利用多线程提高图片和标签的批获取效率
方法:将批获取的操作放到线程协调器开启和关闭之间
2) 开启线程协调器:coord = tf.train.Coordinator( ) threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# 这个函数将会启动输入队列的线程,填充训练样本到队列中,以便出队操作可以从队列中拿到样本。 # 这种情况下最好配合使用一个 tf.train.Coordinator ,这样可以在发生错误的情况下正确地关闭这些线程。 tf.train.start_queue_runners( sess=None, # 用于运行队列操作的会话。 默认为默认会话。 coord=None, # 可选协调器,用于协调启动的线程。 daemon=True, # 守护进程,线程是否应该标记为守护进程,这意味着它们不会阻止程序退出。 start=True, # 设置为 False 只创建线程,不启动它们。 collection=tf.GraphKeys.QUEUE_RUNNERS #指定图集合以获取启动队列的GraphKey。默认为GraphKeys.QUEUE_RUNNERS。 )
关闭线程协调器:
coord.request_stop( ) coord.join(threads)
(4) mnist_test.py
#coding:utf-8
"""Periodically evaluate the latest MNIST checkpoint on the test TFRecord file."""
import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import mnist_backward
import mnist_forward
import mnist_generateds

TEST_INTERVAL_SECS = 5
TEST_NUM = 10000  # 1: number of examples fetched per evaluation


def test():
    """Evaluate the newest checkpoint every ``TEST_INTERVAL_SECS`` seconds.

    Each round restores the moving-average variables from the latest
    checkpoint under ``mnist_backward.MODEL_SAVE_PATH``, fetches one batch of
    ``TEST_NUM`` examples and prints the accuracy. Returns as soon as no
    checkpoint is found.
    """
    with tf.Graph().as_default() as g:
        x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
        y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
        y = mnist_forward.forward(x, None)

        # Restore the moving-average values in place of the raw variables.
        ema = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY)
        ema_restore = ema.variables_to_restore()
        saver = tf.train.Saver(ema_restore)

        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # 2: batch tensors fed by the test TFRecord input queue
        img_batch, label_batch = mnist_generateds.get_tfrecord(TEST_NUM, isTrain=False)

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # The step count is the suffix after the last '-' in the
                    # checkpoint file name.
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

                    coord = tf.train.Coordinator()  # 3
                    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # 4
                    try:
                        xs, ys = sess.run([img_batch, label_batch])  # 5
                        accuracy_score = sess.run(accuracy, feed_dict={x: xs, y_: ys})
                        print("After %s training step(s), test accuracy = %g"
                              % (global_step, accuracy_score))
                    finally:
                        # Stop the queue-runner threads even if evaluation raised.
                        coord.request_stop()  # 6
                        coord.join(threads)  # 7
                else:
                    print('No checkpoint file found')
                    return
            time.sleep(TEST_INTERVAL_SECS)


def main():
    test()  # 8


if __name__ == '__main__':
    main()
(5) mnist_app.py
#coding:utf-8 import tensorflow as tf import numpy as np from PIL import Image import mnist_backward import mnist_forward def restore_model(testPicArr): with tf.Graph().as_default() as tg: x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE]) y = mnist_forward.forward(x, None) preValue = tf.argmax(y, 1) variable_averages = tf.train.ExponentialMovingAverage(mnist_backward.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state(mnist_backward.MODEL_SAVE_PATH) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) preValue = sess.run(preValue, feed_dict={x:testPicArr}) return preValue else: print("No checkpoint file found") return -1 def pre_pic(picName): img = Image.open(picName) reIm = img.resize((28,28), Image.ANTIALIAS) im_arr = np.array(reIm.convert('L')) threshold = 50 for i in range(28): for j in range(28): im_arr[i][j] = 255 - im_arr[i][j] if (im_arr[i][j] < threshold): im_arr[i][j] = 0 else: im_arr[i][j] = 255 nm_arr = im_arr.reshape([1, 784]) nm_arr = nm_arr.astype(np.float32) img = np.multiply(nm_arr, 1.0/255.0) return nm_arr #img def application(): testNum = input("input the number of test pictures:") for i in range(testNum): testPic = raw_input("the path of test picture:") testPicArr = pre_pic(testPic) preValue = restore_model(testPicArr) print "The prediction number is:", preValue def main(): application() if __name__ == '__main__': main()