Tensorflow：实战Google深度学习框架（输入数据处理框架）-CSDN博客

本文链接：https://blog.csdn.net/weixin_41829234/article/details/89371021
参考：TensorFlow：实战Google深度学习框架
# *_*coding:utf-8 *_*

import tensorflow as tf
import numpy as np

def distort_color(image, color_ordering=0):
    """Randomly perturb the colors of an image and clip the result to [0, 1].

    Four random adjustments (brightness, saturation, hue, contrast) are
    applied one after another. `color_ordering` selects the order in which
    they run.

    Args:
        image: Image tensor to distort. Presumably float-valued in [0, 1],
            since the result is clipped to that range -- confirm with caller.
        color_ordering: 0, 1, 2 or 3, selecting one of the four orderings
            below. For any other value no adjustment is applied and the
            image is only clipped.

    Returns:
        The adjusted image with every value clipped to [0.0, 1.0].
    """
    def brightness(img):
        return tf.image.random_brightness(img, max_delta=32. / 255.)

    def saturation(img):
        return tf.image.random_saturation(img, lower=0.5, upper=1.5)

    def hue(img):
        return tf.image.random_hue(img, max_delta=0.2)

    def contrast(img):
        return tf.image.random_contrast(img, lower=0.5, upper=1.5)

    # Each ordering is a rotation of the same four adjustments.
    pipelines = {
        0: (brightness, saturation, hue, contrast),
        1: (saturation, hue, contrast, brightness),
        2: (hue, contrast, brightness, saturation),
        3: (contrast, brightness, saturation, hue),
    }

    for adjust in pipelines.get(color_ordering, ()):
        image = adjust(image)

    # The adjustments can push values outside [0, 1]; clip them back into
    # the valid range before returning.
    return tf.clip_by_value(image, 0.0, 1.0)

"""
给定一张解码后的图像、目标图像的尺寸以及图像上的标注框，此函数可以对给出的图像进行预处理
这个函数的输入图像是图像识别问题中的原始的训练数据，而输出则是神经网络模型的输入层。
注意这里只处理模型的训练数据，对于预测数据，一般不需要使用随机变换的步骤
"""
def preprocess_for_train(image, height, width, bbox):
    """Turn a decoded training image into a randomly distorted network input.

    The image is randomly cropped, resized to `height` x `width`, randomly
    flipped left/right and randomly color-distorted. These random steps are
    meant for training data only.

    Note that the resize method and the color ordering are drawn with NumPy
    when this function is called (i.e. while the graph is being built), so
    one call uses a single fixed choice for both.

    Args:
        image: Decoded image tensor. Converted to float32 if it has another
            dtype.
        height: Target height of the output image.
        width: Target width of the output image.
        bbox: Bounding boxes passed to
            `tf.image.sample_distorted_bounding_box`, or None to treat the
            whole image as the region of interest.

    Returns:
        The distorted image tensor, resized to [height, width].
    """
    # Without an annotation box, the whole image is the region of interest.
    if bbox is None:
        bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])

    # Convert the image tensor to float32.
    if image.dtype != tf.float32:
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Randomly crop the image so the network is less sensitive to the size
    # of the object of interest.
    # NOTE: the signature of sample_distorted_bounding_box has changed
    # between TensorFlow versions; see the function definition for details.
    bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(tf.shape(image),
                                                                      bounding_boxes=bbox, min_object_covered=0.1)
    distort_image = tf.slice(image, bbox_begin, bbox_size)

    # Resize the crop to the network input size using a randomly chosen
    # resize method (0..3).
    distort_image = tf.image.resize_images(distort_image, [height, width], method=np.random.randint(4))
    # Randomly flip the image left/right.
    distort_image = tf.image.random_flip_left_right(distort_image)
    # Adjust the colors using one of the four orderings that distort_color
    # implements. randint(4) yields 0..3; the previous randint(3) could
    # never select ordering 3.
    distort_image = distort_color(distort_image, np.random.randint(4))

    return distort_image
#神经网络的前向传播算法,此处只是为了做一个示意
def inference(image_batch):
    """Placeholder for the network's forward pass.

    This stub ignores `image_batch` and returns None; a real model must
    replace the body.
    """
    return None
#损失函数的计算，此处只是为了做一个示例函数，具体问题时，在定义函数体的变量和返回值
def calc_loss(logit, label_batch):
    """Placeholder for the loss computation.

    This stub ignores both arguments and returns None; the body and return
    value must be defined for a concrete problem.
    """
    return None

'''
创建文件列表，并通过文件列表创建输入文件队列。在调用输入数据处理流程前，需要统一原始数据格式并将它们存储到TFRecord文件中。
下面给出的文件列表应该包含所有提供训练数据的TFRecord文件
'''
# Use tf.train.match_filenames_once to obtain the list of files matching the
# pattern.
files = tf.train.match_filenames_once('path/too/file_pattern-*')
# Build the input filename queue from that list; shuffle=False is passed so
# the file order is not shuffled.
filename_queue = tf.train.string_input_producer(files, shuffle=False)

'''
使用类似7.1节中介绍的方法解析TFRecord文件中的数据。这里假设image中存储的是图像的原始数据，label为该样例所对应的标签。height、
width和channels给出了图片的维度
'''
# Read and parse a single example, as shown in section 7.1.
# A TFRecordReader pulls serialized examples from the files in the queue.
reader = tf.TFRecordReader()
# read() returns a (key, value) pair; only the serialized value is used.
_, serialized_example = reader.read(filename_queue)

# Every field is a scalar: the image is a byte string, the rest are int64.
features = tf.parse_single_example(
    serialized_example,
    features={
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'channels': tf.FixedLenFeature([], tf.int64),
    },
)

# Pull the parsed tensors out into module-level names.
image, label, height, width, channels = (
    features[key] for key in ('image', 'label', 'height', 'width', 'channels')
)

# Decode the raw byte string into a uint8 pixel vector, then restore the image
# layout from the dimensions stored in the same record.
decoded_image = tf.decode_raw(image, tf.uint8)
# height, width and channels are tensors parsed from the record, so they
# cannot be given to set_shape(), which only accepts static Python integers.
# Reshape dynamically instead.
decoded_image = tf.reshape(decoded_image, tf.stack([height, width, channels]))

# Side length of the network's input layer. It is unrelated to the original
# image size: preprocessing resizes every image to it (e.g. a 29x29x3 source
# image comes out as 299x299x3).
image_size = 299
# preprocess_for_train is the image preprocessing routine from section 7.1.2.
distorted_image = preprocess_for_train(decoded_image, image_size, image_size, None)
# tf.train.shuffle_batch requires fully defined static shapes, and the channel
# dimension is unknown after the dynamic reshape above. This assumes RGB input
# (3 channels), which the hue/saturation adjustments also expect -- confirm
# against the data set.
distorted_image.set_shape([image_size, image_size, 3])


# Group the preprocessed images and their labels into the batches the network
# trains on, using tf.train.shuffle_batch.
batch_size = 100
min_after_dequeue = 10000
capacity = min_after_dequeue + 3 * batch_size
image_batch, label_batch = tf.train.shuffle_batch(
    [distorted_image, label],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
)

# Define the network and its optimization. image_batch feeds the network's
# input layer and label_batch carries the correct answers for that batch.
TRAINING_ROUNDS = 30000
learning_rate = 0.001

# Forward pass (placeholder defined above).
logit = inference(image_batch)
# Loss computation (placeholder defined above).
loss = calc_loss(logit, label_batch)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# Open a session and run the optimization.
with tf.Session() as sess:
    # Preparation: initialize global and local variables, then start the
    # queue-runner threads that fill the input queues.
    init = tf.global_variables_initializer()
    local_init = tf.local_variables_initializer()
    sess.run([init, local_init])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Training loop.
        for _ in range(TRAINING_ROUNDS):
            # Stop early if a queue-runner thread has asked to stop.
            if coord.should_stop():
                break
            sess.run(train_step)
    finally:
        # Always stop and join the input threads, even when a training step
        # raises, so no background thread is left running.
        coord.request_stop()
        coord.join(threads)



'''
将数据转为 TFRecord 格式的多个文件
用 tf.train.match_filenames_once() 创建文件列表（图中为{A,B,C}）
用 tf.train.string_input_producer() 创建输入文件队列，可以将输入文件顺序随机打乱，并加入输入队列（是否打乱为可选项，该函数也会生成并维护一个输入文件队列，不同进程中的文件读取函数可以共享这个输入文件队列）
用 tf.TFRecordReader() 读取文件中的数据
用 tf.parse_single_example() 解析数据
对数据进行解码及预处理
用 tf.train.shuffle_batch() 将数据组合成 batch
将 batch 用于训练

'''