Lesson 6: TensorFlow CIFAR-10 CNN (Convolutional Neural Network)

Training basically boils down to a few steps (a minimal entry-point sketch that ties the files together follows this list):

  1. Read the data
  2. Build the training network
  3. Train
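The code is split into an abstract interface plus a CIFAR-10 implementation for each of these steps: train.py / cifar10_train.py for training, data_input.py / cifar10_data_input.py for the input pipeline, and inference.py / cifar10_inference.py for the forward model. As a rough sketch of how the pieces fit together (the main.py file name and the __main__ guard are assumptions, not shown in the original code), a minimal entry point could look like this:

# coding:utf-8
# Hypothetical entry point (main.py), assuming the training class below is
# saved as cifar10_train.py next to the other modules.
from cifar10_train import CIFAR10Train

if __name__ == '__main__':
    # builds the input pipeline and the CNN, then trains until the
    # StopAtStepHook (1000 steps) requests a stop
    CIFAR10Train().train()

The files below follow in dependency order, starting with the abstract training interface.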
# coding:utf-8
"""
Training interface.
"""

from abc import ABCMeta
from abc import abstractmethod


class ITrain(object):

    __metaclass__ = ABCMeta

    @abstractmethod
    def train(self):
        """
        Run training.
        :return: train op
        """
        pass
# coding:utf-8
"""
cifar10 train
"""

from train import ITrain
import tensorflow as tf
from cifar10_data_input import CIFAR10DataInput
from cifar10_inference import CIFAR10Inference
import time
import datetime


class CIFAR10Train(ITrain):

    INPUT_PATH = 'input/cifar10_bin_data/*.bin'
    TRAIN_PATH = 'output/train'
    BATCH_SIZE = 128
    NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000

    LEARNING_RATE_INITIAL = 0.1
    LEARNING_RATE_DECAY_FACTOR = 0.1
    NUM_EPOCHS_PER_DECAY = 350.0

    def train(self):

        input_paths = tf.train.match_filenames_once(CIFAR10Train.INPUT_PATH)
        cifar10_input = CIFAR10DataInput(input_file_paths=input_paths,
                                         batch_size=CIFAR10Train.BATCH_SIZE,
                                         example_per_epoch_num=CIFAR10Train.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)

        image_batch, label_batch = cifar10_input.read_data()

        tf.summary.image('images', image_batch)

        cifar10_inference = CIFAR10Inference(image_channel=3,
                                             batch_size=CIFAR10Train.BATCH_SIZE,
                                             label_class_num=10)
        logits = cifar10_inference.inference(images=image_batch)
        loss = cifar10_inference.loss(logits, label_batch)

        train_op = self._train_op(loss)

        class _LoggerHook(tf.train.SessionRunHook):

            def __init__(self):
                super(_LoggerHook, self).__init__()
                self._step = -1
                self._start_time = time.time()
                self._log_frequency = 100

            def begin(self):
                self._step = -1
                self._start_time = time.time()
                self._log_frequency = 100

            def before_run(self, run_context):
                self._step += 1
                # loss is added as an extra fetch for this run; its value comes back in run_values inside after_run
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % self._log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = self._log_frequency * CIFAR10Train.BATCH_SIZE / duration
                    sec_per_batch = float(duration / self._log_frequency)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(checkpoint_dir=CIFAR10Train.TRAIN_PATH,
                                               hooks=[tf.train.StopAtStepHook(last_step=1000),  # requests a stop once last_step is reached
                                                      tf.train.NanTensorHook(loss),  # aborts training if the loss becomes NaN
                                                      _LoggerHook()],
                                               config=tf.ConfigProto(log_device_placement=False)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)

    def _train_op(self, loss):

        # global step counter: records how many training steps have been run in total
        global_step = tf.contrib.framework.get_or_create_global_step()

        num_batches_per_epoch = CIFAR10Train.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / CIFAR10Train.BATCH_SIZE
        decay_steps = int(num_batches_per_epoch * CIFAR10Train.NUM_EPOCHS_PER_DECAY)

        # use exponential decay so the learning rate shrinks as training progresses
        learning_rate = tf.train.exponential_decay(CIFAR10Train.LEARNING_RATE_INITIAL,
                                                   global_step,
                                                   decay_steps=decay_steps,
                                                   decay_rate=CIFAR10Train.LEARNING_RATE_DECAY_FACTOR,
                                                   staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # track a moving average of the losses (for the summaries)
        loss_averages_op = self._add_loss_summaries(total_loss=loss)

        # control_dependencies enforces the execution order: update the loss moving average first,
        # then compute the gradients. Without it the ops could run in any order when the graph
        # executes in parallel, so this acts as a synchronization point.
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.GradientDescentOptimizer(learning_rate)
            grads = opt.compute_gradients(loss)

        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # add histogram summaries for all trainable variables
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

        # Keep a moving average of the trainable variables as well: at evaluation time the averaged
        # values can replace the raw variables, which smooths out the jitter from training.
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')

        return train_op

    def _add_loss_summaries(self, total_loss):
        """
        Track a moving average of the total loss (and its components) for the summaries.
        :param total_loss: total loss of the current step
        :return:
        """

        # ExponentialMovingAverage creates shadow variables that track a moving average of the losses.
        # They are stored in the graph (GraphKeys.MOVING_AVERAGE_VARIABLES) and live for the whole
        # session. Each shadow value starts at 0 and is updated toward the new loss on every iteration.
        loss_averages = tf.train.ExponentialMovingAverage(decay=0.9, name='avg')

        losses = tf.get_collection('losses')

        # apply() maintains a separate moving average for every element of the list, not one average over the whole list
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        for l in losses + [total_loss]:
            # summarize the raw value of the loss ...
            tf.summary.scalar(l.op.name + ' (raw)', l)
            # ... and its moving average
            tf.summary.scalar(l.op.name, loss_averages.average(l))

        return loss_averages_op
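A note on the learning-rate schedule in _train_op: with 50,000 training examples and a batch size of 128 there are roughly 390 batches per epoch, so decay_steps works out to about 136,500 and the first drop from 0.1 to 0.01 only happens after that many steps, far beyond the 1,000 steps allowed by the StopAtStepHook above. A minimal sketch of the staircase schedule in plain Python (not part of the original code):

# Staircase exponential decay, mirroring tf.train.exponential_decay(..., staircase=True)
# with the constants from CIFAR10Train.
def decayed_learning_rate(global_step,
                          initial_rate=0.1,     # LEARNING_RATE_INITIAL
                          decay_rate=0.1,       # LEARNING_RATE_DECAY_FACTOR
                          decay_steps=136500):  # int(50000 / 128 * 350) with integer division
    # staircase=True floors the exponent, so the rate drops in discrete jumps
    return initial_rate * decay_rate ** (global_step // decay_steps)

print(decayed_learning_rate(0))        # 0.1
print(decayed_learning_rate(136500))   # ~0.01, first decay
print(decayed_learning_rate(273000))   # ~0.001, second decay

The next file defines the abstract data-input interface.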
# coding:utf-8
"""
Data input interface: reads raw data and transforms it into the standard format consumed by the model.
"""

import tensorflow as tf
from abc import ABCMeta
from abc import abstractmethod


class IDataInput(object):
    """
    Data input.
    """

    __metaclass__ = ABCMeta

    def __init__(self,
                 input_file_paths,
                 batch_size,
                 example_per_epoch_num,
                 parallel_thread_num=16):
        """
        Initialize.
        :param input_file_paths: list of input file paths
        :param batch_size: batch size
        :param example_per_epoch_num: number of examples per epoch (usually the total number of examples)
        :param parallel_thread_num: number of threads used for parallel preprocessing
        """
        self._input_file_paths = input_file_paths
        self._batch_size = batch_size
        self._parallel_thread_num = parallel_thread_num
        self._example_per_epoch_num = example_per_epoch_num

    def read_data(self):
        """
        Read the data.
        :return: (data_batch, label_batch)
        """

        # build the queue of input file names
        file_path_queue = tf.train.string_input_producer(self._input_file_paths)

        record = self._read_data_from_queue(file_path_queue)

        standard_data, label = self._preprocess_data(record)

        data_batch, label_batch = self._generate_train_batch(standard_data, label, shuffle=False)
        return data_batch, label_batch

    @abstractmethod
    def _read_data_from_queue(self, file_path_queue):
        """
        Read data from the filename queue and return it in the required record format.
        :param file_path_queue:
        :return:
        """
        pass

    def test_read_data_from_queue(self):
        # build the queue of input file names
        file_path_queue = tf.train.string_input_producer(self._input_file_paths)
        return self._read_data_from_queue(file_path_queue)

    @abstractmethod
    def _preprocess_data(self, record):
        """
        Preprocess the record read by _read_data_from_queue. For images this means distortions such as random crops, flips and added noise.
        :param record: record returned by _read_data_from_queue
        :return:
        """
        pass

    def _generate_train_batch(self, train_data, label, shuffle=True):
        """
        Build training batches via a queue.
        :param train_data: training data
        :param label: label
        :param shuffle: whether to shuffle examples before batching
        :return:
        """

        # queue capacity, sized so the queue buffers enough examples without exhausting memory
        capacity = int(self._example_per_epoch_num * 0.4) + 3 * self._batch_size

        if shuffle:
            data_batch, label_batch = tf.train.shuffle_batch([train_data, label],
                                                             batch_size=self._batch_size,
                                                             num_threads=self._parallel_thread_num,
                                                             capacity=capacity,
                                                             min_after_dequeue=int(self._example_per_epoch_num * 0.4))
        else:
            data_batch, label_batch = tf.train.batch([train_data, label],
                                                     batch_size=self._batch_size,
                                                     num_threads=self._parallel_thread_num,
                                                     capacity=capacity)
        return data_batch, tf.reshape(label_batch, [self._batch_size])
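For the values used later (example_per_epoch_num = 50000, batch_size = 128), the queue capacity works out to int(50000 * 0.4) + 3 * 128 = 20,384 examples, and when shuffling is enabled min_after_dequeue is 20,000. A larger buffer gives better shuffling but uses more memory and takes longer to fill before the first batch comes out; note that read_data above calls _generate_train_batch with shuffle=False. The CIFAR-10 implementation of this interface follows.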
# coding:utf-8
"""
CIFAR-10 input reader.
"""

import tensorflow as tf
from data_input import IDataInput


class CIFAR10Record(object):
    """
    A record read from the CIFAR-10 binary files.
    """
    pass


class CIFAR10DataInput(IDataInput):
    """
    Cifar10的数据输入.

    数据集说明如下:

    data_batch_1.bin - data_batch_5.bin 5个bin用作训练集.

    bin 中的数据,第1个字是label,接下来的3072表示图片, 前1024字节是R,接下来1024是G,最后1024字节是B,
    所以是32 * 32=1024的R G B图片. 每个bin包含 10000 个图片,所以总共有5W个训练图片。

    test_batch.bin 1个bin用作测试集。
    它的结构与训练集是一样的。这样是方便做评估。实际的问题中测试集是不包含label的。

    batches.meta.txt 说明每个label对应的含义
    """

    def __init__(self,
                 input_file_paths,
                 batch_size,
                 example_per_epoch_num,
                 parallel_thread_num=16,
                 label_bytes=1,
                 image_height=32,
                 image_width=32,
                 target_image_height=24,
                 target_image_width=24,
                 channel=3):

        """
        Initialize.
        :param label_bytes: number of bytes used by the label
        :param image_height: height of the raw input image
        :param image_width: width of the raw input image
        :param target_image_height: height of the generated (cropped) image
        :param target_image_width: width of the generated (cropped) image
        :param channel: number of image channels
        """
        super(CIFAR10DataInput, self).__init__(input_file_paths=input_file_paths,
                                               batch_size=batch_size,
                                               example_per_epoch_num=example_per_epoch_num,
                                               parallel_thread_num=parallel_thread_num)
        self._label_bytes = label_bytes
        self._image_height = image_height
        self._image_width = image_width
        self._target_image_height = target_image_height
        self._target_image_width = target_image_width
        self.channel = channel

    def _read_data_from_queue(self, file_path_queue):

        record = CIFAR10Record()

        record.channel = 3
        record.height = self._image_height
        record.width = self._image_width

        image_bytes = self._image_height * self._image_width * self.channel
        record_bytes = self._label_bytes + image_bytes

        # read fixed-length records from the queue
        reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)

        # read() returns (key, value); value holds the raw bytes of one record
        record.key, value = reader.read(file_path_queue, name='image_reader')

        # decode value into the raw label and image bytes
        image_label_bytes = tf.decode_raw(value, tf.uint8)

        # extract the label
        record.label = tf.cast(tf.slice(image_label_bytes, [0], [self._label_bytes]), tf.int32)

        # extract the image bytes
        image_bytes = tf.slice(image_label_bytes, [0+self._label_bytes], [image_bytes])

        # reshape to 3 * 32 * 32, matching the on-disk layout (R, G, B) => 1024, 1024, 1024 bytes
        image_bytes = tf.reshape(image_bytes, [record.channel, record.height, record.width])

        # TensorFlow expects images as 32 * 32 * 3, so transpose [0, 1, 2] (c, h, w) into [1, 2, 0] (h, w, c)
        record.uint8image = tf.transpose(image_bytes, [1, 2, 0])

        return record

    def _preprocess_data(self, record):
        """
        Preprocess the image by applying random distortions (noise).
        :param record: record read by _read_data_from_queue
        :return:
        """

        # cast the image to float32
        reshaped_image = tf.cast(record.uint8image, tf.float32)

        height = self._target_image_height
        width = self._target_image_width

        # random crop to the target size
        distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

        # random horizontal flip
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # random brightness
        distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)

        # random contrast
        distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

        # per-image standardization (zero mean, unit variance)
        float_image = tf.image.per_image_standardization(distorted_image)

        return float_image, record.label
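To sanity-check the byte layout described in the class docstring, the first record of one of the training files can be decoded with plain numpy. This is just a sketch and assumes the binaries sit under the input/cifar10_bin_data/ directory used by CIFAR10Train:

# Read one raw CIFAR-10 record with numpy to verify the layout:
# 1 label byte followed by 3072 image bytes stored channel-first (R, G, B planes).
import numpy as np

record_bytes = 1 + 32 * 32 * 3
raw = np.fromfile('input/cifar10_bin_data/data_batch_1.bin',
                  dtype=np.uint8, count=record_bytes)

label = int(raw[0])                       # value in [0, 9], meanings listed in batches.meta.txt
image_chw = raw[1:].reshape(3, 32, 32)    # (channel, height, width), as stored on disk
image_hwc = image_chw.transpose(1, 2, 0)  # (height, width, channel), as TensorFlow expects
print(label, image_hwc.shape)             # prints the label and (32, 32, 3)

The tf.transpose call in _read_data_from_queue performs exactly the same (c, h, w) -> (h, w, c) permutation inside the graph. Next comes the abstract inference interface.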
# coding:utf-8
"""
Build the forward (inference) model.
"""

from abc import ABCMeta
from abc import abstractmethod

import tensorflow as tf


class IInference(object):
    """
    Interface for building the forward (inference) model.
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        self._loss_name = 'losses'

    @abstractmethod
    def inference(self, data):
        """
        Build the forward model.
        :param data: input data
        :return: tensorflow op
        """
        pass

    @abstractmethod
    def loss(self, inference, label):
        """
        Compute the loss.
        :param inference: logits produced by inference()
        :param label: label
        :return:
        """
        pass

    def bias(self, name, shape, initializer=tf.constant_initializer(0.0)):
        """
        Create a bias variable.
        :param name: name of the bias
        :param shape: shape of the bias
        :param initializer: initializer
        :return: bias variable
        """

        return tf.get_variable(name=name,
                               shape=shape,
                               initializer=initializer)

    def variable_with_weight_decay(self, name, shape, stddev, l2_decay):
        """
        Create a variable, optionally with L2 weight decay.
        :param name: variable name
        :param shape: shape
        :param stddev: standard deviation of the truncated-normal initializer
        :param l2_decay: coefficient of the L2 loss; if l2_decay is None, no L2 loss is added
        :return: the created variable
        """
        var = tf.get_variable(name=name,
                              shape=shape,
                              initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32),
                              dtype=tf.float32)

        # add the L2 weight-decay loss

        if l2_decay is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(var), l2_decay, name='weight_loss')

            # The L2 terms are summed into the total loss at the end, so stash them in a collection for now.
            # add_to_collection maintains a key -> list-of-values mapping in the graph.
            tf.add_to_collection(self._loss_name, weight_decay)

        return var
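The weight-decay mechanism above relies on TensorFlow's named collections: every L2 term is stored under the 'losses' key and later summed together with the data loss by tf.add_n (see CIFAR10Inference.loss below). A tiny self-contained illustration of that pattern; the variable, the 0.004 coefficient and the constant data loss are made up for the example:

# Toy illustration (TF 1.x) of the 'losses' collection pattern used above.
import tensorflow as tf

w = tf.get_variable('w', shape=[3], initializer=tf.ones_initializer())
# weight-decay term: 0.004 * sum(w^2) / 2  (tf.nn.l2_loss already includes the 1/2)
weight_decay = tf.multiply(tf.nn.l2_loss(w), 0.004, name='weight_loss')
tf.add_to_collection('losses', weight_decay)

data_loss = tf.constant(1.5, name='cross_entropy_mean')  # stand-in for the real data loss
tf.add_to_collection('losses', data_loss)

total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(total_loss))  # 1.5 + 0.004 * 3 / 2 = ~1.506

CIFAR10Inference below implements this interface.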
# coding:utf-8
"""
CIFAR-10 inference.
"""

from inference import IInference

import tensorflow as tf
import re


class CIFAR10Inference(IInference):
    """
    CIFAR-10 inference using a CNN.
    """

    def __init__(self, image_channel, batch_size, label_class_num):
        """
        Initialize.
        :param image_channel: number of image channels
        :param batch_size: batch size
        :param label_class_num: number of label classes
        """
        super(CIFAR10Inference, self).__init__()
        # convolution kernel shape: 5 * 5 * channel
        self._kernel_width = 5
        self._kernel_height = 5
        self._image_channel = image_channel
        self._batch_size = batch_size
        self._label_class_num = label_class_num

    @staticmethod
    def activation_summary(x):
        """
        Add histogram and sparsity summaries for an activation tensor.
        :param x:
        :return:
        """
        tower_name = 'tower'
        tensor_name = re.sub('%s_[0-9]*/' % tower_name, '', x.op.name)
        tf.summary.histogram(tensor_name + '/activations', x)
        tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

    def inference(self, images):

        # build conv layer 1
        conv1_kernel_num = 64  # 64 kernels in the first conv layer
        with tf.variable_scope('conv1') as scope:
            kernel = self.variable_with_weight_decay(
                name='weight',
                shape=[self._kernel_height, self._kernel_width, self._image_channel, conv1_kernel_num],
                stddev=5e-2,
                l2_decay=0.0)

            # convolution
            conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')

            # bias
            bias = self.bias('bias', [conv1_kernel_num])

            pre_activation = tf.nn.bias_add(conv, bias=bias)

            # ReLU activation
            conv1 = tf.nn.relu(pre_activation, name=scope.name)

            # summary conv1
            CIFAR10Inference.activation_summary(conv1)

        # max pooling
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        # local response normalization of pool1
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1,
                          alpha=0.001/9.0, beta=0.75,
                          name='norm1')

        # build conv layer 2
        conv2_kernel_num = 64  # 64 kernels in the second conv layer
        with tf.variable_scope('conv2') as scope:
            kernel = self.variable_with_weight_decay(
                name='weight',
                shape=[self._kernel_height, self._kernel_width, conv1_kernel_num, conv2_kernel_num],
                stddev=5e-2,
                l2_decay=0.0)

            # convolution
            conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')

            # bias
            bias = self.bias('bias', [conv2_kernel_num], tf.constant_initializer(0.1))

            pre_activation = tf.nn.bias_add(conv, bias=bias)

            # ReLU activation
            conv2 = tf.nn.relu(pre_activation, name=scope.name)

            # summary conv2
            CIFAR10Inference.activation_summary(conv2)

        # local response normalization of conv2 (norm before pooling in this layer)
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1,
                          alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')
        # max pooling
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool2')

        # local3: fully connected layer on top of the conv features
        with tf.variable_scope('local3') as scope:
            reshape = tf.reshape(pool2,
                                 shape=[self._batch_size, -1])

            # length of the flattened pool2 output for one example
            dim = reshape.get_shape()[1].value

            weights = self.variable_with_weight_decay('weights',
                                                      shape=[dim, 384],
                                                      stddev=0.04,
                                                      l2_decay=0.004)
            bias = self.bias('bias',
                             shape=[384],
                             initializer=tf.constant_initializer(0.1))

            local3 = tf.nn.relu(tf.matmul(reshape, weights) + bias, name=scope.name)

            self.activation_summary(local3)

        # local4: fully connected layer
        with tf.variable_scope('local4') as scope:

            weights = self.variable_with_weight_decay('weights',
                                                      shape=[384, 192],
                                                      stddev=0.04,
                                                      l2_decay=0.004)
            bias = self.bias('bias',
                             shape=[192],
                             initializer=tf.constant_initializer(0.1))

            local4 = tf.nn.relu(tf.matmul(local3, weights) + bias, name=scope.name)

            self.activation_summary(local4)

        # final layer: linear logits fed to the softmax cross-entropy loss
        with tf.variable_scope('softmax') as scope:
            weights = self.variable_with_weight_decay('weights',
                                                      shape=[192, self._label_class_num],
                                                      stddev=0.04,
                                                      l2_decay=0.004)
            bias = self.bias('bias',
                             shape=[self._label_class_num],
                             initializer=tf.constant_initializer(0.0))

            softmax_linear = tf.add(tf.matmul(local4, weights), bias, name=scope.name)
            self.activation_summary(softmax_linear)

        return softmax_linear

    def loss(self, logits, label):
        label = tf.cast(label, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label,
            logits=logits,
            name='cross_entropy_per_example'
        )

        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')
        tf.add_to_collection(self._loss_name, cross_entropy_mean)

        # add the L2 weight-decay terms on top to get the total loss
        return tf.add_n(tf.get_collection(self._loss_name), name='total_loss')
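For the default 24 * 24 * 3 crops and a batch size of 128 the shapes work out as follows: conv1 (SAME padding, stride 1) keeps 24 * 24 with 64 channels; pool1 (3 * 3 window, stride 2) halves it to 12 * 12 * 64; conv2 keeps 12 * 12 * 64 and pool2 halves it again to 6 * 6 * 64. The reshape in local3 therefore flattens each example to 6 * 6 * 64 = 2304 features, which feed the 2304 -> 384 -> 192 -> 10 fully connected stack, and the final 10-way logits go into sparse_softmax_cross_entropy_with_logits.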
