Tensorflow中Batch Normalization的实现

最新推荐文章于 2021-05-20 12:06:00 发布

Kenn7

最新推荐文章于 2021-05-20 12:06:00 发布

阅读量530

点赞数

分类专栏： Tensorflow

本文链接：https://blog.csdn.net/kane7csdn/article/details/89314812

版权

Tensorflow 专栏收录该内容

12 篇文章 1 订阅

订阅专栏

Batch Normalization的目的： 加速训练并获得更加稳定的结果（深度 | BatchNorm是如何在深度学习优化过程中发挥作用的？）

Batch Normalization的原理实现：（基础 | batchnorm原理及代码详解）

Batch Normalization的代码实现：（参考richardsun-voyager的github）

import tensorflow as tf
import numpy as np

# Load MNIST dataset
import tensorflow.examples.tutorials.mnist.input_data as input_data

# Read the MNIST data sets from the "MNIST_data/" directory with one_hot=True
# (the labels are later fed to the [None, 10] placeholder `y_`).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

n_tr = mnist.train.images.shape[0]  # number of training samples
n_ts = mnist.test.images.shape[0]  # number of testing samples
# Size of the second dimension of the training image array
# (784 according to the commented-out print below).
n_pixel = mnist.train.images.shape[1]


# print(n_tr, n_ts, n_pixel)  # 55000 10000 784

# Create weights
def weight_variable(shape):
    """Create a new variable named 'weights' with the given shape.

    The initial value is drawn from a truncated normal distribution with
    standard deviation 0.01. A new tf.Variable is created on every call.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01),
                       name='weights')


# Create biases
def bias_variable(shape):
    """Create a new variable named 'biases' with the given shape.

    Every element starts at the constant 0.01. A new tf.Variable is created
    on every call.
    """
    return tf.Variable(tf.constant(0.01, shape=shape), name='biases')


# Convolutional function
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


# Max pool function
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


# Weight kept on the previous moving mean/variance at each update in
# conv_relu_pool; the current batch statistic contributes (1 - decay).
decay = 0.9
# Variance epsilon passed to tf.nn.batch_normalization in conv_relu_pool.
epsilon = 0.001


def conv_relu_pool(x, kernel_shape, bias_shape, is_training):
    """Apply conv -> batch normalization -> ReLU -> 2x2 max-pool to `x`.

    Args:
        x: input tensor handed to conv2d.
        kernel_shape: shape of the convolution filter variable.
        bias_shape: shape of the bias added after the convolution; also used
            for the batch-norm scale (gamma) and offset (beta) variables.
        is_training: Python truth value chosen at graph-construction time.
            When true, the layer normalizes with the statistics of the
            current batch and updates the moving averages as a side effect;
            otherwise it normalizes with the stored moving averages.

    Returns:
        The pooled activation tensor.

    Note:
        Every call creates a fresh set of variables (filter, bias, gamma,
        beta, moving mean, moving variance); nothing is shared between calls.
    """
    kernel = weight_variable(kernel_shape)
    conv_bias = bias_variable(bias_shape)

    # Learned batch-norm parameters: gamma is passed as the scale and beta as
    # the offset of tf.nn.batch_normalization.
    gamma = tf.Variable(tf.ones(bias_shape))
    beta = tf.Variable(tf.zeros(bias_shape))

    z = conv2d(x, kernel) + conv_bias

    # Non-trainable running statistics, one entry per element of z's last axis.
    n_channels = z.get_shape()[-1]
    moving_mean = tf.Variable(tf.zeros([n_channels]), trainable=False)
    moving_var = tf.Variable(tf.ones([n_channels]), trainable=False)
    # Moments are reduced over every axis except the last one.
    reduce_axes = list(range(len(z.get_shape()) - 1))

    if not is_training:
        # Inference path: normalize with the stored moving statistics.
        normed = tf.nn.batch_normalization(
            z, moving_mean, moving_var, beta, gamma, epsilon)
        return max_pool_2x2(tf.nn.relu(normed))

    # Training path: normalize with batch statistics and refresh the moving
    # averages as an exponential blend of old value and batch statistic.
    batch_mean, batch_var = tf.nn.moments(z, reduce_axes)
    blended_mean = moving_mean * decay + batch_mean * (1 - decay)
    update_mean = tf.assign(moving_mean, blended_mean)
    blended_var = moving_var * decay + batch_var * (1 - decay)
    update_var = tf.assign(moving_var, blended_var)
    # Creating the normalization op under these control dependencies makes
    # the two assignments run whenever the normalized output is evaluated.
    with tf.control_dependencies([update_mean, update_var]):
        normed = tf.nn.batch_normalization(
            z, batch_mean, batch_var, beta, gamma, epsilon)
    return max_pool_2x2(tf.nn.relu(normed))


def cnnLayer(x, keep_prob, is_training):
    """Build the two-conv-block classifier and return its softmax output.

    Args:
        x: input tensor; reshaped here to [-1, 28, 28, 1].
        keep_prob: second argument of tf.nn.dropout on the hidden dense layer.
        is_training: forwarded unchanged to both conv_relu_pool calls.

    Returns:
        Softmax over the 10 logits of the final dense layer.

    Note:
        All variables are created anew on each call, so calling this twice
        yields two independent networks.
    """
    images = tf.reshape(x, [-1, 28, 28, 1])

    # Conv block 1: 5x5 filters, 1 -> 32 channels.
    with tf.name_scope('hidden1'):
        h_pool1 = conv_relu_pool(images, [5, 5, 1, 32], [32], is_training)

    # Conv block 2: 5x5 filters, 32 -> 64 channels.
    with tf.name_scope('hidden2'):
        h_pool2 = conv_relu_pool(h_pool1, [5, 5, 32, 64], [64], is_training)

    # Two stride-2 'SAME' poolings take 28x28 down to 7x7, with 64 channels.
    flat_size = 7 * 7 * 64
    with tf.name_scope('fully_connected'):
        fc_weights = weight_variable([flat_size, 1024])
        fc_biases = bias_variable([1024])
        flattened = tf.reshape(h_pool2, [-1, flat_size])
        hidden = tf.nn.relu(tf.matmul(flattened, fc_weights) + fc_biases)
        # Dropout on the dense activations to reduce overfitting.
        hidden_dropped = tf.nn.dropout(hidden, keep_prob)

    # Output layer: 1024 -> 10 logits, converted to probabilities.
    with tf.variable_scope('softmax_layer'):
        out_weights = weight_variable([1024, 10])
        out_biases = bias_variable([10])
        logits = tf.matmul(hidden_dropped, out_weights) + out_biases
    return tf.nn.softmax(logits)


graph_cnn = tf.Graph()
with graph_cnn.as_default() as g:
    # Inputs: flattened images, one-hot labels and the dropout keep probability.
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder("float")

    # Network in training mode (batch-norm layers use batch statistics).
    y = cnnLayer(x, keep_prob, True)
    # Keep probabilities inside [1e-10, 1.0] so the log below stays finite.
    y = tf.clip_by_value(y, 1e-10, 1.0)
    # Summed cross-entropy between labels and predictions, minimized by Adam.
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    train_step = tf.train.AdamOptimizer(0.0005).minimize(cross_entropy)
    # Fraction of samples whose arg-max prediction matches the label.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Saver over the variables that exist at this point.
    saver = tf.train.Saver()

    epochs = 5
    batch_size = 64
    # Whole batches per epoch; a trailing partial batch is not counted.
    num_steps = n_tr // batch_size

    with tf.Session(graph=graph_cnn) as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for _ in range(epochs):
            for step in range(num_steps):
                batch_data, batch_labels = mnist.train.next_batch(batch_size)
                # One optimization step with dropout active.
                feed_dict = {x: batch_data, y_: batch_labels, keep_prob: 0.5}
                _, loss = sess.run([train_step, cross_entropy],
                                   feed_dict=feed_dict)
                if step % 500 != 0:
                    continue
                # Every 500 steps, report the loss on the same batch with
                # dropout disabled.
                feed_dict = {x: batch_data, y_: batch_labels, keep_prob: 1}
                loss = sess.run(cross_entropy, feed_dict=feed_dict)
                print('Loss:', loss)
        # Write the trained variables to a checkpoint.
        saved_model = saver.save(sess, 'temp1/model.ckpt')

with graph_cnn.as_default() as g:
    # Build the inference-mode network (batch norm uses the moving averages).
    # cnnLayer() creates brand-new tf.Variables on every call, so this
    # network does NOT share weights with the trained one. Remember how many
    # variables existed beforehand so the new ones can be picked out.
    n_existing_vars = len(tf.global_variables())
    y = cnnLayer(x, keep_prob, False)
    eval_vars = tf.global_variables()[n_existing_vars:]
    # The training network was the first thing built in this graph and
    # cnnLayer() creates its variables in a fixed order, so the leading
    # global variables are the trained counterparts, position by position.
    trained_vars = tf.global_variables()[:len(eval_vars)]
    for trained, fresh in zip(trained_vars, eval_vars):
        if trained.get_shape().as_list() != fresh.get_shape().as_list():
            raise ValueError(
                'Cannot pair trained variable %s with inference variable %s: '
                'shapes differ' % (trained.op.name, fresh.op.name))
    # Saver that loads each checkpoint entry (stored under the trained
    # variable's name) into the matching inference-network variable.
    eval_saver = tf.train.Saver(
        {trained.op.name: fresh
         for trained, fresh in zip(trained_vars, eval_vars)})
    # Correctness must be measured on the inference-mode output; the
    # module-level `correct_prediction` is wired to the training-mode graph.
    eval_correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

with tf.Session(graph=graph_cnn) as sess:
    sess.run(tf.global_variables_initializer())
    # Restore the trained values into the inference network's variables.
    ckpt = tf.train.get_checkpoint_state('temp1')
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise IOError("No checkpoint found in directory 'temp1'")
    eval_saver.restore(sess, ckpt.model_checkpoint_path)
    count = 0
    # 200 batches of 50 test samples each.
    for _ in range(200):
        batch_data, batch_labels = mnist.test.next_batch(50)
        feed_dict = {x: batch_data, y_: batch_labels, keep_prob: 1}
        cp = sess.run(eval_correct_prediction, feed_dict=feed_dict)
        count += np.sum(cp)
    print("Testing Accuracy：", count / n_ts)