4-CNN-demo-0302-权重初始化

最新推荐文章于 2022-09-01 16:18:41 发布

HJZ11

最新推荐文章于 2022-09-01 16:18:41 发布

阅读量487

点赞数

分类专栏： # 深度学习4-CNN

本文链接：https://blog.csdn.net/HJZ11/article/details/103743295

版权

深度学习4-CNN 专栏收录该内容

21 篇文章 0 订阅

订阅专栏



import tensorflow as tf
import helper
import numpy as np
import matplotlib as mpl
from tensorflow.examples.tutorials.mnist import input_data
# Configure matplotlib fonts so Chinese text in plot titles/legends is not garbled.
mpl.rcParams['font.sans-serif'] = [u'simHei']
# Turn off the Unicode minus sign for axis labels
# (NOTE(review): presumably because the CJK font lacks that glyph -- confirm).
mpl.rcParams['axes.unicode_minus'] = False

"""
# 本章节目的：
    - 演示不同权重值对模型效果的影响，我们使用相同的神经网络和数据集进行测试。
    - 数据集是 MNIST  10个类，手写数字 0-9, 并且被归一化到(0.0 - 1.0).
"""


# todo - 先看下 helper定义的网络结构。


# Load MNIST from ./datasets with one-hot labels; the resulting module-level
# `mnist` object is shared by every experiment function below.
print('Getting MNIST Dataset...')
mnist = input_data.read_data_sets("./datasets", one_hot=True)
print('Data Extracted.')

# todo-问题，全设置为0或者1 网络会学习嘛？
# 使用全部相同的权值，导致每一个神经元都输出同样的值，这将使得权重很难学习。
def weight_all_0or1():
    """Compare training with all-zero versus all-one initial weights.

    Builds two sets of three weight variables (one per layer of the example
    network) filled with zeros and with ones, then hands both to
    ``helper.compare_init_weights`` to train, plot the loss curves and print
    the final statistics.

    Identical weights make every neuron emit the same value, which makes the
    weights very hard to learn -- this experiment demonstrates that.
    """
    # Start from a clean default graph, as every other experiment in this
    # file does, so ops left over from an earlier experiment do not pile up.
    tf.reset_default_graph()

    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [
        (mnist.train.images.shape[1], 256),
        (256, 128),
        (128, mnist.train.labels.shape[1]),
    ]

    all_zero_weights = [tf.Variable(tf.zeros(dims)) for dims in layer_shapes]
    all_one_weights = [tf.Variable(tf.ones(dims)) for dims in layer_shapes]

    helper.compare_init_weights(
        mnist,
        'all 0 vs all 1',
        [
            (all_zero_weights, 'all 0'),
            (all_one_weights, 'all 1'),
        ])

"""
# 均匀分布指区间内任何一个数值被选中的概率都相同。在 TensorFlow 中，使用 tf.random_uniform 函数从均匀分布中随机取值。
random_uniform(shape,
                   minval=0,
                   maxval=None,
                   dtype=dtypes.float32,
                   seed=None,
                   name=None):
      Outputs random values from a uniform distribution.
"""
# todo 直方图来对均匀分布做一个可视化展示,随机生成1000个在（-3,3）的随机数字。1000个值，用了500个桶。
def uniform_histgram():
    """Plot a histogram of 1000 values sampled with ``tf.random_uniform`` between -3 and 3."""
    samples = tf.random_uniform([1000], -3, 3)
    helper.hist_dist('Random Uniform (minval=-3, maxval=3)', samples)

def weights_random_uniform():
    """Train the example network with ``tf.random_uniform`` weights at its default range.

    This is the baseline run; the result is plotted and printed by
    ``helper.compare_init_weights``.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    baseline = [tf.Variable(tf.random_uniform(dims)) for dims in layer_shapes]

    experiments = [(baseline, 'tf.random_uniform [0, 1)')]
    helper.compare_init_weights(mnist, 'Baseline', experiments)

# todo 通用法则就是： 好的权值初始化范围为 $[-y, y]$ 且$y=1/\sqrt{n}$ ($n$ 是输入神经元数量).
#      在验证上述法则之前，我们先看看将权值范围调整到 [-1, 1)，的效果。
def weights_random_uniform1():
    """Compare default-range uniform weights against uniform weights in [-1, 1).

    Both weight sets are trained, plotted and summarised by
    ``helper.compare_init_weights``.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    # The baseline variables are created first, then the symmetric ones.
    baseline = [tf.Variable(tf.random_uniform(dims)) for dims in layer_shapes]
    symmetric = [tf.Variable(tf.random_uniform(dims, -1, 1)) for dims in layer_shapes]

    experiments = [
        (baseline, 'tf.random_uniform [0, 1)'),
        (symmetric, 'tf.random_uniform [-1, 1)'),
    ]
    helper.compare_init_weights(mnist, '[0, 1) vs [-1, 1)', experiments)

# todo 继续比较下[-0.1, 0.1), [-0.01, 0.01), and [-0.001, 0.001) 看是否越小越好.
#      并设置 plot_n_batches=None 将所有图可视化
def weights_random_uniform2():
    """Compare symmetric uniform ranges of shrinking width: 1, 0.1, 0.01 and 0.001.

    ``plot_n_batches=None`` is passed so the loss of every training batch is
    plotted rather than only the first ones.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    def symmetric_uniform(bound):
        # One variable per layer, sampled with minval=-bound, maxval=bound.
        return [tf.Variable(tf.random_uniform(dims, -bound, bound)) for dims in layer_shapes]

    # The list literal is evaluated top to bottom, so the variables are
    # created widest range first.
    experiments = [
        (symmetric_uniform(1), '[-1, 1)'),
        (symmetric_uniform(0.1), '[-0.1, 0.1)'),
        (symmetric_uniform(0.01), '[-0.01, 0.01)'),
        (symmetric_uniform(0.001), '[-0.001, 0.001)'),
    ]

    helper.compare_init_weights(
        mnist,
        '[-1, 1) vs [-0.1, 0.1) vs [-0.01, 0.01) vs [-0.001, 0.001)',
        experiments,
        plot_n_batches=None)


# todo [-0.01, 0.01) 往下就太小了。 让我们试试我们的经验法则 $y=1/\sqrt{n}$.
def weights_random_uniform3():
    """Compare uniform [-0.1, 0.1) weights against the rule of thumb y = 1/sqrt(n).

    For the rule-of-thumb set, each layer is sampled between -y and y, where
    n is the number of inputs to that layer (the first dimension of its
    weight shape). The three bounds are printed before training starts.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    fixed_range = [tf.Variable(tf.random_uniform(dims, -0.1, 0.1)) for dims in layer_shapes]

    # Per-layer bound y = 1 / sqrt(number of inputs to the layer).
    bounds = [1 / np.sqrt(dims[0]) for dims in layer_shapes]
    rule_of_thumb = [
        tf.Variable(tf.random_uniform(dims, -bound, bound))
        for dims, bound in zip(layer_shapes, bounds)
    ]
    print(*bounds)

    experiments = [
        (fixed_range, '[-0.1, 0.1)'),
        (rule_of_thumb, 'General Rule'),
    ]
    helper.compare_init_weights(
        mnist,
        '[-0.1, 0.1) vs General Rule',
        experiments,
        plot_n_batches=None)

# todo 下面试试正态分布(高斯分布)
"""
random_normal(shape,
                  mean=0.0,
                  stddev=1.0,
                  dtype=dtypes.float32,
                  seed=None,
                  name=None):
     Outputs random values from a normal distribution.
"""
def random_uniform_histgram():
    """Plot a histogram of 1000 values sampled with ``tf.random_normal``.

    Despite the function name, the samples come from a normal distribution,
    not a uniform one.
    """
    samples = tf.random_normal([1000])
    helper.hist_dist('Random Normal (mean=0.0, stddev=1.0)', samples)
# todo 用之前效果最好的均匀分布 和 正态分布比较下。
def weights_random_uniform4():
    """Compare uniform [-0.1, 0.1) weights against normal weights with stddev 0.1.

    Both weight sets are trained, plotted and summarised by
    ``helper.compare_init_weights``.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    uniform_set = [tf.Variable(tf.random_uniform(dims, -0.1, 0.1)) for dims in layer_shapes]
    normal_set = [tf.Variable(tf.random_normal(dims, stddev=0.1)) for dims in layer_shapes]

    experiments = [
        (uniform_set, 'Uniform [-0.1, 0.1)'),
        (normal_set, 'Normal stddev 0.1'),
    ]
    helper.compare_init_weights(
        mnist,
        'Uniform [-0.1, 0.1) vs Normal stddev 0.1',
        experiments)


# todo 再进一步，试试 截尾正态分布。大于2倍标准差范围的数值将被舍去。
"""
truncated_normal(shape,
                     mean=0.0,
                     stddev=1.0,
                     dtype=dtypes.float32,
                     seed=None,
                     name=None):
    Outputs random values from a truncated normal distribution.
"""
def truncated_normal_histgram():
    """Plot a histogram of 1000 values sampled with ``tf.truncated_normal``."""
    samples = tf.truncated_normal([1000])
    helper.hist_dist('截尾正态分布 (mean=0.0, stddev=1.0)', samples)

# todo 用截尾正态分布 和 正态分布进行比较
def weights_random_uniform5():
    """Compare normal against truncated-normal weights, both with stddev 0.1.

    Both weight sets are trained, plotted and summarised by
    ``helper.compare_init_weights``.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    normal_set = [tf.Variable(tf.random_normal(dims, stddev=0.1)) for dims in layer_shapes]
    truncated_set = [tf.Variable(tf.truncated_normal(dims, stddev=0.1)) for dims in layer_shapes]

    experiments = [
        (normal_set, '正态分布'),
        (truncated_set, '截尾正态分布'),
    ]
    helper.compare_init_weights(mnist, '正态分布 vs 截尾正态分布', experiments)
# todo 似乎截尾正态分布效果一般。其实是因为这个网络结构小了。考虑到大型网络参数多，我们更有可能会获取大于2倍标准差的值。
#      所以用截尾正态分布就对了。

"""
最后，我们和我们开篇的方法比较下 两者的效果。
"""
def weights_random_uniform6():
    """Compare the default-range uniform baseline against truncated-normal weights (stddev 0.1).

    This is the closing experiment: the first random initialisation tried in
    this file versus the last one.
    """
    tf.reset_default_graph()

    n_inputs = mnist.train.images.shape[1]
    n_classes = mnist.train.labels.shape[1]
    # (inputs, outputs) of each layer's weight matrix.
    layer_shapes = [(n_inputs, 256), (256, 128), (128, n_classes)]

    baseline = [tf.Variable(tf.random_uniform(dims)) for dims in layer_shapes]
    truncated_set = [tf.Variable(tf.truncated_normal(dims, stddev=0.1)) for dims in layer_shapes]

    experiments = [
        (baseline, '均匀分布'),
        (truncated_set, '截尾正态分布'),
    ]
    helper.compare_init_weights(mnist, '均匀分布 vs 截尾正态分布', experiments)


if __name__=='__main__':
    # Experiment switchboard: un-comment the call(s) you want to run.
    # weight_all_0or1()
    # uniform_histgram()
    # weights_random_uniform()  # Plot: the loss keeps dropping, so the network is learning -- clearly better than all 0 / all 1. A step in the right direction.
    # weights_random_uniform1()
    # weights_random_uniform2()
    # weights_random_uniform3()  # [-0.1, 0.1] is numerically very close to the rule of thumb y = 1/sqrt(n), so both perform well.
    # random_uniform_histgram()
    # weights_random_uniform4()
    # truncated_normal_histgram()
    weights_random_uniform5()
    #weights_random_uniform6()

helper代码

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf


def hist_dist(title, distribution_tensor, hist_range=(-4, 4)):
    """
    Display histogram of a TF distribution

    :param title: plot title.
    :param distribution_tensor: tensor to evaluate; its values are histogrammed.
    :param hist_range: (low, high) pair spanned by the histogram bin edges.
    """
    with tf.Session() as sess:
        values = sess.run(distribution_tensor)

    # One bin edge per two samples. Floor division keeps `num` an integer:
    # under Python 3 `len(values) / 2` is a float, which np.linspace rejects.
    edge_count = len(values) // 2

    plt.title(title)
    plt.hist(values, np.linspace(*hist_range, num=edge_count))
    plt.show()


def _get_loss_acc(dataset, weights):
    """
    Get losses and validation accuracy of example neural network

    Builds a network of two ReLU hidden layers (256 and 128 units) and a linear
    output layer on top of the supplied weights, trains it with Adam for a fixed
    number of epochs, and evaluates it on the validation split.

    :param dataset: object exposing ``train`` (``num_examples``, ``next_batch``,
        ``labels``) and ``validation`` (``images``, ``labels``).
    :param weights: indexable collection of three weight tensors, one per layer
        (index 0 = first hidden layer, 2 = output layer).
    :return: ``(loss_batch, valid_acc)`` -- the list of per-batch training losses
        and the accuracy measured on the validation set after training.
    """
    batch_size = 128
    epochs = 2
    learning_rate = 0.001

    features = tf.placeholder(tf.float32)
    labels = tf.placeholder(tf.float32)
    learn_rate = tf.placeholder(tf.float32)

    # Only the biases are created here (all zeros); the weights are supplied
    # by the caller through the `weights` argument.
    biases = [
        tf.Variable(tf.zeros([256])),
        tf.Variable(tf.zeros([128])),
        tf.Variable(tf.zeros([dataset.train.labels.shape[1]]))
    ]

    # Network: 3 layers in total -- 2 hidden layers and 1 output layer.
    layer_1 = tf.nn.relu(tf.matmul(features, weights[0]) + biases[0])
    layer_2 = tf.nn.relu(tf.matmul(layer_1, weights[1]) + biases[1])
    logits = tf.matmul(layer_2, weights[2]) + biases[2]

    # Training loss
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

    # Optimizer
    optimizer = tf.train.AdamOptimizer(learn_rate).minimize(loss)

    # Accuracy
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Measurements use for graphing loss
    loss_batch = []

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        # Number of batches (steps) per epoch = total training examples / batch_size,
        # truncated to an integer.
        batch_count = int((dataset.train.num_examples / batch_size))

        # Training loop
        for epoch_i in range(epochs):
            for batch_i in range(batch_count):
                batch_features, batch_labels = dataset.train.next_batch(batch_size)

                # Run optimizer and get loss
                # The loss is fetched in a second run, i.e. it is evaluated on
                # the same batch after the optimizer step has been applied.
                session.run(
                    optimizer,
                    feed_dict={features: batch_features, labels: batch_labels, learn_rate: learning_rate})
                l = session.run(
                    loss,
                    feed_dict={features: batch_features, labels: batch_labels, learn_rate: learning_rate})
                loss_batch.append(l)

        # The optimizer is not run here, so the learn_rate value fed (1.0)
        # does not influence the accuracy.
        valid_acc = session.run(
            accuracy,
            feed_dict={features: dataset.validation.images, labels: dataset.validation.labels, learn_rate: 1.0})

    # Hack to Reset batches
    # Writes the dataset's private counters directly.
    # NOTE(review): presumably this makes the next call start from the
    # beginning of the training data -- confirm against the DataSet class.
    dataset.train._index_in_epoch = 0
    dataset.train._epochs_completed = 0

    return loss_batch, valid_acc


def compare_init_weights(
        dataset,
        title,
        weight_init_list,
        plot_n_batches=100):
    """
    Plot loss and print stats of weights using an example neural network

    :param dataset: dataset object forwarded unchanged to ``_get_loss_acc``.
    :param title: title of the loss plot.
    :param weight_init_list: list of ``(weights, label)`` pairs; at most six,
        one per available plot colour.
    :param plot_n_batches: number of leading batches whose loss is plotted;
        ``None`` plots all of them.
    """
    palette = ['r', 'b', 'g', 'c', 'y', 'k']
    assert len(weight_init_list) <= len(palette), 'Too many inital weights to plot'

    accuracy_rows = []
    final_loss_rows = []

    # Train once per weight set, drawing its loss curve as soon as it is done.
    for idx, (weights, label) in enumerate(weight_init_list):
        losses, val_acc = _get_loss_acc(dataset, weights)

        plt.plot(losses[:plot_n_batches], palette[idx], label=label)
        accuracy_rows.append((label, val_acc))
        final_loss_rows.append((label, losses[-1]))

    plt.title(title)
    plt.xlabel('Batches')
    plt.ylabel('Loss')
    # Anchor the legend just outside the axes, at the upper-left corner.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.show()

    print('After 858 Batches (2 Epochs):')
    print('Validation Accuracy')
    for label, val_acc in accuracy_rows:
        print('  {:7.3f}% -- {}'.format(val_acc*100, label))
    print('Loss')
    for label, last_loss in final_loss_rows:
        print('  {:7.3f}  -- {}'.format(last_loss, label))

D:\Anaconda\python.exe D:/AI20/HJZ/04-深度学习/3-CNN/20191207/2019--权重初始化/02Weight_Initial.py
Getting MNIST Dataset...
WARNING:tensorflow:From D:/AI20/HJZ/04-深度学习/3-CNN/20191207/2019--权重初始化/02Weight_Initial.py:23: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Please write your own downloading logic.
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets\train-images-idx3-ubyte.gz
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets\train-labels-idx1-ubyte.gz
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./datasets\t10k-images-idx3-ubyte.gz
Extracting ./datasets\t10k-labels-idx1-ubyte.gz
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Data Extracted.
WARNING:tensorflow:From D:\AI20\HJZ\04-深度学习\3-CNN\20191207\2019--权重初始化\helper.py:43: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

2019-12-28 12:13:23.268683: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
After 858 Batches (2 Epochs):
Validation Accuracy
   96.820% -- 正态分布
   97.000% -- 截尾正态分布
Loss
    0.042  -- 正态分布
    0.098  -- 截尾正态分布

Process finished with exit code 0

在这里插入图片描述

HJZ11

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
4-CNN-demo-0302-权重初始化

import tensorflow as tfimport helperimport numpy as npimport matplotlib as mplfrom tensorflow.examples.tutorials.mnist import input_data# 设置字符集，防止中文乱码mpl.rcParams['font.sans-serif'] = [u'simHe...
复制链接

扫一扫

专栏目录