import tensorflow as tf
import helper
import numpy as np
import matplotlib as mpl
from tensorflow.examples.tutorials.mnist import input_data
# 设置字符集,防止中文乱码
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False
"""
# 本章节目的:
- 演示不同权重值对模型效果的影响,我们使用相同的神经网络和数据集进行测试。
- 数据集是 MNIST 10个类,手写数字 0-9, 并且被归一化到(0.0 - 1.0).
"""
# todo - 先看下 helper定义的网络结构。
print('Getting MNIST Dataset...')
mnist = input_data.read_data_sets("./datasets", one_hot=True)
print('Data Extracted.')
# todo-问题,全设置为0或者1 网络会学习嘛?
# 使用全部相同的权值,导致每一个神经元都输出同样的值,这将使得权重很难学习。
def weight_all_0or1():
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
all_zero_weights = [
tf.Variable(tf.zeros(layer_1_weight_shape)),
tf.Variable(tf.zeros(layer_2_weight_shape)),
tf.Variable(tf.zeros(layer_3_weight_shape))
]
all_one_weights = [
tf.Variable(tf.ones(layer_1_weight_shape)),
tf.Variable(tf.ones(layer_2_weight_shape)),
tf.Variable(tf.ones(layer_3_weight_shape))
]
helper.compare_init_weights(
mnist,
'all 0 vs all 1',
[
(all_zero_weights, 'all 0'),
(all_one_weights, 'all 1')])
"""
# 均匀分布 指任何一个数值都有相同概率被选中. 在 TensorFlow's中,使用 tf.random_uniform 函数来从均匀分布数值中随机选一个值.
random_uniform(shape,
minval=0,
maxval=None,
dtype=dtypes.float32,
seed=None,
name=None):
Outputs random values from a uniform distribution.
"""
# todo 直方图来对均匀分布做一个可视化展示,随机生成1000个在(-3,3)的随机数字。1000个值,用了500个桶。
def uniform_histgram():
helper.hist_dist('Random Uniform (minval=-3, maxval=3)', tf.random_uniform([1000], -3, 3))
def weights_random_uniform():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
basline_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape)),
tf.Variable(tf.random_uniform(layer_2_weight_shape)),
tf.Variable(tf.random_uniform(layer_3_weight_shape))
]
helper.compare_init_weights(
mnist,
'Baseline',
[(basline_weights, 'tf.random_uniform [0, 1)')])
# todo 通用法则就是: 好的权值初始化范围为 $[-y, y]$ 且$y=1/\sqrt{n}$ ($n$ 是输入神经元数量).
# 在验证上述法则之前,我们先看看将权值范围调整到 [-1, 1),的效果。
def weights_random_uniform1():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
basline_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape)),
tf.Variable(tf.random_uniform(layer_2_weight_shape)),
tf.Variable(tf.random_uniform(layer_3_weight_shape))
]
uniform_neg1to1_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -1, 1)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -1, 1)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -1, 1))
]
helper.compare_init_weights(
mnist,
'[0, 1) vs [-1, 1)',
[
(basline_weights, 'tf.random_uniform [0, 1)'),
(uniform_neg1to1_weights, 'tf.random_uniform [-1, 1)')])
# todo 继续比较下[-0.1, 0.1), [-0.01, 0.01), and [-0.001, 0.001) 看是否越小越好.
# 并设置 plot_n_batches=None 将所有图可视化
def weights_random_uniform2():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
uniform_neg1to1_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -1, 1)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -1, 1)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -1, 1))
]
uniform_neg01to01_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -0.1, 0.1)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -0.1, 0.1)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -0.1, 0.1))
]
uniform_neg001to001_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -0.01, 0.01)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -0.01, 0.01)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -0.01, 0.01))
]
uniform_neg0001to0001_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -0.001, 0.001)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -0.001, 0.001)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -0.001, 0.001))
]
helper.compare_init_weights(
mnist,
'[-1, 1) vs [-0.1, 0.1) vs [-0.01, 0.01) vs [-0.001, 0.001)',
[
(uniform_neg1to1_weights, '[-1, 1)'),
(uniform_neg01to01_weights, '[-0.1, 0.1)'),
(uniform_neg001to001_weights, '[-0.01, 0.01)'),
(uniform_neg0001to0001_weights, '[-0.001, 0.001)')],
plot_n_batches=None)
# todo [-0.01, 0.01) 往下就太小了。 让我们试试我们的经验法则 $y=1/\sqrt{n}$.
def weights_random_uniform3():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
uniform_neg01to01_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -0.1, 0.1)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -0.1, 0.1)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -0.1, 0.1))
]
general_rule_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -1 / np.sqrt(layer_1_weight_shape[0]),
1 / np.sqrt(layer_1_weight_shape[0]))),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -1 / np.sqrt(layer_2_weight_shape[0]),
1 / np.sqrt(layer_2_weight_shape[0]))),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -1 / np.sqrt(layer_3_weight_shape[0]),
1 / np.sqrt(layer_3_weight_shape[0])))
]
print(1/np.sqrt(layer_1_weight_shape[0]), 1/np.sqrt(layer_2_weight_shape[0]) ,1/np.sqrt(layer_3_weight_shape[0]))
helper.compare_init_weights(
mnist,
'[-0.1, 0.1) vs General Rule',
[
(uniform_neg01to01_weights, '[-0.1, 0.1)'),
(general_rule_weights, 'General Rule')],
plot_n_batches=None)
# todo 下面试试正态分布(高斯分布)
"""
random_normal(shape,
mean=0.0,
stddev=1.0,
dtype=dtypes.float32,
seed=None,
name=None):
Outputs random values from a normal distribution.
"""
def random_uniform_histgram():
helper.hist_dist('Random Normal (mean=0.0, stddev=1.0)', tf.random_normal([1000]))
# todo 用之前效果最好的均匀分布 和 正态分布比较下。
def weights_random_uniform4():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
uniform_neg01to01_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape, -0.1, 0.1)),
tf.Variable(tf.random_uniform(layer_2_weight_shape, -0.1, 0.1)),
tf.Variable(tf.random_uniform(layer_3_weight_shape, -0.1, 0.1))
]
normal_01_weights = [
tf.Variable(tf.random_normal(layer_1_weight_shape, stddev=0.1)),
tf.Variable(tf.random_normal(layer_2_weight_shape, stddev=0.1)),
tf.Variable(tf.random_normal(layer_3_weight_shape, stddev=0.1))
]
helper.compare_init_weights(
mnist,
'Uniform [-0.1, 0.1) vs Normal stddev 0.1',
[
(uniform_neg01to01_weights, 'Uniform [-0.1, 0.1)'),
(normal_01_weights, 'Normal stddev 0.1')])
# todo 再进一步,试试 截尾正态分布。大于2倍标准差范围的数值将被舍去。
"""
truncated_normal(shape,
mean=0.0,
stddev=1.0,
dtype=dtypes.float32,
seed=None,
name=None):
Outputs random values from a truncated normal distribution.
"""
def truncated_normal_histgram():
helper.hist_dist('截尾正态分布 (mean=0.0, stddev=1.0)', tf.truncated_normal([1000]))
# todo 用截尾正态分布 和 正态分布进行比较
def weights_random_uniform5():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
normal_01_weights = [
tf.Variable(tf.random_normal(layer_1_weight_shape, stddev=0.1)),
tf.Variable(tf.random_normal(layer_2_weight_shape, stddev=0.1)),
tf.Variable(tf.random_normal(layer_3_weight_shape, stddev=0.1))
]
trunc_normal_01_weights = [
tf.Variable(tf.truncated_normal(layer_1_weight_shape, stddev=0.1)),
tf.Variable(tf.truncated_normal(layer_2_weight_shape, stddev=0.1)),
tf.Variable(tf.truncated_normal(layer_3_weight_shape, stddev=0.1))
]
helper.compare_init_weights(
mnist,
'正态分布 vs 截尾正态分布',
[
(normal_01_weights, '正态分布'),
(trunc_normal_01_weights, '截尾正态分布')])
# todo 视乎截尾正态分布效果一般。其实是因为这个网络结构小了。考虑到大型网络参数多,我们更有可能会获取大于2倍标准差的值。
# 所以用截尾正态分布就对了。
"""
最后,我们和我们开篇的方法比较下 两者的效果。
"""
def weights_random_uniform6():
tf.reset_default_graph()
# 每一层权重的shape
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])
basline_weights = [
tf.Variable(tf.random_uniform(layer_1_weight_shape)),
tf.Variable(tf.random_uniform(layer_2_weight_shape)),
tf.Variable(tf.random_uniform(layer_3_weight_shape))
]
trunc_normal_01_weights = [
tf.Variable(tf.truncated_normal(layer_1_weight_shape, stddev=0.1)),
tf.Variable(tf.truncated_normal(layer_2_weight_shape, stddev=0.1)),
tf.Variable(tf.truncated_normal(layer_3_weight_shape, stddev=0.1))
]
helper.compare_init_weights(
mnist,
'均匀分布 vs 截尾正态分布',
[
(basline_weights, '均匀分布'),
(trunc_normal_01_weights, '截尾正态分布')])
if __name__=='__main__':
# weight_all_0or1()
# uniform_histgram()
# weights_random_uniform() # 图中:损失一直在下降,表明我们网络正在学习。明显好于全0或全1.这是一个好方向。
# weights_random_uniform1()
# weights_random_uniform2()
# weights_random_uniform3() # [-0.1 ,0.1] 和我们的通用法则 $y=1/\sqrt{n}$ 的数值非常接近,所以效果都不错。
# random_uniform_histgram()
# weights_random_uniform4()
# truncated_normal_histgram()
weights_random_uniform5()
#weights_random_uniform6()
helper代码
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
def hist_dist(title, distribution_tensor, hist_range=(-4, 4)):
"""
Display histogram of a TF distribution
"""
with tf.Session() as sess:
values = sess.run(distribution_tensor)
plt.title(title)
plt.hist(values, np.linspace(*hist_range, num=len(values)/2))
plt.show()
def _get_loss_acc(dataset, weights):
"""
Get losses and validation accuracy of example neural network
"""
batch_size = 128
epochs = 2
learning_rate = 0.001
features = tf.placeholder(tf.float32)
labels = tf.placeholder(tf.float32)
learn_rate = tf.placeholder(tf.float32)
# todo- 这里只定义了 偏置项 初始值,权重值 有参数传入进来!!!
biases = [
tf.Variable(tf.zeros([256])),
tf.Variable(tf.zeros([128])),
tf.Variable(tf.zeros([dataset.train.labels.shape[1]]))
]
# todo-网络,共3层,2层隐藏层,1层输出层。
layer_1 = tf.nn.relu(tf.matmul(features, weights[0]) + biases[0])
layer_2 = tf.nn.relu(tf.matmul(layer_1, weights[1]) + biases[1])
logits = tf.matmul(layer_2, weights[2]) + biases[2]
# Training loss
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
# Optimizer
optimizer = tf.train.AdamOptimizer(learn_rate).minimize(loss)
# Accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Measurements use for graphing loss
loss_batch = []
with tf.Session() as session:
session.run(tf.global_variables_initializer())
# todo-求有多少个batch_size(即每1个epoch的步数) = 用总样本数量/batch_size
batch_count = int((dataset.train.num_examples / batch_size))
# 训练
for epoch_i in range(epochs):
for batch_i in range(batch_count):
batch_features, batch_labels = dataset.train.next_batch(batch_size)
# Run optimizer and get loss
session.run(
optimizer,
feed_dict={features: batch_features, labels: batch_labels, learn_rate: learning_rate})
l = session.run(
loss,
feed_dict={features: batch_features, labels: batch_labels, learn_rate: learning_rate})
loss_batch.append(l)
valid_acc = session.run(
accuracy,
feed_dict={features: dataset.validation.images, labels: dataset.validation.labels, learn_rate: 1.0})
# Hack to Reset batches
dataset.train._index_in_epoch = 0
dataset.train._epochs_completed = 0
return loss_batch, valid_acc
def compare_init_weights(
dataset,
title,
weight_init_list,
plot_n_batches=100):
"""
Plot loss and print stats of weights using an example neural network
"""
colors = ['r', 'b', 'g', 'c', 'y', 'k']
label_accs = []
label_loss = []
assert len(weight_init_list) <= len(colors), 'Too many inital weights to plot'
for i, (weights, label) in enumerate(weight_init_list):
loss, val_acc = _get_loss_acc(dataset, weights)
plt.plot(loss[:plot_n_batches], colors[i], label=label)
label_accs.append((label, val_acc))
label_loss.append((label, loss[-1]))
plt.title(title)
plt.xlabel('Batches')
plt.ylabel('Loss')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()
print('After 858 Batches (2 Epochs):')
print('Validation Accuracy')
for label, val_acc in label_accs:
print(' {:7.3f}% -- {}'.format(val_acc*100, label))
print('Loss')
for label, loss in label_loss:
print(' {:7.3f} -- {}'.format(loss, label))
D:\Anaconda\python.exe D:/AI20/HJZ/04-深度学习/3-CNN/20191207/2019--权重初始化/02Weight_Initial.py
Getting MNIST Dataset...
WARNING:tensorflow:From D:/AI20/HJZ/04-深度学习/3-CNN/20191207/2019--权重初始化/02Weight_Initial.py:23: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Please write your own downloading logic.
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets\train-images-idx3-ubyte.gz
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./datasets\train-labels-idx1-ubyte.gz
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./datasets\t10k-images-idx3-ubyte.gz
Extracting ./datasets\t10k-labels-idx1-ubyte.gz
WARNING:tensorflow:From D:\Anaconda\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Data Extracted.
WARNING:tensorflow:From D:\AI20\HJZ\04-深度学习\3-CNN\20191207\2019--权重初始化\helper.py:43: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.
See @{tf.nn.softmax_cross_entropy_with_logits_v2}.
2019-12-28 12:13:23.268683: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
After 858 Batches (2 Epochs):
Validation Accuracy
96.820% -- 正态分布
97.000% -- 截尾正态分布
Loss
0.042 -- 正态分布
0.098 -- 截尾正态分布
Process finished with exit code 0