# 【深度学习图像识别课程】权重初始化

%matplotlib inline

import tensorflow as tf
import helper

from tensorflow.examples.tutorials.mnist import input_data

print('Getting MNIST Dataset...')
print('Data Extracted.')
# Save the shapes of weights for each layer
layer_1_weight_shape = (mnist.train.images.shape[1], 256)
layer_2_weight_shape = (256, 128)
layer_3_weight_shape = (128, mnist.train.labels.shape[1])

1、全0或者全1

# Constant initialization: every weight identical (all 0 or all 1).
_const_shapes = (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape)

all_zero_weights = [tf.Variable(tf.zeros(s)) for s in _const_shapes]

all_one_weights = [tf.Variable(tf.ones(s)) for s in _const_shapes]

helper.compare_init_weights(
    mnist,
    'All Zeros vs All Ones',
    [
        (all_zero_weights, 'All Zeros'),
        (all_one_weights, 'All Ones')])

After 858 Batches (2 Epochs):
Validation Accuracy
11.260% -- All Zeros
9.900% -- All Ones
Loss
2.300  -- All Zeros
372.644  -- All Ones

2、均匀分布

# Visualize 1000 samples drawn uniformly from [-3, 3).
uniform_sample = tf.random_uniform([1000], -3, 3)
helper.hist_dist('Random Uniform (minval=-3, maxval=3)', uniform_sample)

tf.random_uniform(shape, minval=0, maxval=None, dtype=tf.float32, seed=None, name=None)

• shape: 输出个数
• minval: 随机值范围的下界限。默认为0。
• maxval: 随机值范围的上界限。默认为1。
• dtype: 输出类型: float32, float64, int32, or int64.
• seed: 产生随机分布的种子
• name: 操作名称

（1）权重设置范围[0,1]

# Baseline: tf.random_uniform defaults to the half-open interval [0, 1).
# NOTE(review): "basline" is a typo for "baseline", but later cells
# reference this name, so it is deliberately left unchanged.
basline_weights = [
    tf.Variable(tf.random_uniform(shape))
    for shape in (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape)
]

helper.compare_init_weights(
    mnist,
    'Baseline',
    [(basline_weights, 'tf.random_uniform [0, 1)')])

After 858 Batches (2 Epochs):
Validation Accuracy
65.340% -- tf.random_uniform [0, 1)
Loss
64.356  -- tf.random_uniform [0, 1)

（2）权重设置范围[-1,1]

# Symmetric uniform range [-1, 1) for every layer, compared against
# the [0, 1) baseline.
uniform_neg1to1_weights = [
    tf.Variable(tf.random_uniform(shape, -1, 1))
    for shape in (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape)
]

helper.compare_init_weights(
    mnist,
    '[0, 1) vs [-1, 1)',
    [
        (basline_weights, 'tf.random_uniform [0, 1)'),
        (uniform_neg1to1_weights, 'tf.random_uniform [-1, 1)')])

After 858 Batches (2 Epochs):
Validation Accuracy
73.840% -- tf.random_uniform [0, 1)
89.360% -- tf.random_uniform [-1, 1)
Loss
13.700  -- tf.random_uniform [0, 1)
5.470  -- tf.random_uniform [-1, 1)

3、权重范围

（1）再增加3个范围：-0.1~0.1，-0.01~0.01，-0.001~0.001

def _uniform_weights(bound):
    """Per-layer variables drawn uniformly from [-bound, bound)."""
    return [
        tf.Variable(tf.random_uniform(shape, -bound, bound))
        for shape in (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape)
    ]

# Shrink the symmetric range by successive factors of ten.
uniform_neg01to01_weights = _uniform_weights(0.1)
uniform_neg001to001_weights = _uniform_weights(0.01)
uniform_neg0001to0001_weights = _uniform_weights(0.001)

helper.compare_init_weights(
    mnist,
    '[-1, 1) vs [-0.1, 0.1) vs [-0.01, 0.01) vs [-0.001, 0.001)',
    [
        (uniform_neg1to1_weights, '[-1, 1)'),
        (uniform_neg01to01_weights, '[-0.1, 0.1)'),
        (uniform_neg001to001_weights, '[-0.01, 0.01)'),
        (uniform_neg0001to0001_weights, '[-0.001, 0.001)')],
    plot_n_batches=None)

After 858 Batches (2 Epochs):
Validation Accuracy
91.000% -- [-1, 1)
97.220% -- [-0.1, 0.1)
95.680% -- [-0.01, 0.01)
94.400% -- [-0.001, 0.001)
Loss
2.425  -- [-1, 1)
0.098  -- [-0.1, 0.1)
0.133  -- [-0.01, 0.01)
0.190  -- [-0.001, 0.001)

（2）将范围（-0.1, 0.1）与约定的一般范围进行比较

import numpy as np

# General rule of thumb: per-layer bound of 1 / sqrt(n_inputs), where
# n_inputs is the first dimension of the layer's weight shape.
general_rule_weights = []
for shape in (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape):
    bound = 1 / np.sqrt(shape[0])
    general_rule_weights.append(
        tf.Variable(tf.random_uniform(shape, -bound, bound)))

helper.compare_init_weights(
    mnist,
    '[-0.1, 0.1) vs General Rule',
    [
        (uniform_neg01to01_weights, '[-0.1, 0.1)'),
        (general_rule_weights, 'General Rule')],
    plot_n_batches=None)

4、正态分布作为权重

（1）tensorflow中提供正态分布的函数

tf.random_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)

• shape:输出大小
• mean:正态分布的均值
• stddev:正态分布的标准差
• dtype: 输出类型
• seed: 产生分布的随机种子
• name: 操作名称
# Visualize 1000 samples from the standard normal distribution.
normal_sample = tf.random_normal([1000])
helper.hist_dist('Random Normal (mean=0.0, stddev=1.0)', normal_sample)

# Normal initialization with a small standard deviation (0.1), compared
# against the uniform [-0.1, 0.1) initialization.
normal_01_weights = [
    tf.Variable(tf.random_normal(shape, stddev=0.1))
    for shape in (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape)
]

helper.compare_init_weights(
    mnist,
    'Uniform [-0.1, 0.1) vs Normal stddev 0.1',
    [
        (uniform_neg01to01_weights, 'Uniform [-0.1, 0.1)'),
        (normal_01_weights, 'Normal stddev 0.1')])

After 858 Batches (2 Epochs):
Validation Accuracy
96.920% -- Uniform [-0.1, 0.1)
97.200% -- Normal stddev 0.1
Loss
0.103  -- Uniform [-0.1, 0.1)
0.099  -- Normal stddev 0.1

（2）截断正态分布：truncated normal distribution

tf.truncated_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)

• shape: 输出格式
• mean: 截断正态分布的均值
• stddev: 截断正态分布的标准差
• dtype: 输出类型
• seed: 产生分布的随机种子
• name: 操作名称
# Visualize 1000 samples from the standard truncated normal distribution
# (per TF docs, values beyond 2 stddev from the mean are re-drawn).
truncated_sample = tf.truncated_normal([1000])
helper.hist_dist('Truncated Normal (mean=0.0, stddev=1.0)', truncated_sample)

# Same stddev 0.1, but with the truncated normal distribution.
trunc_normal_01_weights = [
    tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    for shape in (layer_1_weight_shape, layer_2_weight_shape, layer_3_weight_shape)
]

helper.compare_init_weights(
    mnist,
    'Normal vs Truncated Normal',
    [
        (normal_01_weights, 'Normal'),
        (trunc_normal_01_weights, 'Truncated Normal')])

After 858 Batches (2 Epochs):
Validation Accuracy
97.020% -- Normal
97.480% -- Truncated Normal
Loss
0.088  -- Normal
0.034  -- Truncated Normal

# Final comparison: the naive [0, 1) baseline vs. truncated normal.
_final_pairs = [
    (basline_weights, 'Baseline'),
    (trunc_normal_01_weights, 'Truncated Normal'),
]
helper.compare_init_weights(mnist, 'Baseline vs Truncated Normal', _final_pairs)

After 858 Batches (2 Epochs):
Validation Accuracy
66.100% -- Baseline
97.040% -- Truncated Normal
Loss
24.090  -- Baseline
0.075  -- Truncated Normal

http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf

https://arxiv.org/pdf/1502.01852v1.pdf

https://arxiv.org/pdf/1502.03167v2.pdf