Xavier initializer
Reference: https://github.com/google/prettytensor/blob/a69f13998258165d6682a47a931108d974bab05e/prettytensor/layers.py
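Xavier (Glorot) initialization scales the initial weights by a layer's fan-in and fan-out so that activation variances stay roughly constant from layer to layer. The uniform variant draws weights from U[-r, r] with r = sqrt(6 / (fan_in + fan_out)); the truncated-normal variant in the reference above uses stddev = sqrt(3 / (fan_in + fan_out)), where the 3 (rather than 2) compensates for the values re-drawn beyond two standard deviations.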
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Network shape: 784 inputs (28x28 pixels), four hidden layers of
# 256 units each, and 10 output classes.
hidden_layer_size = [256] * 4
input_layer_size = 784
output_layer_size = 10

# Load MNIST with one-hot labels.
mnist = input_data.read_data_sets('data/', one_hot=True)
train_img = mnist.train.images
train_lbl = mnist.train.labels
test_img = mnist.test.images
test_lbl = mnist.test.labels
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Xavier initializer, after the prettytensor reference above."""
    if uniform:
        # Uniform limits: sqrt(6 / (fan_in + fan_out)).
        init_range = np.sqrt(6.0 / (n_inputs + n_outputs))
        return tf.random_uniform_initializer(-init_range, init_range)
    else:
        # Truncated normal; 3.0 instead of 2.0 compensates for the
        # samples re-drawn beyond two standard deviations.
        stddev = np.sqrt(3.0 / (n_inputs + n_outputs))
        return tf.truncated_normal_initializer(stddev=stddev)
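Because xavier_init returns a standard TensorFlow initializer object, it plugs directly into tf.get_variable. A minimal usage sketch (the variable name w_example is illustrative):

# Create a 784x256 weight matrix with the uniform Xavier initializer.
w_example = tf.get_variable('w_example', shape=[784, 256],
                            initializer=xavier_init(784, 256))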
def initial_weights(ils, hls, ols):
    # Build one weight matrix and bias vector per layer. Note that this
    # function does not call xavier_init: it samples from a normal with
    # stddev = sqrt(1 / (2 * fan_in)), which scales by fan-in only.
    weights, bias = {}, {}
    for i in range(len(hls) + 1):
        fan_in = ils if i == 0 else hls[i - 1]
        fan_out = ols if i == len(hls) else hls[i]
        print(fan_in, fan_out)
        stddev = np.sqrt(1.0 / (2 * fan_in))
        weights[i] = tf.Variable(tf.random_normal([fan_in, fan_out], stddev=stddev))
        bias[i] = tf.Variable(tf.random_normal([fan_out]))
    return weights, bias
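To actually initialize the network with xavier_init, the same layer loop can create its variables through tf.get_variable instead. A sketch of such a variant (the function and variable names are hypothetical):

def initial_weights_xavier(ils, hls, ols):
    # Illustrative variant: same layer loop, weights from xavier_init.
    weights, bias = {}, {}
    for i in range(len(hls) + 1):
        fan_in = ils if i == 0 else hls[i - 1]
        fan_out = ols if i == len(hls) else hls[i]
        weights[i] = tf.get_variable('xw_%d' % i, shape=[fan_in, fan_out],
                                     initializer=xavier_init(fan_in, fan_out))
        bias[i] = tf.Variable(tf.zeros([fan_out]))
    return weights, bias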
def mlp(_x, _w, _b, _keep_prob):
    # Hidden layers: affine -> ReLU -> dropout. The final layer returns
    # raw logits (no softmax), as expected by the cross-entropy loss below.
    layers = {}
    for i in range(len(_w)):
        inp = _x if i == 0 else layers[i - 1]
        if i < len(_w) - 1:
            layers[i] = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(inp, _w[i]), _b[i])), _keep_prob)
        else:
            layers[i] = tf.add(tf.matmul(inp, _w[i]), _b[i])
    return layers[len(_w) - 1]
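For comparison, the same stack can be written with the TF 1.x layers API, where tf.contrib.layers.xavier_initializer is the built-in counterpart of xavier_init. A minimal sketch (mlp_dense is a hypothetical name, assuming TF 1.x with tf.layers and tf.contrib available):

def mlp_dense(_x, hls, ols, _keep_prob):
    init = tf.contrib.layers.xavier_initializer()
    net = _x
    for n in hls:
        net = tf.layers.dense(net, n, activation=tf.nn.relu,
                              kernel_initializer=init)
        net = tf.nn.dropout(net, _keep_prob)
    # Output layer: raw logits, no activation.
    return tf.layers.dense(net, ols, kernel_initializer=init)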
weights, bias = initial_weights(input_layer_size, hidden_layer_size, output_layer_size)

x = tf.placeholder(tf.float32, [None, input_layer_size], name='input')
y = tf.placeholder(tf.float32, [None, output_layer_size], name='output')
dropout_keep_prob = tf.placeholder(tf.float32)

score = mlp(x, weights, bias, dropout_keep_prob)   # raw logits
prob = tf.nn.softmax(score)                        # probabilities, used for accuracy only
# The loss takes the raw logits; softmax is applied internally.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))
lr = 0.001
optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
acc = tf.reduce_mean(tf.cast(pred, tf.float32))
init = tf.global_variables_initializer()
epoch = 100
batch_size = 200
snapshot = 5  # print progress every 5 epochs
with tf.Session() as sess:
    sess.run(init)
    loss_cache = []
    acc_cache = []
    for ep in range(epoch):
        num_batch = mnist.train.num_examples // batch_size
        avg_loss, avg_acc = 0, 0
        for nb in range(num_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            out = sess.run([optimizer, acc, loss],
                           feed_dict={x: batch_x, y: batch_y, dropout_keep_prob: 0.7})
            avg_loss += out[2] / num_batch
            avg_acc += out[1] / num_batch
        loss_cache.append(avg_loss)
        acc_cache.append(avg_acc)
        if ep % snapshot == 0:
            print('Epoch: %d, loss: %.4f, acc: %.4f' % (ep, avg_loss, acc_cache[-1]))
            # Dropout is disabled (keep_prob=1.0) at evaluation time.
            print('test accuracy: %.4f' % acc.eval({x: test_img, y: test_lbl, dropout_keep_prob: 1.0}))
# Plot the training loss and accuracy curves.
plt.figure(1)
plt.plot(range(len(loss_cache)), loss_cache, 'b-', label='loss')
plt.legend(loc='upper right')
plt.figure(2)
plt.plot(range(len(acc_cache)), acc_cache, 'o-', label='acc')
plt.legend(loc='lower right')
plt.show()