"""Build your own CNN and try to achieve the highest possible accuracy on MNIST."""
import tensorflow as tf
import numpy as np
from datetime import datetime
import os
def shuffle_batch(X, y, batch_size):
    """Yield randomly shuffled mini-batches that cover every sample once.

    The sample indices are permuted and then split into
    ``len(X) // batch_size`` near-equal chunks, so each batch holds at least
    ``batch_size`` samples whenever ``len(X) >= batch_size``. If there are
    fewer samples than ``batch_size``, all of them are yielded as a single
    batch instead of failing.

    Args:
        X: Samples; must support ``len()`` and indexing with an integer
            index array (e.g. a NumPy array).
        y: Labels aligned with ``X`` along the first axis.
        batch_size: Target (minimum) number of samples per batch.

    Yields:
        ``(X_batch, y_batch)`` tuples selected with the same indices.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; np.array_split would reject 0 sections.
        return

    rnd_idx = np.random.permutation(n_samples)
    # Clamp to one batch so inputs smaller than batch_size are still served.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]
# Image geometry and label count for MNIST.
height, width, channels = 28, 28, 1
n_inputs = height * width
n_outputs = 10

# Load MNIST through Keras, flatten each image to a vector of n_inputs
# pixels and rescale the pixel values by 1/255.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.reshape(-1, n_inputs).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, n_inputs).astype(np.float32) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Hold out the first 5,000 training samples as a validation set.
n_valid = 5000
X_valid, y_valid = X_train[:n_valid], y_train[:n_valid]
X_train, y_train = X_train[n_valid:], y_train[n_valid:]
# Settings shared by both convolution layers and both pooling layers.
_conv_kwargs = dict(kernel_size=3, strides=1, padding="SAME",
                    activation=tf.nn.relu)
_pool_window = [1, 2, 2, 1]  # 2x2 window, also used as the stride

with tf.name_scope("inputs"):
    # Flat pixel vectors are fed in and reshaped back to images for the CNN.
    X = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs], name="X")
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    y = tf.placeholder(dtype=tf.int32, shape=[None], name="y")

# conv1: [batch, 28, 28, 1] -> [batch, 28, 28, 32]
conv1 = tf.layers.conv2d(X_reshaped, filters=32, name="conv1", **_conv_kwargs)

# pool1: [batch, 28, 28, 32] -> [batch, 14, 14, 32]
pool1 = tf.nn.max_pool(conv1, ksize=_pool_window, strides=_pool_window,
                       padding="SAME")

# conv2: [batch, 14, 14, 32] -> [batch, 14, 14, 64]
conv2 = tf.layers.conv2d(pool1, filters=64, name="conv2", **_conv_kwargs)

# pool2: [batch, 14, 14, 64] -> [batch, 7, 7, 64]
pool2 = tf.nn.max_pool(conv2, ksize=_pool_window, strides=_pool_window,
                       padding="SAME")

with tf.name_scope("pool2"):
    # Flatten the 7x7x64 feature maps for the fully connected layers.
    pool2_flat = tf.reshape(pool2, shape=[-1, 7 * 7 * 64])

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool2_flat, units=64, activation=tf.nn.relu,
                          name="fc1")

with tf.name_scope("output"):
    # Raw class scores; softmax is exposed separately for predictions.
    logits = tf.layers.dense(fc1, units=n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    # The loss is computed from the logits (not Y_proba) and integer labels.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A prediction counts as correct when the true label has the top score.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
n_epochs = 15
batch_size = 100

with tf.Session() as sess:
    # Variables must be initialized before the first training step runs.
    sess.run(init)
    for epoch in range(n_epochs):
        # One pass over the shuffled training set, one Adam step per batch.
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch
        # only, so it is a coarse estimate.
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
        # Checkpoint after every epoch; each save overwrites the same prefix.
        save_path = saver.save(sess, "./my_mnist_model")
# Log output of a training run (one line per epoch):
# 0 Train accuracy: 0.98 Test accuracy: 0.9762
# 1 Train accuracy: 0.99 Test accuracy: 0.985
# 2 Train accuracy: 0.98 Test accuracy: 0.9873
# 3 Train accuracy: 1.0 Test accuracy: 0.9889
# 4 Train accuracy: 1.0 Test accuracy: 0.9883
# 5 Train accuracy: 1.0 Test accuracy: 0.9896
# 6 Train accuracy: 1.0 Test accuracy: 0.9901
# 7 Train accuracy: 0.99 Test accuracy: 0.9902
# 8 Train accuracy: 1.0 Test accuracy: 0.9904
# 9 Train accuracy: 1.0 Test accuracy: 0.9911
# 10 Train accuracy: 1.0 Test accuracy: 0.9923
# 11 Train accuracy: 1.0 Test accuracy: 0.9904
# 12 Train accuracy: 1.0 Test accuracy: 0.991
# 13 Train accuracy: 1.0 Test accuracy: 0.989
# 14 Train accuracy: 1.0 Test accuracy: 0.9907