import numpy as np
import tensorflow as tf
import keras
import matplotlib.pyplot as plt
# Define a Neural Accumulator (NAC) for addition/subtraction -> Useful to learn the addition/subtraction operation
def nac_simple_single_layer(x_in, out_units, scope=None):
    """Build one Neural Accumulator (NAC) layer for addition/subtraction.

    The effective weight matrix is ``W = tanh(W_hat) * sigmoid(M_hat)``, so
    every entry of ``W`` lies in (-1, 1). The output is the plain linear map
    ``matmul(x_in, W)`` with no bias and no activation.

    Args:
        x_in: Input tensor. ``x_in.shape[1]`` is read as the number of input
            features and ``x_in`` is multiplied by a 2-D weight matrix, so it
            is expected to be laid out as (samples, features).
        out_units: Number of output neurons.
        scope: Optional variable-scope name. When given, the variables are
            created inside ``tf.variable_scope(scope)``, which lets this
            layer be instantiated more than once in the same graph. When
            ``None`` (default) the variables are created in the current
            scope under the fixed names ``"W_hat"`` and ``"M_hat"``.

    Returns:
        Tuple ``(y_out, W)``: the output tensor and the effective weight
        matrix of shape ``[in_features, out_units]``.
    """
    if scope is not None:
        # Re-enter with the scope active: the variable names are fixed, so a
        # distinct scope is the only way to build a second copy of the layer.
        with tf.variable_scope(scope):
            return nac_simple_single_layer(x_in, out_units)

    in_features = x_in.shape[1]
    weight_shape = [in_features, out_units]

    # Unconstrained parameters, both initialised uniformly in [-2, 2].
    W_hat = tf.get_variable(
        name="W_hat",
        shape=weight_shape,
        initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
        trainable=True,
    )
    M_hat = tf.get_variable(
        name="M_hat",
        shape=weight_shape,
        initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
        trainable=True,
    )

    # Effective weights: element-wise product of a tanh and a sigmoid term.
    W = tf.nn.tanh(W_hat) * tf.nn.sigmoid(M_hat)
    y_out = tf.matmul(x_in, W)
    return y_out, W
# define a complex nac in log space -> for more complex arithmetic functions such as
# multiplication, division and power
def nac_complex_single_layer(x_in, out_units, epsilon=1e-6, scope=None):
    """Build one log-space NAC layer for multiplication, division and powers.

    The input is mapped to log space, passed through a NAC-style weight
    matrix ``W = tanh(W_hat) * sigmoid(M_hat)`` and mapped back::

        m = exp(matmul(log(|x_in| + epsilon), W))

    Because the absolute value is taken before the log, the sign of the
    input does not influence the output.

    Args:
        x_in: Input tensor. ``x_in.shape[1]`` is read as the number of input
            features, so it is expected to be laid out as (samples, features).
        out_units: Number of output units of the cell.
        epsilon: Small value added before the log to avoid ``log(0)``.
        scope: Optional variable-scope name. When given, the variables are
            created inside ``tf.variable_scope(scope)``, which lets this
            layer be instantiated more than once in the same graph. When
            ``None`` (default) the variables are created in the current
            scope under the fixed names ``"W_hat2"`` and ``"M_hat2"``.

    Returns:
        Tuple ``(m, W)``: the output tensor first, then the effective weight
        matrix of shape ``[in_features, out_units]``.
    """
    if scope is not None:
        # Re-enter with the scope active: the variable names are fixed, so a
        # distinct scope is the only way to build a second copy of the layer.
        with tf.variable_scope(scope):
            return nac_complex_single_layer(x_in, out_units, epsilon=epsilon)

    in_shape = x_in.shape[1]
    weight_shape = [in_shape, out_units]

    # Unconstrained parameters, both initialised uniformly in [-2, 2].
    W_hat = tf.get_variable(
        name="W_hat2",
        shape=weight_shape,
        initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
        trainable=True,
    )
    M_hat = tf.get_variable(
        name="M_hat2",
        shape=weight_shape,
        initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
        trainable=True,
    )
    W = tf.nn.tanh(W_hat) * tf.nn.sigmoid(M_hat)

    # Express the input in log space so that the linear map in W acts as
    # products/powers once the result is exponentiated again.
    x_modified = tf.log(tf.abs(x_in) + epsilon)
    m = tf.exp(tf.matmul(x_modified, W))
    return m, W
# Define a NALU having combination of NAC1 and NAC2
def nalu(x_in, out_units, epsilon=1e-6, get_weights=False, scope=None):
    """Build a NALU cell that gates between a simple and a log-space NAC.

    The cell computes::

        a = simple NAC output   (nac_simple_single_layer)
        m = log-space NAC output (nac_complex_single_layer)
        g = sigmoid(matmul(x_in, G))
        y = g * a + (1 - g) * m

    Args:
        x_in: Input tensor. ``x_in.shape[1]`` is read as the number of input
            features, so it is expected to be laid out as (samples, features).
        out_units: Number of output units of the cell.
        epsilon: Small value forwarded to the log-space NAC to avoid
            ``log(0)``.
        get_weights: If true, also return the gate and NAC weight matrices.
        scope: Optional variable-scope name. When given, every variable of
            the cell (gate and both NACs) is created inside
            ``tf.variable_scope(scope)``, which allows several NALU cells in
            one graph. When ``None`` (default) the variables are created in
            the current scope under their fixed names.

    Returns:
        ``y_out`` alone when ``get_weights`` is false; otherwise the tuple
        ``(y_out, G, W_simple, W_complex)`` holding the output tensor, the
        gate weight matrix, the simple-NAC weights and the log-space-NAC
        weights.
    """
    if scope is not None:
        # Re-enter with the scope active: all variable names in the cell are
        # fixed, so a distinct scope is needed for every additional cell.
        with tf.variable_scope(scope):
            return nalu(x_in, out_units, epsilon=epsilon,
                        get_weights=get_weights)

    in_shape = x_in.shape[1]

    # Additive path (NAC1) and multiplicative path (NAC2).
    a, W_simple = nac_simple_single_layer(x_in, out_units)
    m, W_complex = nac_complex_single_layer(x_in, out_units, epsilon=epsilon)

    # Learned gate deciding, per output, how much of each path to use.
    G = tf.get_variable(
        name="Gate_weights",
        shape=[in_shape, out_units],
        initializer=tf.random_normal_initializer(stddev=1.0),
    )
    g = tf.nn.sigmoid(tf.matmul(x_in, G))

    y_out = g * a + (1 - g) * m

    if get_weights:
        return y_out, G, W_simple, W_complex
    return y_out
# Test the network by learning an arithmetic function of three inputs.

# Training inputs: 5000 random integer samples per feature, stored as float32.
# (Deterministic alternative: np.arange(1000, 11000, step=5),
#  np.arange(500, 6500, step=3) and np.arange(0, 2000, step=1).)
x1 = np.random.randint(0, 1000, size=5000).astype(np.float32)
x2 = np.random.randint(1, 500, size=5000).astype(np.float32)
x3 = np.random.randint(50, 150, size=5000).astype(np.float32)

# Target function of x1, x2 and x3 the network is trained on.
# (Simpler alternative: y_train = x1 + x2 + x3.)
y_train = (x1 / 4) + (x2 / 2) + x3 ** 2
x_train = np.column_stack((x1, x2, x3))

print(x_train.shape)
print(y_train.shape)

# Test inputs: 200 fresh samples drawn from the same ranges. The names x1,
# x2 and x3 are deliberately rebound, so from here on they hold test data.
x1 = np.random.randint(0, 1000, size=200).astype(np.float32)
x2 = np.random.randint(1, 500, size=200).astype(np.float32)
x3 = np.random.randint(50, 150, size=200).astype(np.float32)

x_test = np.column_stack((x1, x2, x3))
# Must stay in sync with the definition of y_train above.
y_test = (x1 / 4) + (x2 / 2) + x3 ** 2

print()
print(x_test.shape)
print(y_test.shape)

# Placeholders fed at run time.
# X: number of samples x number of features (three inputs per sample).
X = tf.placeholder(dtype=tf.float32, shape=[None, 3])
Y = tf.placeholder(dtype=tf.float32, shape=[None, ])

# The network is a single NALU cell with one output unit.
y_pred = nalu(X, out_units=1)
y_pred = tf.squeeze(y_pred)  # Drop the size-1 output axis to match Y.

# Mean squared error (MSE).
loss = tf.reduce_mean((y_pred - Y) ** 2)

# Training parameters.
alpha = 0.005  # learning rate
momentum = 0.09  # only needed by the MomentumOptimizer alternative below
epochs = 30000

# Alternative optimizer:
# tf.train.MomentumOptimizer(learning_rate=alpha, momentum=momentum)
optimize = tf.train.AdamOptimizer(learning_rate=alpha).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Loss on the test set before any training step.
    print("Pre training MSE: ", sess.run(loss, feed_dict={X: x_test, Y: y_test}))
    print()

    # Full-batch training: every step feeds the entire training set.
    cost_history = []
    for i in range(epochs):
        _, cost = sess.run([optimize, loss], feed_dict={X: x_train, Y: y_train})
        print("epoch: {}, MSE: {}".format(i, cost))
        cost_history.append(cost)

    # Plot the training loss per epoch. The curve shows the natural log of
    # the MSE, so the axis is labelled accordingly.
    plt.plot(np.arange(epochs), np.log(cost_history))
    plt.xlabel("Epoch")
    plt.ylabel("log(MSE)")
    plt.show()
    print()

    # Loss on the test set after training.
    print("Post training MSE: ", sess.run(loss, feed_dict={X: x_test, Y: y_test}))
    print("Actual sum: ", y_test[0:10])
    print()

    # Show the first ten predictions next to the targets printed above.
    y_hat = sess.run(y_pred, feed_dict={X: x_test, Y: y_test})
    print("Predicted sum: ", y_hat[0:10])