"""
Single-layer network with three neurons.
No activation function.
The loss function is cross-entropy.
The optimizer is Adam.
"""
import numpy as np
from sklearn import datasets
import tensorflow as tf
from matplotlib import pyplot as plt
# Load the Iris dataset: feature matrix and integer class labels.
iris = datasets.load_iris()
data, target = iris.data, iris.target

# Shuffle features and labels in place. NumPy is re-seeded with the same value
# before each shuffle so that both arrays are reordered the same way and every
# row stays paired with its label.
seed = 116
for shuffled in (data, target):
    np.random.seed(seed)
    np.random.shuffle(shuffled)
tf.random.set_seed(seed)

# Hold out the last 30 shuffled samples for testing; train on the rest.
holdout = 30
train_data, test_data = data[:-holdout], data[-holdout:]
train_target, test_target = target[:-holdout], target[-holdout:]

# Cast the features to float32 tensors.
train_data = tf.cast(train_data, dtype=tf.float32)
test_data = tf.cast(test_data, dtype=tf.float32)

# Only the training labels are one-hot encoded (3 classes); the test labels
# stay as integer class indices and are compared against argmax predictions.
train_target = tf.one_hot(train_target, depth=3)

# Wrap (features, labels) pairs into batched datasets.
batch = 32
train_pairs = tf.data.Dataset.from_tensor_slices((train_data, train_target))
test_pairs = tf.data.Dataset.from_tensor_slices((test_data, test_target))
train_slices = train_pairs.batch(batch)
test_slices = test_pairs.batch(batch)
# Trainable parameters: a (4, 3) weight matrix and a (3,) bias vector, both
# initialised from a truncated normal distribution with stddev sqrt(1/4).
stddev = np.sqrt(1. / 4.)
weight = tf.Variable(tf.random.truncated_normal(shape=(4, 3), stddev=stddev))
biases = tf.Variable(tf.random.truncated_normal(shape=(3,), stddev=stddev))
variables = [weight, biases]

# Training schedule.
epoch = 50           # number of passes over the training set
learning_rate = .1

# Per-epoch history used for the plots at the end of the script.
recorded_loss = []
recorded_accuracy = []

# Adam state: `counter` counts update steps (used for bias correction), `alpha`
# and `beta` are the decay rates of the first and second moment estimates, and
# `first_pro` / `second_pro` hold one zero-initialised moment buffer per
# trainable variable.
counter = 0
alpha = 0.9
beta = 0.999
first_pro = [np.zeros(variable.shape) for variable in variables]
second_pro = [np.zeros(variable.shape) for variable in variables]
# Train for `epoch` passes over the training batches with a hand-written Adam
# update, then evaluate accuracy on the test batches after every pass.
#
# Reads:   train_slices, test_slices, weight, biases, variables, epoch,
#          learning_rate, alpha, beta
# Updates: counter, first_pro, second_pro, weight, biases,
#          recorded_loss, recorded_accuracy

# Small constant added to the Adam denominator. Without it, a parameter whose
# gradient has been exactly zero so far yields 0 / sqrt(0) = NaN, which then
# poisons the variable for the rest of training.
epsilon = 1e-7

# A dedicated loop variable is used so the `epoch` hyperparameter (the number
# of passes) is not overwritten by the current pass index.
for epoch_index in range(epoch):
    # ---- training ----
    total_loss = 0  # sum of the per-batch mean losses of this pass
    for features, labels in train_slices:
        counter += 1  # global Adam step, drives the bias correction below
        with tf.GradientTape() as tape:
            # Forward propagation: a single linear layer producing logits.
            prediction = tf.matmul(features, weight) + biases
            # Softmax turns the logits into probabilities for cross-entropy.
            loss = tf.reduce_mean(
                tf.losses.categorical_crossentropy(labels, tf.nn.softmax(prediction))
            )
        total_loss += float(loss)
        grads = tape.gradient(loss, variables)

        # Bias-correction factors depend only on the step count, so compute
        # them once per step instead of once per variable.
        first_correction = 1 - tf.pow(alpha, counter)
        second_correction = 1 - tf.pow(beta, counter)

        for index, variable in enumerate(variables):
            # Exponential moving averages of the gradient and squared gradient.
            first_pro[index] = alpha * first_pro[index] + (1 - alpha) * grads[index]
            second_pro[index] = beta * second_pro[index] + (1 - beta) * tf.square(grads[index])
            # Bias-corrected moment estimates.
            first = first_pro[index] / first_correction
            second = second_pro[index] / second_correction
            # Adam step; epsilon guards against division by zero.
            variable.assign_sub(learning_rate * first / (tf.sqrt(second) + epsilon))
    recorded_loss.append(total_loss)

    # ---- testing ----
    total_correct, total_number = 0, 0
    for features, labels in test_slices:
        prediction = tf.matmul(features, weight) + biases
        # Index of the largest logit in each row is the predicted class
        # (softmax is monotonic, so it would not change the argmax).
        prediction = tf.argmax(prediction, axis=1)
        prediction = tf.cast(prediction, dtype=labels.dtype)
        correct = tf.cast(tf.equal(prediction, labels), dtype=tf.int32)
        correct = tf.reduce_sum(correct)
        total_number += features.shape[0]
        total_correct += int(correct)
    accuracy = total_correct / total_number
    recorded_accuracy.append(accuracy)
    print('Epoch: {}, Loss: {}, Accuracy: {}'.format(epoch_index, total_loss, accuracy))
# Plot the recorded per-epoch loss and test accuracy side by side.
# Each entry: (subplot position, title, y-axis label, series, legend label).
curves = (
    (121, 'Loss Function Curve', 'Loss', recorded_loss, '$Loss$'),
    (122, 'Accuracy Curve', 'Accuracy', recorded_accuracy, '$Accuracy$'),
)
for position, title, y_label, series, legend_label in curves:
    plt.subplot(position)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(series, label=legend_label)
    plt.legend()
plt.show()