This article walks through a hands-on example of how to do data processing (including normalization) and optimization with TensorFlow.
一、Data Import
# Packages
import h5py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework.ops import EagerTensor
from tensorflow.python.ops.resource_variable_ops import ResourceVariable
import time
# checking TensorFlow Version
tf.__version__
If the data starts out stored as a dataset (here, HDF5 datasets), it can be split into individual examples with tf.data.Dataset.from_tensor_slices.
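The examples in this article assume that train_dataset and test_dataset are HDF5 files opened with h5py; the file paths below are placeholders for wherever your data actually lives.
train_dataset = h5py.File('datasets/train_signs.h5', 'r')   # hypothetical path
test_dataset = h5py.File('datasets/test_signs.h5', 'r')     # hypothetical path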
# The data is stored in train_dataset and test_dataset
# First, slice the data with tf.data.Dataset.from_tensor_slices()
x_train = tf.data.Dataset.from_tensor_slices(train_dataset['train_set_x'])
y_train = tf.data.Dataset.from_tensor_slices(train_dataset['train_set_y'])
x_test = tf.data.Dataset.from_tensor_slices(test_dataset['test_set_x'])
y_test = tf.data.Dataset.from_tensor_slices(test_dataset['test_set_y'])
type(x_train) # tensorflow.python.data.ops.dataset_ops.TensorSliceDataset
Next, determine how many distinct label classes there are:
unique_labels = set()
for element in y_train:
    unique_labels.add(element.numpy())
print(unique_labels)
Finally, normalize the data:
def normalize(image):
    # Cast the image to float32, then divide by 255 to normalize it
    image = tf.cast(image, tf.float32) / 255.0
    # Flatten the image into a single column vector
    image = tf.reshape(image, [-1,])
    return image
# map(function): applies the function to every element and returns a new dataset
new_train = x_train.map(normalize)
new_test = x_test.map(normalize)
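As a quick sanity check (assuming the raw images are 64x64x3, which flattens to 12288 values), you can peek at one element of the mapped dataset:
for image in new_train.take(1):
    print(image.shape, image.dtype)   # expected: (12288,) float32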
二、Basic Optimization with GradientTape
1. Linear Function
We start with the simple linear function Y = WX + b:
def linear_function():
    """
    Implements a linear function:
        Initializes X to be a random tensor of shape (3,1)
        Initializes W to be a random tensor of shape (4,3)
        Initializes b to be a random tensor of shape (4,1)
    Returns:
        result -- Y = WX + b
    """
    np.random.seed(1)
    """
    Note, to ensure that the "random" numbers generated match the expected results,
    please create the variables in the order given in the starting code below.
    (Do not re-arrange the order).
    """
    # (approx. 4 lines)
    X = tf.constant(np.random.randn(3,1), name = "X")
    W = tf.constant(np.random.randn(4,3), name = "W")
    b = tf.constant(np.random.randn(4,1), name = "b")
    Y = tf.add(tf.matmul(W,X), b)
    return Y
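Calling the function once shows that the result is a (4, 1) eager tensor:
result = linear_function()
print(type(result))    # EagerTensor
print(result.shape)    # (4, 1)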
2. Sigmoid Function
def sigmoid(z):
    """
    Computes the sigmoid of z
    Arguments:
        z -- input value, scalar or vector
    Returns:
        a -- (tf.float32) the sigmoid of z
    """
    # tf.keras.activations.sigmoid requires float16, float32, float64, complex64, or complex128.
    # (approx. 2 lines)
    z = tf.cast(z, tf.float32)
    a = tf.keras.activations.sigmoid(z)
    return a
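A quick check on two scalar inputs:
print(sigmoid(0.0))     # 0.5
print(sigmoid(12.0))    # very close to 1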
3. One Hot Encodings
The elements of y_train are integers in [0, C-1], whereas the model's prediction is a vector with C entries, so below we use tf.one_hot to convert each label in y_train into the same form.
def one_hot_matrix(label, depth=6):
    """
    Computes the one hot encoding for a single label
    Arguments:
        label -- (int) Categorical label
        depth -- (int) Number of different classes that label can take
    Returns:
        one_hot -- tf.Tensor of shape [depth,] with the one hot encoding
    """
    # (approx. 1 line)
    one_hot = tf.reshape(tf.one_hot(label, depth, axis=0), shape=[-1, ])
    return one_hot
new_y_test = y_test.map(one_hot_matrix)
new_y_train = y_train.map(one_hot_matrix)
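For example, the label 2 with depth 6 becomes a length-6 vector with a 1 at index 2:
print(one_hot_matrix(2, depth=6))
# tf.Tensor([0. 0. 1. 0. 0. 0.], shape=(6,), dtype=float32)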
4. Initialize Parameters
Parameters can be initialized with tf.Variable(initializer(shape=(...))), where the initializer is tf.keras.initializers.GlorotNormal(seed=1):
def initialize_parameters():
    """
    Initializes parameters to build a neural network with TensorFlow. The shapes are:
        W1 : [25, 12288]
        b1 : [25, 1]
        W2 : [12, 25]
        b2 : [12, 1]
        W3 : [6, 12]
        b3 : [6, 1]
    Returns:
        parameters -- a dictionary of tensors containing W1, b1, W2, b2, W3, b3
    """
    initializer = tf.keras.initializers.GlorotNormal(seed=1)
    # (approx. 6 lines of code)
    W1 = tf.Variable(initializer(shape=([25, 12288])))
    b1 = tf.Variable(initializer(shape=([25, 1])))
    W2 = tf.Variable(initializer(shape=([12, 25])))
    b2 = tf.Variable(initializer(shape=([12, 1])))
    W3 = tf.Variable(initializer(shape=([6, 12])))
    b3 = tf.Variable(initializer(shape=([6, 1])))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    return parameters
parameters = initialize_parameters()
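Looping over the dictionary confirms the shapes listed in the docstring:
for name, param in parameters.items():
    print(name, param.shape)
# W1 (25, 12288), b1 (25, 1), W2 (12, 25), b2 (12, 1), W3 (6, 12), b3 (6, 1)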
三、Building the Neural Network
1. Implement Forward Propagation
The computation uses TensorFlow's built-in APIs:
tf.math.add(a, b)
tf.linalg.matmul(a, b)
tf.keras.activations.relu(z)
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR
    Arguments:
        X -- input dataset placeholder, of shape (input size, number of examples)
        parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                      the shapes are given in initialize_parameters
    Returns:
        Z3 -- the output of the last LINEAR unit
    """
    # Retrieve the parameters from the dictionary "parameters"
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    # (approx. 5 lines)                                 # Numpy Equivalents:
    Z1 = tf.math.add(tf.linalg.matmul(W1, X), b1)       # Z1 = np.dot(W1, X) + b1
    A1 = tf.keras.activations.relu(Z1)                  # A1 = relu(Z1)
    Z2 = tf.math.add(tf.linalg.matmul(W2, A1), b2)      # Z2 = np.dot(W2, A1) + b2
    A2 = tf.keras.activations.relu(Z2)                  # A2 = relu(Z2)
    Z3 = tf.math.add(tf.linalg.matmul(W3, A2), b3)      # Z3 = np.dot(W3, A2) + b3
    return Z3
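A minimal sketch of running one small batch through the network; it assumes new_train (the flattened, normalized images from section 一) and the parameters initialized above:
for image_batch in new_train.batch(2).take(1):
    Z3 = forward_propagation(tf.transpose(image_batch), parameters)
    print(Z3.shape)   # (6, 2): one column of logits per example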
2. Compute Total Loss
def compute_total_loss(logits, labels):
    """
    Computes the total loss
    Arguments:
        logits -- output of forward propagation (output of the last LINEAR unit), of shape (6, num_examples)
        labels -- "true" labels vector, same shape as Z3
    Returns:
        total_loss -- Tensor of the total loss value
    """
    # categorical_crossentropy expects shape (num_examples, num_classes), hence the transposes.
    # from_logits=True applies the softmax internally; reduce_sum adds up the per-example losses.
    total_loss = tf.reduce_sum(tf.keras.losses.categorical_crossentropy(tf.transpose(labels), tf.transpose(logits), from_logits=True))
    return total_loss
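A sketch of evaluating the loss on one small batch, again assuming new_train, new_y_train, and parameters from the earlier steps:
for image_batch, label_batch in tf.data.Dataset.zip((new_train, new_y_train)).batch(2).take(1):
    logits = forward_propagation(tf.transpose(image_batch), parameters)
    loss = compute_total_loss(logits, tf.transpose(label_batch))
    print(loss)   # scalar tensor: the cross-entropy summed over the 2 examples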
3. Train the Model
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001,
          num_epochs = 1500, minibatch_size = 32, print_cost = True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
    Arguments:
        X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
        Y_train -- training labels, of shape (output size = 6, number of training examples = 1080)
        X_test -- test set, of shape (input size = 12288, number of test examples = 120)
        Y_test -- test labels, of shape (output size = 6, number of test examples = 120)
        learning_rate -- learning rate of the optimization
        num_epochs -- number of epochs of the optimization loop
        minibatch_size -- size of a minibatch
        print_cost -- True to print the cost every 10 epochs
    Returns:
        parameters -- parameters learnt by the model. They can then be used to predict.
    """
    costs = []                          # To keep track of the cost
    train_acc = []
    test_acc = []

    # Initialize your parameters
    # (1 line)
    parameters = initialize_parameters()
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    optimizer = tf.keras.optimizers.Adam(learning_rate)

    # The CategoricalAccuracy will track the accuracy for this multiclass problem
    test_accuracy = tf.keras.metrics.CategoricalAccuracy()
    train_accuracy = tf.keras.metrics.CategoricalAccuracy()

    dataset = tf.data.Dataset.zip((X_train, Y_train))
    test_dataset = tf.data.Dataset.zip((X_test, Y_test))

    # We can get the number of elements of a dataset using the cardinality method
    m = dataset.cardinality().numpy()

    minibatches = dataset.batch(minibatch_size).prefetch(8)
    test_minibatches = test_dataset.batch(minibatch_size).prefetch(8)
    # X_train = X_train.batch(minibatch_size, drop_remainder=True).prefetch(8)  # <<< extra step
    # Y_train = Y_train.batch(minibatch_size, drop_remainder=True).prefetch(8)  # loads memory faster

    # Do the training loop
    for epoch in range(num_epochs):

        epoch_total_loss = 0.

        # We need to reset the object so the accuracy is measured from 0 each epoch
        train_accuracy.reset_states()

        for (minibatch_X, minibatch_Y) in minibatches:

            with tf.GradientTape() as tape:
                # 1. predict
                Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)
                # 2. loss
                minibatch_total_loss = compute_total_loss(Z3, tf.transpose(minibatch_Y))

            # We accumulate the accuracy of all the batches
            train_accuracy.update_state(minibatch_Y, tf.transpose(Z3))

            trainable_variables = [W1, b1, W2, b2, W3, b3]
            grads = tape.gradient(minibatch_total_loss, trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_variables))
            epoch_total_loss += minibatch_total_loss

        # We divide the epoch total loss over the number of samples
        epoch_total_loss /= m

        # Print the cost every 10 epochs
        if print_cost == True and epoch % 10 == 0:
            print("Cost after epoch %i: %f" % (epoch, epoch_total_loss))
            print("Train accuracy:", train_accuracy.result())

            # We evaluate the test set every 10 epochs to avoid computational overhead
            for (minibatch_X, minibatch_Y) in test_minibatches:
                Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)
                test_accuracy.update_state(minibatch_Y, tf.transpose(Z3))
            print("Test_accuracy:", test_accuracy.result())

            costs.append(epoch_total_loss)
            train_acc.append(train_accuracy.result())
            test_acc.append(test_accuracy.result())
            test_accuracy.reset_states()

    return parameters, costs, train_acc, test_acc
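Finally, a sketch of how the model might be trained and its learning curve inspected; new_train, new_y_train, new_test, new_y_test are the preprocessed datasets from section 一, and 100 epochs is only an illustrative value:
parameters, costs, train_acc, test_acc = model(new_train, new_y_train,
                                               new_test, new_y_test, num_epochs=100)
# Plot the cost recorded every 10 epochs
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('epochs (per ten)')
plt.title('Learning rate = 0.0001')
plt.show()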