# GRADED FUNCTION: linear_function
def linear_function():
    """
    Implements a linear function Y = WX + b on randomly initialised operands.

    X is a constant tensor of shape (3,1); W and b are variables of shape
    (4,3) and (4,1). All three are drawn with np.random.randn after seeding
    NumPy with 1.

    Returns:
    result -- Y = WX + b
    """
    np.random.seed(1)

    # The draws must happen in the order X, W, b: they all consume the same
    # seeded NumPy stream, so re-arranging them changes the generated values
    # and the result would no longer match the expected output.
    X = tf.constant(np.random.randn(3, 1), name='X')
    W = tf.Variable(np.random.randn(4, 3), name='W')
    b = tf.Variable(np.random.randn(4, 1), name='b')

    weighted_input = tf.matmul(W, X)
    return tf.add(weighted_input, b)
# Define the activation function
# GRADED FUNCTION: sigmoid
def sigmoid(z):
    """
    Computes the sigmoid of z.

    Arguments:
    z -- input value, scalar or vector

    Returns:
    a -- (tf.float32) the sigmoid of z
    """
    # tf.keras.activations.sigmoid requires float16, float32, float64,
    # complex64, or complex128, so the input is cast to tf.float32 first.
    z_as_float = tf.cast(z, tf.float32)
    return tf.keras.activations.sigmoid(z_as_float)
# Convert multi-class labels to one-hot encoding
# GRADED FUNCTION: one_hot_matrix
def one_hot_matrix(label, depth=6):
    """
    Computes the one hot encoding for a single label.

    Arguments:
    label -- (int) Categorical labels
    depth -- (int) Number of different classes that label can take

    Returns:
    one_hot -- tf.Tensor holding the one hot encoding, flattened to a
               single dimension by the final reshape.
    """
    encoded = tf.one_hot(label, depth, axis=0)
    # Collapse whatever shape tf.one_hot produced into a flat 1-D tensor.
    flattened = tf.reshape(encoded, [-1])
    return flattened
# Initialize the parameters
# GRADED FUNCTION: initialize_parameters
def initialize_parameters():
    """
    Initializes parameters to build a neural network with TensorFlow. The shapes are:
                        W1 : [25, 12288]
                        b1 : [25, 1]
                        W2 : [12, 25]
                        b2 : [12, 1]
                        W3 : [6, 12]
                        b3 : [6, 1]

    Every parameter (weights and biases alike) is a tf.Variable filled by the
    same GlorotNormal(seed=1) initializer.

    Returns:
    parameters -- a dictionary of tensors containing W1, b1, W2, b2, W3, b3
    """
    initializer = tf.keras.initializers.GlorotNormal(seed=1)

    # Listed in the exact order the initializer must be invoked
    # (W1, b1, W2, b2, W3, b3) so the call sequence stays unchanged.
    layout = (
        ("W1", (25, 12288)),
        ("b1", (25, 1)),
        ("W2", (12, 25)),
        ("b2", (12, 1)),
        ("W3", (6, 12)),
        ("b3", (6, 1)),
    )

    parameters = {}
    for key, dims in layout:
        parameters[key] = tf.Variable(initializer(shape=dims))

    return parameters
# Forward propagation
# GRADED FUNCTION: forward_propagation
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters

    Returns:
    Z3 -- the output of the last LINEAR unit
    """
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']

    relu = tf.keras.activations.relu

    # Numpy equivalents: Zk = np.dot(Wk, A(k-1)) + bk, Ak = relu(Zk)
    hidden_1 = relu(tf.add(tf.matmul(W1, X), b1))
    hidden_2 = relu(tf.add(tf.matmul(W2, hidden_1), b2))

    # The last layer stays linear: no activation is applied to Z3 here.
    return tf.add(tf.matmul(W3, hidden_2), b3)
# Compute the loss
# GRADED FUNCTION: compute_cost
def compute_cost(logits, labels):
    """
    Computes the cost as the mean categorical cross-entropy over the examples.

    Arguments:
    logits -- output of forward propagation (output of the last LINEAR unit), of shape (6, num_examples)
    labels -- "true" labels vector, same shape as Z3

    Returns:
    cost - Tensor of the cost function
    """
    # Both inputs are transposed so that each row is one example before they
    # are handed to the Keras loss.
    y_true = tf.transpose(labels)
    y_pred = tf.transpose(logits)

    # from_logits=True makes the loss apply the softmax itself, so the raw
    # output of the last LINEAR unit can be passed in directly.
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        y_true, y_pred, from_logits=True
    )
    return tf.reduce_mean(per_example_loss)
# Build the model
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    The four data arguments are combined with tf.data.Dataset.zip, so they
    are expected to be tf.data.Dataset objects. Each minibatch is transposed
    before being passed to forward_propagation.

    Arguments:
    X_train -- training set inputs (input size = 12288, number of training examples = 1080)
    Y_train -- training set labels (output size = 6, number of training examples = 1080)
    X_test -- test set inputs (input size = 12288, number of test examples = 120)
    Y_test -- test set labels (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost and accuracies every 10 epochs

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    costs -- per-example training cost, recorded every 10 epochs
    train_acc -- training accuracy, recorded every 10 epochs
    test_acc -- test accuracy, recorded every 10 epochs
    """
    costs = []  # To keep track of the cost
    train_acc = []
    test_acc = []

    parameters = initialize_parameters()

    # Built once: the variable objects never change between minibatches.
    trainable_variables = [
        parameters[key] for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    ]

    optimizer = tf.keras.optimizers.Adam(learning_rate)

    # The CategoricalAccuracy will track the accuracy for this multiclass problem
    test_accuracy = tf.keras.metrics.CategoricalAccuracy()
    train_accuracy = tf.keras.metrics.CategoricalAccuracy()

    dataset = tf.data.Dataset.zip((X_train, Y_train))
    test_dataset = tf.data.Dataset.zip((X_test, Y_test))

    # We can get the number of elements of a dataset using the cardinality method
    m = dataset.cardinality().numpy()

    minibatches = dataset.batch(minibatch_size).prefetch(8)
    test_minibatches = test_dataset.batch(minibatch_size).prefetch(8)

    # Do the training loop
    for epoch in range(num_epochs):
        epoch_cost = 0.

        # We need to reset the metric so each epoch's accuracy starts from 0
        train_accuracy.reset_states()

        for (minibatch_X, minibatch_Y) in minibatches:
            with tf.GradientTape() as tape:
                # 1. predict
                Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)

                # 2. loss
                minibatch_cost = compute_cost(Z3, tf.transpose(minibatch_Y))

            # We accumulate the accuracy of all the batches
            # (update_state takes y_true first, then y_pred).
            train_accuracy.update_state(minibatch_Y, tf.transpose(Z3))

            grads = tape.gradient(minibatch_cost, trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_variables))

            # compute_cost returns the MEAN over the batch, so weight it by
            # the batch size to accumulate a sum over examples. Dividing the
            # sum of batch means by m would under-report the cost by roughly
            # a factor of the batch size.
            examples_in_batch = tf.cast(
                tf.shape(minibatch_X)[0], minibatch_cost.dtype
            )
            epoch_cost += minibatch_cost * examples_in_batch

        # We divide the epoch cost over the number of samples
        epoch_cost /= m

        # Evaluate and record every 10 epochs to avoid computational
        # overhead. print_cost only controls the console output, so the
        # returned histories are populated even when printing is disabled.
        if epoch % 10 == 0:
            for (minibatch_X, minibatch_Y) in test_minibatches:
                Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)
                test_accuracy.update_state(minibatch_Y, tf.transpose(Z3))

            if print_cost:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
                print("Train accuracy:", train_accuracy.result())
                print("Test_accuracy:", test_accuracy.result())

            costs.append(epoch_cost)
            train_acc.append(train_accuracy.result())
            test_acc.append(test_accuracy.result())
            test_accuracy.reset_states()

    return parameters, costs, train_acc, test_acc