Here we build a three-layer neural network with TensorFlow. The overall structure of the model is LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX; the final layer is a SOFTMAX layer rather than a SIGMOID layer.
First we create placeholders for the input X and the output Y, so that when forward propagation is executed later, the training data can be fed directly into the TensorFlow computation graph.
import tensorflow as tf

def create_placeholders(n_x, n_y):
    """
    Arguments:
    n_x -- size of the input vector
    n_y -- number of output classes
    """
    # The second dimension is None so the number of examples fed in can vary
    X = tf.placeholder(tf.float32, [n_x, None], name="X")
    Y = tf.placeholder(tf.float32, [n_y, None], name="Y")
    return X, Y
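As a quick check (my own sketch, not part of the original exercise), the placeholders for the data set used further below, which has 12288 input features and 6 classes, would be created like this:

tf.reset_default_graph()
X, Y = create_placeholders(12288, 6)
print("X = " + str(X))   # Tensor("X:0", shape=(12288, ?), dtype=float32)
print("Y = " + str(Y))   # Tensor("Y:0", shape=(6, ?), dtype=float32)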
The second task is to initialize the parameters of the neural network with TensorFlow.
def initialize_parameters():
    # Set the TensorFlow random seed
    tf.set_random_seed(1)
    # Initialize the weights with TensorFlow's built-in xavier_initializer
    W1 = tf.get_variable("W1", [25, 12288], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    # Initialize the biases to zero
    b1 = tf.get_variable("b1", [25, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [12, 25], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [12, 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [6, 12], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [6, 1], initializer=tf.zeros_initializer())

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    return parameters
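A minimal sanity-check sketch (again my own illustration): reset the graph so variable names do not clash, build the variables, and inspect their shapes.

tf.reset_default_graph()   # start from an empty graph so W1, b1, ... are not already defined
parameters = initialize_parameters()
print("W1 = " + str(parameters["W1"]))   # <tf.Variable 'W1:0' shape=(25, 12288) dtype=float32_ref>
print("b3 = " + str(parameters["b3"]))   # <tf.Variable 'b3:0' shape=(6, 1) dtype=float32_ref>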
The third task is to implement forward propagation of the neural network with TensorFlow.
def forward_propagation(X, parameters):
    # Retrieve the parameters from the dictionary
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = tf.add(tf.matmul(W1, X), b1)    # Z1 = W1·X + b1
    A1 = tf.nn.relu(Z1)                  # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)   # Z2 = W2·A1 + b2
    A2 = tf.nn.relu(Z2)                  # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)   # Z3 = W3·A2 + b3
    return Z3
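Note that forward_propagation returns Z3, the linear output (logits) of the last layer, rather than the softmax activation; the softmax is applied inside the cost computation in the next step. A small sketch (my own) wiring together the pieces built so far:

tf.reset_default_graph()
X, Y = create_placeholders(12288, 6)
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
print("Z3 = " + str(Z3))   # a (6, ?) tensor: one column of logits per example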
The fourth task is to compute the network's cost with TensorFlow.
def compute_cost(Z3, Y):
    # softmax_cross_entropy_with_logits expects shape (number of examples, number of classes),
    # while Z3 and Y have shape (number of classes, number of examples), so transpose both
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost
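The transposes are needed because tf.nn.softmax_cross_entropy_with_logits works on rows of logits and labels, one example per row, whereas Z3 and Y here store one example per column. A tiny numeric sketch with made-up values (6 classes, 3 examples) to see the cost evaluate to a scalar:

import numpy as np

tf.reset_default_graph()
Z3_val = np.random.randn(6, 3).astype(np.float32)    # made-up logits, one column per example
Y_val = np.eye(6, dtype=np.float32)[:, [0, 2, 5]]    # one-hot labels for classes 0, 2 and 5
with tf.Session() as sess:
    cost = compute_cost(tf.constant(Z3_val), tf.constant(Y_val))
    print(sess.run(cost))   # mean softmax cross-entropy over the 3 examples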
With these pieces in place, we can now assemble the complete model.
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops

def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    ops.reset_default_graph()  # reset the computation graph to an empty default state
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = X_train.shape   # n_x: number of features, m: number of examples in the training set
    n_y = Y_train.shape[0]
    costs = []

    # Create the placeholders
    X, Y = create_placeholders(n_x, n_y)
    # Initialize the parameters
    parameters = initialize_parameters()
    # Build the forward-propagation operations
    Z3 = forward_propagation(X, parameters)
    # Build the cost operation
    cost = compute_cost(Z3, Y)
    # Build backpropagation: give the optimizer the learning rate and the cost to minimize.
    # Here we use the Adam algorithm.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Define the variable-initialization operation
    init = tf.global_variables_initializer()

    # Start a TensorFlow session
    with tf.Session() as sess:
        # Run the initialization operation
        sess.run(init)
        # Train for num_epochs epochs
        for epoch in range(num_epochs):
            epoch_cost = 0.
            num_minibatches = int(m / minibatch_size)  # number of mini-batches per epoch
            seed = seed + 1
            # Split the training set into mini-batches
            # (random_mini_batches is a helper; see the sketch after this function)
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            # Loop over the mini-batches
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                # The whole computation graph is executed here: forward propagation,
                # then backpropagation, then the parameter update.
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches

            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # Plot the cost curve
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs (per 5)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

        # Fetch the trained parameter values from the computation graph
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Compute the prediction accuracy on the training set and on the test set
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))  # argmax over the class dimension (axis 0)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
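The model above calls random_mini_batches, which is not defined in this post; in the original exercise it comes from a separate utility file. As a minimal sketch (my own, under the assumption that X has shape (n_x, m) and Y has shape (n_y, m), one example per column), such a helper could look like this:

import math
import numpy as np

def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    """Shuffle the columns of X and Y together and split them into mini-batches."""
    np.random.seed(seed)
    m = X.shape[1]                                 # number of examples
    permutation = list(np.random.permutation(m))   # one shared shuffle for X and Y
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    mini_batches = []
    num_complete = math.floor(m / mini_batch_size)
    for k in range(num_complete):
        mini_batch_X = shuffled_X[:, k * mini_batch_size:(k + 1) * mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size:(k + 1) * mini_batch_size]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    # The last mini-batch may be smaller than mini_batch_size
    if m % mini_batch_size != 0:
        mini_batches.append((shuffled_X[:, num_complete * mini_batch_size:],
                             shuffled_Y[:, num_complete * mini_batch_size:]))
    return mini_batches

With X_train / Y_train and X_test / Y_test prepared in this column-per-example layout (here 12288 features and 6 one-hot classes), training is then started with parameters = model(X_train, Y_train, X_test, Y_test).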
Running the network above locally produces the following cost curve: