踩了无数的坑,终于完成了。整个过程和用numpy手写神经网络差不多,只是不用自己实现反向传播(TensorFlow会自动求导)。
首先导包,对数据进行处理:
import os
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
import tf_utils
import time
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # silence TensorFlow's warning-level log output
np.random.seed(1)
# Data preparation
train_set_x, train_set_y, test_set_x, test_set_y, classes = tf_utils.load_dataset()
# print(train_set_x.shape, train_set_y.shape, test_set_x.shape, test_set_y.shape, classes.shape)
# Flatten every example into one column (features x examples) and scale by
# 1/255; per the original author's note the inputs are uint8 RGB images.
train_x = train_set_x.reshape(train_set_x.shape[0], -1).T / 255
test_x = test_set_x.reshape(test_set_x.shape[0], -1).T / 255
def one_hot(y, c):
    """One-hot encode integer class labels.

    Args:
        y: Array-like of integer class indices, any shape.
        c: Array whose first dimension is the number of classes; only
            ``c.shape[0]`` is read.

    Returns:
        A float32 NumPy array of shape ``y.shape + (c.shape[0],)`` with a 1
        at each label's class index. Labels outside ``[0, c.shape[0])``
        yield an all-zero vector, the same convention ``tf.one_hot`` uses.
    """
    labels = np.asarray(y)
    depth = c.shape[0]
    # Pure NumPy on purpose: building a tf.one_hot op and a throwaway Session
    # per call is slow and leaves extra nodes in the default graph that the
    # model is later constructed in.
    return (labels[..., np.newaxis] == np.arange(depth)).astype(np.float32)
# One-hot encode the labels, drop singleton axes, then transpose.
# NOTE(review): this presumably yields (classes x examples) to match the
# column-per-example layout of train_x/test_x -- confirm the label shape
# returned by tf_utils.load_dataset().
train_y = np.squeeze(one_hot(train_set_y, classes)).T
test_y = np.squeeze(one_hot(test_set_y, classes)).T
# print(train_x.shape, train_y.shape, test_x.shape, test_y.shape, classes.shape)
初始化权重:
def init(x, y):
    """Create the weight and bias variables of the four-layer network.

    Hidden layers have 25, 12 and 5 units; the input width is ``x.shape[0]``
    and the output width is ``y.shape[0]``. Weights use the Xavier
    initializer and biases start at zero.

    Args:
        x: Input data; only ``x.shape[0]`` is read.
        y: Label data; only ``y.shape[0]`` is read.

    Returns:
        Dict mapping "w1", "b1", ..., "w4", "b4" to the TensorFlow variables.
        Each ``w<k>`` has shape [units_k, units_(k-1)] and each ``b<k>`` has
        shape [units_k, 1].
    """
    tf.set_random_seed(1)
    # Unit counts from the input layer through to the output layer.
    layer_sizes = [x.shape[0], 25, 12, 5, y.shape[0]]
    ini_param = {}
    # Variables are created in the order w1, b1, w2, b2, ... on purpose.
    for index in range(1, len(layer_sizes)):
        fan_in = layer_sizes[index - 1]
        fan_out = layer_sizes[index]
        weight_name = "w" + str(index)
        bias_name = "b" + str(index)
        ini_param[weight_name] = tf.get_variable(
            weight_name, [fan_out, fan_in],
            initializer=tf.contrib.layers.xavier_initializer())
        ini_param[bias_name] = tf.get_variable(
            bias_name, [fan_out, 1],
            initializer=tf.zeros_initializer())
    return ini_param
前向:
# Forward propagation
def forward(p, x):
    """Build the forward pass: three affine+ReLU layers and a linear output.

    Softmax is intentionally not applied to the last layer; the loss function
    used with these logits applies it internally.

    Args:
        p: Dict holding "w1".."w4" and "b1".."b4".
        x: Input with one example per column.

    Returns:
        Dict with the pre-activations "z1".."z4" and activations "a1".."a3".
    """
    def affine(layer, inputs):
        # w<layer> @ inputs + b<layer>
        return tf.matmul(p["w" + str(layer)], inputs) + p["b" + str(layer)]

    forward_param = {}
    activation = x
    for layer in (1, 2, 3):
        pre_activation = affine(layer, activation)
        activation = tf.nn.relu(pre_activation)
        forward_param["z" + str(layer)] = pre_activation
        forward_param["a" + str(layer)] = activation
    # Output layer stays linear (raw logits).
    forward_param["z4"] = affine(4, activation)
    return forward_param
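计算损失函数。TensorFlow的softmax_cross_entropy_with_logits已经内置了softmax,所以前向传播的最后一层不需要再做softmax。此处要注意:logits和labels的形状必须是[batch_size, num_classes],即样本数在前面、类别数在后面(所以下面先做了转置)。我在此处翻车,浪费了好几个小时。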
# Loss computation
def cost_f(z, y):
    """Return the mean softmax cross-entropy between logits and labels.

    Both inputs are transposed before the loss is computed because
    ``tf.nn.softmax_cross_entropy_with_logits`` expects [batch_size,
    num_classes] for ``logits`` (the output of the network's last layer) and
    for ``labels`` (the true labels); for a single example the shape is just
    [num_classes].

    Args:
        z: Logits with one example per column.
        y: Labels laid out the same way as ``z``.

    Returns:
        Scalar tensor holding the loss averaged over the examples.
    """
    logits = tf.transpose(z)
    labels = tf.transpose(y)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(per_example_loss)
建模:
1)此处纠正之前对mini-batch的认知错误:一个epoch是把整个数据集完整遍历一遍;mini-batch是指在一个epoch内把数据集分成若干小批,每处理完一小批就更新一次权重。而不是我之前以为的:把数据集分成多份,每份各自训练多次。
2)数据运算慢,运算完之后最好保存模型。
# Back-propagation is handled by the optimizer, so it lives with the training loop.
def model_min_batch_adam(x, y, min_batch_size, learning_rate, epoch, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Train the network with mini-batch Adam, save a checkpoint and plot cost.

    Every epoch reshuffles the examples, walks through them in consecutive
    mini-batches (the last one may be smaller) and performs one Adam update
    per mini-batch.

    Args:
        x: Training inputs, one example per column (features x examples).
        y: Training labels, one example per column (classes x examples).
        min_batch_size: Number of examples per mini-batch; must be >= 1.
        learning_rate: Adam learning rate.
        epoch: Number of passes over the whole training set.
        beta1: Adam first-moment decay rate.
        beta2: Adam second-moment decay rate.
        epsilon: Adam numerical-stability constant.

    Returns:
        Dict mapping "w1".."w4"/"b1".."b4" to the trained values as NumPy
        arrays.

    Raises:
        ValueError: If ``min_batch_size`` is below 1 or ``x`` has no examples.
    """
    size = x.shape[1]
    batch_size = int(min_batch_size)
    if batch_size < 1:
        raise ValueError("min_batch_size must be at least 1")
    if size == 0:
        raise ValueError("x must contain at least one example")

    checkpoint_path = "./Model/model.ckpt"
    # Saver.save does not create a missing parent directory. Create it up
    # front so a long training run cannot be lost at the very last step.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Graph definition
    tf.set_random_seed(1)
    p = init(x, y)
    X = tf.placeholder(tf.float32, [x.shape[0], None], name="X")
    Y = tf.placeholder(tf.float32, [y.shape[0], None], name='Y')
    f_p = forward(p, X)
    cost = cost_f(f_p["z4"], Y)
    # Adam optimizer; minimize() also builds the backward pass.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon).minimize(cost)
    # Initializer for all variables
    ini = tf.global_variables_initializer()
    saver = tf.train.Saver()

    costs = []
    with tf.Session() as session:
        session.run(ini)
        for i in range(epoch):
            # Shuffle: a fresh random permutation of the example indices.
            order = np.random.permutation(size)
            shuffled_x = x[:, order]
            shuffled_y = y[:, order]
            batch_costs = []
            # Stepping by batch_size covers the trailing partial batch too,
            # because slicing past the end simply yields the remainder.
            for start in range(0, size, batch_size):
                end = start + batch_size
                _, minibatch_cost = session.run(
                    [optimizer, cost],
                    feed_dict={X: shuffled_x[:, start:end], Y: shuffled_y[:, start:end]})
                batch_costs.append(minibatch_cost)
            # Epoch cost = plain mean over every mini-batch of the epoch.
            cost_mean = sum(batch_costs) / len(batch_costs)
            if i % 10 == 0:
                costs.append(cost_mean)
        trained = session.run(p)
        # Save before plotting: plt.show() blocks, and closing or killing the
        # plot window must not cost us the trained model.
        saver.save(session, checkpoint_path)
    plt.plot(np.squeeze(costs))
    plt.show()
    return trained
预测:
def predict(x, y, p):
    """Compute classification accuracy of parameters ``p`` on ``(x, y)``.

    Args:
        x: Inputs, one example per column.
        y: One-hot labels, one example per column.
        p: Dict of network parameters accepted by ``forward``.

    Returns:
        The fraction of examples whose arg-max logit matches the arg-max
        label.
    """
    X = tf.placeholder(tf.float32, [x.shape[0], None], name="X")
    Y = tf.placeholder(tf.float32, [y.shape[0], None], name='Y')
    logits = forward(p, X)["z4"]
    with tf.Session() as sess:
        # tf.argmax without an explicit axis reduces over axis 0, i.e. it
        # picks the winning class within each example's column.
        predicted_class = tf.argmax(logits)
        true_class = tf.argmax(Y)
        # tf.equal gives an element-wise boolean tensor; cast it to float so
        # that its mean is the accuracy.
        hits = tf.cast(tf.equal(predicted_class, true_class), "float")
        accuracy = tf.reduce_mean(hits)
        result = sess.run(accuracy, feed_dict={X: x, Y: y})
    return result
运行:
# Train with mini-batch size 32, learning rate 0.0003 and 1500 epochs, timing
# the whole run with wall-clock time.
time_start = time.time()
param = model_min_batch_adam(train_x, train_y, 32, 0.0003, 1500)
time_end = time.time()
print("用时:", time_end - time_start)
# Report accuracy on the test set first, then on the training set.
print("测试集的准确率:", predict(test_x, test_y, param))
print("训练集的准确率:", predict(train_x, train_y, param))
用时: 907.1318571567535
测试集的准确率: 0.733333
训练集的准确率: 0.996296
下面是我用0.00001 epoch=6000次跑了一晚上,但是没保存模型。。。。。。。。。