版权提示:以下所有例子都是参考github大神制作,我只是搬运工
https://github.com/YunYang1994/TensorFlow2.0-Examples
一、制作数据
import numpy as np
import tensorflow as tf
# Hyper-parameters
learning_rate = 0.001
training_step = 3000
batch_size = 100
display_step = 300
# Download/load MNIST: images are uint8 (N, 28, 28), labels are digit ints 0-9.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Convert the image arrays to float32
x_train, x_test = np.array(x_train, np.float32), np.array(x_test,np.float32)
# Flatten each 28x28 image into a 1-D vector: (N, 28, 28) ==> (N, 784)
x_train, x_test = x_train.reshape([-1, 784]), x_test.reshape([-1, 784])
# Normalize pixel values: [0, 255] ==> [0, 1]
x_train, x_test = x_train / 255., x_test / 255.
# Build the input pipeline with the tf.data API: repeat, shuffle, batch, prefetch
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
二、设置网络参数,并构建网络结构
# Network size constants
n_hidden_1 = 256  # neurons in hidden layer 1
n_hidden_2 = 256  # neurons in hidden layer 2
n_input = 784     # flattened MNIST image (28 * 28)
n_classes = 10    # digit classes 0-9

def _rand_param(shape):
    # A fresh trainable variable initialized from a standard normal.
    return tf.Variable(tf.random.normal(shape))

# Layer weight matrices keyed by name; shapes chain input -> h1 -> h2 -> out.
weights = {
    "h1": _rand_param([n_input, n_hidden_1]),
    "h2": _rand_param([n_hidden_1, n_hidden_2]),
    "out": _rand_param([n_hidden_2, n_classes]),
}
# Per-layer bias vectors.
biases = {
    "b1": _rand_param([n_hidden_1]),
    "b2": _rand_param([n_hidden_2]),
    "out": _rand_param([n_classes]),
}
# 创建模型
def mutilayer_preceptron(x):
    """Forward pass of a 2-hidden-layer MLP; returns softmax class probabilities.

    Uses the module-level `weights` / `biases` dictionaries as parameters.
    """
    h = x
    # Two fully connected hidden layers with sigmoid activation.
    for w_key, b_key in (("h1", "b1"), ("h2", "b2")):
        h = tf.nn.sigmoid(tf.matmul(h, weights[w_key]) + biases[b_key])
    # Output projection followed by softmax over the 10 classes.
    logits = tf.matmul(h, weights["out"]) + biases["out"]
    return tf.nn.softmax(logits)
三、定义损失函数和评估准确率
# 定义交叉熵损失函数
def cross_entropy(y_pred, y_true):
    """Mean per-example categorical cross-entropy.

    y_pred: (batch, 10) softmax probabilities.
    y_true: (batch,) integer class labels, one-hot encoded here.
    """
    y_true = tf.one_hot(y_true, depth=10)
    # Clip probabilities into [1e-9, 1] to avoid log(0).
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    # BUG FIX: the original reduce_sum had no axis, summing over the whole
    # batch so reduce_mean acted on a scalar (a no-op) and the loss scaled
    # with batch size. Sum over the class axis, then average over the batch.
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred), axis=1))
# 定义评估的准确率
def accuracy(y_pred, y_true):
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
四、开始训练
# Plain stochastic gradient descent (SGD) optimizer
optimizer = tf.optimizers.SGD(learning_rate)
def train_step(x, y):
    """Run one SGD update on a single mini-batch (x, y)."""
    # GradientTape records the forward pass for automatic differentiation.
    with tf.GradientTape() as tape:
        loss = cross_entropy(mutilayer_preceptron(x), y)
    # Gather every trainable variable in one list instead of spelling out
    # weights["h1"], weights["h2"], ... — this is why the parameters were
    # defined up front in the weights/biases dictionaries.
    variables = list(weights.values()) + list(biases.values())
    # Compute and apply the gradients.
    grads = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
for step, (batch_x, batch_y) in enumerate(train_data.take(training_step), 1):
    train_step(batch_x, batch_y)
    # BUG FIX: enumerate(..., 1) makes `step` already 1-based, but the
    # original tested (step+1) % display_step — logging after 299, 599, ...
    # batches while printing 300, 600, .... Test `step` directly instead.
    if step % display_step == 0:
        pred = mutilayer_preceptron(batch_x)
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
五、eager模式下保存与加载模型
我想用 .save() 保存 h5 模型,但 TF 提示:最好是 tf.keras.Sequential 序列式定义的网络结构才能保存完整的 h5 模型,否则只能保存权重参数。我对 tf2.0 还不是很熟悉,所以这里先只保存权重参数,加载时再通过代码重新构建计算图。
网络结构同上,不同的是,利用python继承 tf.keras.Model 类
class MutiLayerPreceptron(tf.keras.Model):
    """2-hidden-layer MLP that reuses the module-level weight/bias variables.

    Subclassing tf.keras.Model lets the checkpoint machinery track the
    variables through the W_1 ... b_3 attributes.
    """

    def __init__(self):
        super(MutiLayerPreceptron, self).__init__()
        # Attribute names are part of the checkpoint key layout — keep them.
        self.W_1, self.b_1 = weights["h1"], biases["b1"]
        self.W_2, self.b_2 = weights["h2"], biases["b2"]
        self.W_3, self.b_3 = weights["out"], biases["out"]

    def call(self, inputs):
        """Forward pass: two sigmoid hidden layers, then softmax output."""
        hidden = tf.nn.sigmoid(tf.matmul(inputs, self.W_1) + self.b_1)
        hidden = tf.nn.sigmoid(tf.matmul(hidden, self.W_2) + self.b_2)
        return tf.nn.softmax(tf.matmul(hidden, self.W_3) + self.b_3)
定义训练函数
# 定义交叉熵损失函数
def cross_entropy(y_pred, y_true):
    """Mean per-example categorical cross-entropy.

    y_pred: (batch, 10) softmax probabilities.
    y_true: (batch,) integer class labels, one-hot encoded here.
    """
    y_true = tf.one_hot(y_true, depth=10)
    # Clip probabilities into [1e-9, 1] to avoid log(0).
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    # BUG FIX: the original reduce_sum had no axis, summing over the whole
    # batch so reduce_mean acted on a scalar (a no-op) and the loss scaled
    # with batch size. Sum over the class axis, then average over the batch.
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred), axis=1))
# 定义评估的准确率
def accuracy(y_pred, y_true):
    """Fraction of rows whose argmax prediction equals the integer label."""
    hits = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(hits, tf.float32), axis=-1)
# Build the subclassed Keras model and a plain SGD optimizer
model = MutiLayerPreceptron()
optimizer = tf.optimizers.SGD(learning_rate)
def train_step(x, y):
    """Run one SGD update on a single mini-batch using the Keras model."""
    # GradientTape records the forward pass for automatic differentiation.
    with tf.GradientTape() as tape:
        loss = cross_entropy(model(x), y)
    # tf.keras.Model tracks its variables, so no manual list of
    # weights/biases is needed here.
    trainable = model.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
开始训练,并保存模型,注意,这里的保存模型只是保存的权重参数,并没有保存对应的网络图
for step, (batch_x, batch_y) in enumerate(train_data.take(training_step), 1):
    train_step(batch_x, batch_y)
    # BUG FIX: enumerate(..., 1) makes `step` already 1-based; the original
    # tested (step+1) % display_step, firing one batch early relative to the
    # step number it printed. (A stray debug print(step) was also removed.)
    if step % display_step == 0:
        pred = model(batch_x)
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        # Save only the variables (model weights + optimizer slots), not the
        # graph. A fresh Checkpoint is built for each save on purpose: its
        # save counter restarts at 1, so the file is named "step:<n>-1",
        # which is exactly the path the restore code expects later.
        root = tf.train.Checkpoint(optimizer=optimizer,
                                   model=model)
        checkpoint_prefix = "./saved_model/step:%i" % step
        root.save(checkpoint_prefix)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
六、加载模型并预测
# Restore the checkpoint saved at training step 3000 (the "-1" suffix is the
# save counter that Checkpoint.save appends automatically).
root.restore("./saved_model/step:3000-1")
pred = model(batch_x)   # batch_x / batch_y are the last mini-batch from the loop
acc = accuracy(pred, batch_y)
pred = tf.argmax(pred, 1)   # predicted digit per example
true = batch_y
print("acc:, ", acc.numpy())
# print(pred)
# print(true)