在使用全连接层的时候,我们可以使用高级接口直接调用 Dense 层。那有没有想过自己实现一个 Dense 层呢?下面我们一起实现一个 Dense 层。
这是自己实现的第一个类,自动调用 call 方法,继承 layers.Layer 类,实现 y = wx + b
class MyDense(layers.Layer):
    """Custom fully-connected layer implementing y = x @ w + b."""

    def __init__(self, inp_dim, outp_dim):
        # inp_dim: input feature size; outp_dim: output feature size.
        super(MyDense, self).__init__()  # must initialise the parent Layer
        # add_weight is the supported API; add_variable is a deprecated
        # alias that has been removed in newer TensorFlow releases.
        self.kernel = self.add_weight('w', [inp_dim, outp_dim])
        self.bias = self.add_weight('b', [outp_dim])

    def call(self, inputs, **kwargs):
        # Affine transform: (batch, inp_dim) @ (inp_dim, outp_dim) + bias.
        out = inputs @ self.kernel + self.bias
        return out
接下来就是实现自己的模型 MyModel, 继承 keras.Model 类, 同样也是需要调用 super 方法
class MyModel(keras.Model):
    """Five-layer MLP classifier built from custom MyDense layers.

    Maps a flattened 28*28 MNIST image down to 10 class logits:
    784 -> 256 -> 128 -> 64 -> 32 -> 10.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = MyDense(28 * 28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        # Trained on MNIST, so the final layer emits 10 logits.
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None, mask=None):
        # Hidden layers each get a ReLU non-linearity.
        x = inputs
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = tf.nn.relu(hidden(x))
        # No activation on the output: raw logits are returned.
        return self.fc5(x)
接下来就是数据预处理函数
def process(x, y):
    """Preprocess one MNIST sample.

    Scales pixel values to [0, 1] as float32, flattens the image into a
    784-vector, and one-hot encodes the integer label to depth 10.
    """
    image = tf.reshape(tf.cast(x, dtype=tf.float32) / 255., [28 * 28])
    label = tf.one_hot(tf.cast(y, dtype=tf.int64), depth=10)
    return image, label
训练数据部分
# Mini-batch size for both training and evaluation.
batch_size = 128
# Load the MNIST dataset (train and test splits).
(x, y), (x_test, y_test) = datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(process).shuffle(1000).batch(batch_size)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(process).batch(batch_size)
# Build the model; the layer stack is defined inside MyModel itself.
net_work = MyModel()
net_work.compile(
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and removed in newer Keras releases.
    optimizer=optimizers.Adam(learning_rate=0.01),
    # Labels are one-hot encoded and the model outputs raw logits.
    loss=tf.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]  # report classification accuracy
)
# Train, validating on the test set every 2 epochs.
net_work.fit(db, epochs=5, validation_data=db_test, validation_freq=2)
# Final evaluation on the held-out test set.
net_work.evaluate(db_test)
全部代码
import tensorflow as tf
from tensorflow.keras import optimizers, datasets, Sequential, metrics, layers
from tensorflow import keras
class MyDense(layers.Layer):
    """Custom fully-connected layer implementing y = x @ w + b."""

    def __init__(self, inp_dim, outp_dim):
        # inp_dim: input feature size; outp_dim: output feature size.
        super(MyDense, self).__init__()
        # add_weight is the supported API; add_variable is a deprecated
        # alias that has been removed in newer TensorFlow releases.
        self.kernel = self.add_weight('w', [inp_dim, outp_dim])
        self.bias = self.add_weight('b', [outp_dim])

    def call(self, inputs, **kwargs):
        # Affine transform: (batch, inp_dim) @ (inp_dim, outp_dim) + bias.
        out = inputs @ self.kernel + self.bias
        return out
class MyModel(keras.Model):
    """MLP classifier for MNIST assembled from custom MyDense layers.

    The stack narrows 784 -> 256 -> 128 -> 64 -> 32 -> 10; the final
    10-wide layer produces one logit per digit class.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = MyDense(28 * 28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None, mask=None):
        # Apply each hidden layer followed by a ReLU activation.
        x = inputs
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = tf.nn.relu(hidden(x))
        # Output layer has no activation: callers expect raw logits.
        return self.fc5(x)
def process(x, y):
    """Preprocess one MNIST sample.

    Scales pixel values to [0, 1] as float32, flattens the 28x28 image
    into a 784-vector, and one-hot encodes the integer label to depth 10.
    """
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int64)
    x = tf.reshape(x, [28 * 28])
    y = tf.one_hot(y, depth=10)
    return x, y
# Mini-batch size for both training and evaluation.
batch_size = 128
# Load the MNIST dataset (train and test splits).
(x, y), (x_test, y_test) = datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(process).shuffle(1000).batch(batch_size)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(process).batch(batch_size)
print(x.shape, y.shape)
net_work = MyModel()
net_work.compile(
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and removed in newer Keras releases.
    optimizer=optimizers.Adam(learning_rate=0.01),
    # Labels are one-hot encoded and the model outputs raw logits.
    loss=tf.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)
# Train, validating on the test set every 2 epochs.
net_work.fit(db, epochs=5, validation_data=db_test, validation_freq=2)
# Final evaluation on the held-out test set.
net_work.evaluate(db_test)