自定义网络
- keras.Sequential 容器
- keras.layers.Layer
- keras.Model
keras.Sequential 容器
网络层的搭建
# Assemble the network one layer at a time: three ReLU hidden layers
# followed by a 10-unit softmax output layer.
model = keras.Sequential()
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dense(128, activation="relu"))
model.add(keras.layers.Dense(64, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))
# Create the weights for inputs with 28 * 28 features; the leading None
# leaves the first (batch) dimension unspecified.
model.build(input_shape=(None, 28 * 28))
# Print the layer-by-layer summary of the built model.
model.summary()
- model.trainable_variables 可训练变量
- model.call() 自动完成各层的前向传播
可通过 model(x) 的形式直接调用
Layer/Model
- Model 继承自 Layer
- __init__ 实现
- call 实现
- Model: compile/ fit / evaluate
自定义全连接层
class MyDense(layers.Layer):  # inherits from Layer
    """Fully connected layer computing ``inputs @ kernel + bias``.

    Args:
        inp_dim: Size of the first kernel dimension (the input feature count).
        outp_dim: Size of the second kernel dimension and of the bias.
    """

    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()  # run the base-class initialisation
        # add_weight is implemented by the base Layer and creates the
        # variables. It replaces the deprecated add_variable alias, and
        # keyword arguments keep the call independent of parameter order.
        self.kernel = self.add_weight(name="w", shape=[inp_dim, outp_dim])
        self.bias = self.add_weight(name="b", shape=[outp_dim])

    def call(self, inputs, training=None):
        """Return ``inputs @ kernel + bias``.

        Args:
            inputs: Tensor to transform.
            training: Training flag; accepted but not used by this layer.
        """
        out = inputs @ self.kernel + self.bias
        return out
使用自定义全连接层
class MyModel(keras.Model):  # inherits from Model
    """Five-layer fully connected network built from MyDense layers.

    ``call`` applies ReLU after each of the first four layers and returns
    the output of the fifth layer without any activation.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        # Use the custom fully connected layer for every stage.
        self.fc1 = MyDense(28 * 28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None):
        """Run the forward pass by stacking the layers.

        Args:
            inputs: Tensor fed to the first layer.
            training: Training flag; accepted but not used here.
        """
        x = self.fc1(inputs)
        x = tf.nn.relu(x)
        x = self.fc2(x)
        x = tf.nn.relu(x)
        x = self.fc3(x)
        x = tf.nn.relu(x)
        x = self.fc4(x)
        x = tf.nn.relu(x)
        x = self.fc5(x)
        return x
完整代码使用
import tensorflow as tf
from tensorflow.keras import datasets, layers
# Look up the physical GPUs and switch each one to on-demand memory growth.
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
print("物理GPU个数:", len(gpus))

batch_size = 128

# Load the MNIST dataset and report the shape of every split.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
print(*(split.shape for split in (x_train, y_train, x_test, y_test)))
# Per-sample preprocessing applied through Dataset.map.
def pre_process(x, y):
    """Prepare one (image, label) pair.

    ``x`` is cast to float32, divided by 255.0 and reshaped to a vector of
    length 28 * 28; ``y`` is cast to int32.
    """
    flat = tf.reshape(tf.cast(x, tf.float32) / 255.0, (28 * 28,))
    label = tf.cast(y, tf.int32)
    return flat, label
# Training pipeline: map -> shuffle -> batch -> repeat.
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .map(pre_process)
    .shuffle(60000)
    .batch(batch_size)
    .repeat()
)
# Test pipeline: map -> batch (no shuffling, no repetition).
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(pre_process)
    .batch(batch_size)
)
# Pull one batch to check the shapes produced by the pipeline.
db_sample = iter(db_train)
sample = next(db_sample)
print("X:", sample[0].shape, "Y:", sample[1].shape)
# Custom fully connected layer
class MyDense(layers.Layer):  # inherits from Layer
    """Fully connected layer computing ``inputs @ kernel + bias``.

    Args:
        inp_dim: Size of the first kernel dimension (the input feature count).
        outp_dim: Size of the second kernel dimension and of the bias.
    """

    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()  # run the base-class initialisation
        # add_weight is implemented by the base Layer and creates the
        # variables. Name and shape are passed by keyword because the
        # positional order of add_weight is not the same in every Keras
        # release.
        self.kernel = self.add_weight(name="w", shape=[inp_dim, outp_dim])
        self.bias = self.add_weight(name="b", shape=[outp_dim])

    def call(self, inputs, training=None):
        """Return ``inputs @ kernel + bias``.

        Args:
            inputs: Tensor to transform.
            training: Training flag; accepted but not used by this layer.
        """
        out = inputs @ self.kernel + self.bias
        return out
# Custom model
class MyModel(tf.keras.Model):  # inherits from Model
    """Five MyDense layers: ReLU after the first four, softmax after the last."""

    def __init__(self):
        super().__init__()
        # Every stage uses the custom fully connected layer.
        self.fc1 = MyDense(28 * 28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass; ``training`` and ``mask`` are not used."""
        x = inputs
        # Hidden stages: dense layer followed by the ReLU activation.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = tf.nn.relu(hidden(x))
        # Output stage: dense layer followed by softmax.
        return tf.nn.softmax(self.fc5(x))
# Instantiate the custom model.
network = MyModel()
# Configure training: optimizer, loss function and reported metric.
network.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
network.build(input_shape=(None, 28 * 28))
network.summary()
# Train the model.
network.fit(
    db_train,  # training dataset
    epochs=10,  # number of passes
    # Integer division ignores the leftover samples that do not fill a batch.
    steps_per_epoch=x_train.shape[0] // batch_size,
    validation_data=db_test,  # dataset used for validation
    validation_freq=2,  # validation frequency
)
# Evaluate the model on the test dataset.
network.evaluate(db_test)
自定义网络 CIFAR10测试 保存模型
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers
# Look up the physical GPUs and switch each one to on-demand memory growth.
gpu_s = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpu_s:
    tf.config.experimental.set_memory_growth(gpu, True)
print("GPU个数:", len(gpu_s))
# Load the dataset.
batch_size = 128
(x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
# Drop the size-1 label axis, then one-hot encode the labels with depth 10.
y_train = tf.squeeze(y_train)
y_test = tf.squeeze(y_test)
y_train = tf.one_hot(y_train, depth=10)
y_test = tf.one_hot(y_test, depth=10)
# Print the shapes together with the minimum and maximum image values.
print(x_train.shape, y_train.shape, x_train.min(), x_train.max())
print(x_test.shape, y_test.shape, x_test.min(), x_test.max())
# Per-sample preprocessing applied through Dataset.map.
def pre_process(x, y):
    """Prepare one (image, label) pair.

    ``x`` is cast to float32 and mapped through ``2 * (x / 255.0) - 1``;
    ``y`` is cast to int32.
    """
    unit_scaled = tf.cast(x, dtype=tf.float32) / 255.0
    label = tf.cast(y, dtype=tf.int32)
    return 2 * unit_scaled - 1, label
# Build the datasets.
# Training pipeline: map -> shuffle -> batch -> repeat.
db_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .map(pre_process)
    .shuffle(10000)
    .batch(batch_size)
    .repeat()
)
# Test pipeline: map -> batch.
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(pre_process)
    .batch(batch_size)
)
# Pull one batch and print its shapes and the value range of the images.
sample = next(iter(db_train))
print(
    "batch:", sample[0].shape, sample[1].shape,
    "min:", np.min(sample[0]), np.max(sample[0]),
)
# Custom fully connected layer
class MyDense(layers.Layer):
    """Fully connected layer without bias, computing ``inputs @ kernel``.

    Args:
        input_dim: Size of the first kernel dimension (the input feature count).
        output_dim: Size of the second kernel dimension.
    """

    def __init__(self, input_dim, output_dim):
        super(MyDense, self).__init__()
        # Name and shape are passed by keyword because the positional order
        # of add_weight is not the same in every Keras release.
        self.kernel = self.add_weight(name="w", shape=[input_dim, output_dim])
        # No bias variable is created; call() is a pure matrix product.

    def call(self, inputs, training=None):
        """Return ``inputs @ kernel``; ``training`` is accepted but unused."""
        x = inputs @ self.kernel
        return x
# Custom model
class MyNetwork(tf.keras.Model):
    """Five MyDense layers applied to inputs reshaped to 32 * 32 * 3 columns.

    ReLU follows each of the first four layers; the output of the fifth
    layer is returned without any activation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = MyDense(32 * 32 * 3, 256)  # fully connected layers
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass; ``training`` and ``mask`` are not used."""
        # Flatten every sample into one row of 32 * 32 * 3 values.
        x = tf.reshape(inputs, [-1, 32 * 32 * 3])
        # Hidden stages: fully connected layer followed by ReLU.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = tf.nn.relu(hidden(x))
        return self.fc5(x)
# Create the custom network.
model = MyNetwork()
# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
              loss=tf.losses.CategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
# Train the custom network.
model.fit(db_train, epochs=15,
          validation_data=db_test,
          validation_freq=2,
          steps_per_epoch=x_train.shape[0] // batch_size)
# Evaluate the trained network.
model.evaluate(db_test)
# Save the network weights.
save_path = "ckpt/weights.ckpt"
model.save_weights(save_path)
print("保存权值文件")
# Delete the trained network.
del model
# Create a fresh network with the same configuration.
model = MyNetwork()
model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
              loss=tf.losses.CategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
# Load the saved weights into the fresh network.
model.load_weights(save_path)
print("从文件加载权值文件")
# Evaluate again to check the restored weights.
model.evaluate(db_test)