第十二章 自定义指标与层
目录
from tensorflow import keras
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
print("tensorflow version: ", tf.__version__)
tensorflow version: 2.4.1
一、数据准备
data_set = fetch_california_housing()
print(data_set.DESCR)
.. _california_housing_dataset:
California Housing dataset
--------------------------
**Data Set Characteristics:**
:Number of Instances: 20640
:Number of Attributes: 8 numeric, predictive attributes and the target
:Attribute Information:
- MedInc median income in block
- HouseAge median house age in block
- AveRooms average number of rooms
- AveBedrms average number of bedrooms
- Population block population
- AveOccup average house occupancy
- Latitude house block latitude
- Longitude house block longitude
:Missing Attribute Values: None
This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/
The target variable is the median house value for California districts.
This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bureau publishes sample data (a block group typically has a population
of 600 to 3,000 people).
It can be downloaded/loaded using the
:func:`sklearn.datasets.fetch_california_housing` function.
.. topic:: References
- Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,
Statistics and Probability Letters, 33 (1997) 291-297
# 查看数据集基本信息
data_set.data.shape, data_set.data.dtype, data_set.target_names, data_set.target.dtype
((20640, 8), dtype('float64'), ['MedHouseVal'], dtype('float64'))
# Split the California housing data into train / validation / test sets and
# standardise the features.
X, y = data_set.data, data_set.target
# First carve off the test set, then split the remainder into train/validation.
# NOTE(review): no random_state is passed, so the splits are presumably not
# reproducible between runs — confirm whether that is intended.
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full)
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()
# The scaler is fitted on the training set only; validation and test data are
# transformed with the statistics learned from the training set.
X_train = std_scaler.fit_transform(X_train)
X_valid = std_scaler.transform(X_valid)
X_test = std_scaler.transform(X_test)
二、自定义指标
2.1 定义一个简单模型
# Baseline regressor: one 30-unit SELU hidden layer (LeCun-normal init)
# followed by a single linear output unit.
model = keras.models.Sequential()
model.add(
    keras.layers.Dense(
        30,
        activation="selu",
        kernel_initializer="lecun_normal",
        input_shape=X_train.shape[1:],
    )
)
model.add(keras.layers.Dense(1))
2.2、自定义指标
2.2.1 定义指标
def create_huber(threshold):
    """Return a Huber function configured with ``threshold``.

    The returned callable takes ``y_true`` and ``y_pred`` and returns the
    element-wise Huber value of ``y_true - y_pred``; no reduction is applied.
    """
    def huber_fun(y_true, y_pred):
        residual = y_true - y_pred
        abs_residual = tf.abs(residual)
        # Quadratic branch, used where |residual| < threshold.
        quadratic = tf.square(residual) / 2.0
        # Linear branch, used where |residual| >= threshold. It matches the
        # quadratic branch in both value and slope at |residual| == threshold,
        # so the combined function stays smooth.
        linear = threshold * abs_residual - (threshold ** 2) / 2.0
        return tf.where(abs_residual < threshold, quadratic, linear)

    return huber_fun
# Visualise the Huber function for threshold 1.0 over the range [-3, 3].
import matplotlib.pyplot as plt
import numpy as np
hub_f = create_huber(1.0)
# NOTE(review): `x` and `y` are rebound here, which overwrites the dataset
# target `y` assigned during data preparation. Re-running the split cell
# without re-running the `X, y = ...` line would use the wrong `y`.
x = np.linspace(-3.0, 3.0, 100, dtype=np.float32)
# Predictions are all zero, so the error passed to the Huber function is x itself.
y = np.zeros_like(x, dtype=np.float32)
y = hub_f(x, y)
plt.figure(figsize=(8,3))
plt.plot(x, y, "b-")
plt.grid(True)
plt.show()
class HuberMetric(keras.metrics.Metric):
    """Streaming metric reporting the mean Huber error over all elements seen.

    Two scalar state variables are kept: ``total`` (running sum of the
    element-wise Huber values) and ``count`` (running number of elements).
    ``result()`` is their ratio.

    Args:
        threshold: Huber threshold forwarded to ``create_huber``.
        print_log: When true, print diagnostic messages that show when the
            individual methods are invoked.
        **kwargs: Forwarded to ``keras.metrics.Metric``.
    """

    def __init__(self, threshold=1.0, print_log=False, **kwargs):
        # Initialise the base class first so that add_weight() is usable.
        super().__init__(**kwargs)
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)
        self.print_log = print_log
        # State variables that Keras updates during training. The name is
        # passed by keyword so that it can never be mistaken for another
        # positional parameter of add_weight().
        self.total = self.add_weight(name="total", initializer="zeros")
        self.count = self.add_weight(name="count", initializer="zeros")
        if print_log:
            print(f"=== total: {type(self.total)}")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the Huber error of one batch into the running state.

        NOTE: ``sample_weight`` is accepted for API compatibility but is not
        used; every element contributes with weight one.
        """
        metric = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(metric))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))
        if self.print_log:
            print(f"=== update_state: 被调用, {self.total}, {self.count}")

    def result(self):
        """Return the mean Huber error accumulated so far.

        ``divide_no_nan`` yields 0 instead of NaN while ``count`` is still
        zero (before the first update or right after a reset).
        """
        return tf.math.divide_no_nan(self.total, self.count)

    def get_config(self):
        """Return the config dict, including ``threshold``, for model saving."""
        base_config = super().get_config()
        if self.print_log:
            print(f"=== get_config: {self.threshold}")
        return {**base_config, "threshold": self.threshold}
2.2.2 流式指标的相关操作
h = HuberMetric(2., print_log=True)
# 传入样本值与预测值,计算指标
h(tf.constant([[2.]]), tf.constant([[10.0]]))
=== total: <class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
=== update_state: 被调用, <tf.Variable 'total:0' shape=() dtype=float32, numpy=14.0>, <tf.Variable 'count:0' shape=() dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=14.0>
# 再次计算更新指标
h(tf.constant([[0.], [5.]]), tf.constant([[1.], [9.25]]))
=== update_state: 被调用, <tf.Variable 'total:0' shape=() dtype=float32, numpy=21.0>, <tf.Variable 'count:0' shape=() dtype=float32, numpy=3.0>
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>
# 查看当前值
h.result()
<tf.Tensor: shape=(), dtype=float32, numpy=7.0>
# 查看会被跟踪的变量
h.variables
[<tf.Variable 'total:0' shape=() dtype=float32, numpy=21.0>,
<tf.Variable 'count:0' shape=() dtype=float32, numpy=3.0>]
# 重置变量值, 全部归零
h.reset_states()
# 再次查看值
h.variables
[<tf.Variable 'total:0' shape=() dtype=float32, numpy=0.0>,
<tf.Variable 'count:0' shape=() dtype=float32, numpy=0.0>]
2.2.3 使用自定义指标编译以及训练模型
model.compile(loss="mse", optimizer="nadam", metrics=[HuberMetric(2.0, print_log=True)])
=== total: <class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))
Epoch 1/10
=== update_state: 被调用, <tf.Variable 'total:0' shape=() dtype=float32>, <tf.Variable 'count:0' shape=() dtype=float32>
=== update_state: 被调用, <tf.Variable 'total:0' shape=() dtype=float32>, <tf.Variable 'count:0' shape=() dtype=float32>
353/363 [============================>.] - ETA: 0s - loss: 3.1779 - huber_metric_1: 1.3426=== update_state: 被调用, <tf.Variable 'total:0' shape=() dtype=float32>, <tf.Variable 'count:0' shape=() dtype=float32>
363/363 [==============================] - 2s 3ms/step - loss: 3.1454 - huber_metric_1: 1.3255 - val_loss: 0.5870 - val_huber_metric_1: 0.2834
Epoch 2/10
363/363 [==============================] - 1s 2ms/step - loss: 0.9980 - huber_metric_1: 0.2823 - val_loss: 0.5407 - val_huber_metric_1: 0.2617
Epoch 3/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4985 - huber_metric_1: 0.2352 - val_loss: 0.5258 - val_huber_metric_1: 0.2548
Epoch 4/10
363/363 [==============================] - 1s 2ms/step - loss: 0.5638 - huber_metric_1: 0.2471 - val_loss: 0.5377 - val_huber_metric_1: 0.2564
Epoch 5/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4865 - huber_metric_1: 0.2352 - val_loss: 0.4977 - val_huber_metric_1: 0.2424
Epoch 6/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4710 - huber_metric_1: 0.2301 - val_loss: 0.4928 - val_huber_metric_1: 0.2401
Epoch 7/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4476 - huber_metric_1: 0.2204 - val_loss: 0.4931 - val_huber_metric_1: 0.2401
Epoch 8/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4314 - huber_metric_1: 0.2113 - val_loss: 0.4798 - val_huber_metric_1: 0.2339
Epoch 9/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4366 - huber_metric_1: 0.2143 - val_loss: 0.4711 - val_huber_metric_1: 0.2295
Epoch 10/10
363/363 [==============================] - 1s 2ms/step - loss: 0.4357 - huber_metric_1: 0.2135 - val_loss: 0.4757 - val_huber_metric_1: 0.2313
import pandas as pd
pd.DataFrame(history.history).plot()
plt.grid(True)
plt.show()
model.save("custom_metric_model.h5")
=== get_config: 2.0
# Reload the saved model; the custom metric class must be supplied through
# custom_objects so that it can be reconstructed.
model_load = keras.models.load_model("custom_metric_model.h5",
custom_objects={"HuberMetric": HuberMetric,}
)
# Inspect the metric threshold.
# NOTE(review): this reads the threshold from the original `model`, not from
# the reloaded `model_load`, so it does not actually verify what was saved —
# confirm which object was intended.
model.metrics[-1].threshold
2.0
model_load.fit(X_train, y_train, epochs=3, validation_data=(X_valid, y_valid))
Epoch 1/3
363/363 [==============================] - 1s 3ms/step - loss: 0.4293 - huber_metric_1: 0.2104 - val_loss: 0.4635 - val_huber_metric_1: 0.2258
Epoch 2/3
363/363 [==============================] - 1s 2ms/step - loss: 0.4254 - huber_metric_1: 0.2085 - val_loss: 0.4603 - val_huber_metric_1: 0.2245
Epoch 3/3
363/363 [==============================] - 1s 2ms/step - loss: 0.4289 - huber_metric_1: 0.2095 - val_loss: 0.4533 - val_huber_metric_1: 0.2211
<tensorflow.python.keras.callbacks.History at 0x7f5fa00ef0d0>
三、自定义层
3.1 没有权重的层
没有权重的层可用于处理输入或输出的数据,例如: keras.layers.Flatten
可以使用keras.layers.Lambda 来返回一个不带权重的层
keras.backend.clear_session()
# Weight-free layer that applies the exponential function to its input.
exp_layer = keras.layers.Lambda(lambda x: tf.exp(x))
# Visualise the layer's output on evenly spaced points starting at -5.0.
# NOTE: `x` and `y` are notebook-global names and are rebound here.
x = tf.range(-5.0, limit=5, delta=0.3, dtype=tf.float32)
print(x.shape)
y = exp_layer(x)
plt.plot(x, y, "b.")
plt.grid(True)
plt.title("exp layer", fontsize=12)
(34,)
Text(0.5, 1.0, 'exp layer')
# Regression model whose final stage is the weight-free exponential layer:
# Dense(30, relu) -> Dense(1) -> exp.
model = keras.models.Sequential()
model.add(keras.layers.Dense(30, activation="relu", input_shape=X_train.shape[1:]))
model.add(keras.layers.Dense(1))
model.add(exp_layer)

model.compile(loss="mse", optimizer="nadam")
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_valid, y_valid),
)
model.evaluate(X_test, y_test)
Epoch 1/5
363/363 [==============================] - 1s 2ms/step - loss: 1.5692 - val_loss: 0.6715
Epoch 2/5
363/363 [==============================] - 1s 2ms/step - loss: 0.5721 - val_loss: 0.4963
Epoch 3/5
363/363 [==============================] - 1s 2ms/step - loss: 0.4394 - val_loss: 4.0413
Epoch 4/5
363/363 [==============================] - 1s 2ms/step - loss: 0.4066 - val_loss: 0.4192
Epoch 5/5
363/363 [==============================] - 1s 2ms/step - loss: 0.3802 - val_loss: 0.4033
162/162 [==============================] - 0s 964us/step - loss: 0.4317
0.4316927194595337
3.2 带有权重的层
带有权重的层,则继承keras.layers.Layer即可实现
class MyDense(keras.layers.Layer):
    """Minimal fully connected layer: ``activation(X @ kernel + bias)``.

    Args:
        units: Number of output units.
        activation: Anything accepted by ``keras.activations.get``.
        print_log: When true, print a message each time ``build``, ``call`` or
            ``compute_output_shape`` runs. Not stored in the config.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, activation=None, print_log=False, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)
        self.print_log = print_log

    def build(self, batch_input_shape):
        """Create the kernel and bias once the input shape is known."""
        if self.print_log:
            print(f"=== build被调用, shape: {batch_input_shape}")
        self.kernel = self.add_weight(
            name="kernel",
            shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal",
        )
        self.bias = self.add_weight(
            name="bias",
            shape=[self.units],
            initializer="zeros",
        )
        # Must be called last; see the base class build().
        super().build(batch_input_shape)

    def call(self, X):
        """Apply the affine transform followed by the activation."""
        if self.print_log:
            print("=== call 被调用")
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        """Return the input shape with its last dimension replaced by ``units``."""
        if self.print_log:
            print(f"== compute_output_shape, shape:{batch_input_shape}")
        # Normalise first: the argument may be a plain tuple or list, which
        # has no as_list() method.
        input_dims = tf.TensorShape(batch_input_shape).as_list()
        shape = tf.TensorShape(input_dims[:-1] + [self.units])
        if self.print_log:
            print(f"== compute_output_shape, out_shape:{shape}")
        return shape

    def get_config(self):
        """Return the config dict (``units`` and serialised ``activation``)."""
        base_config = super().get_config()
        return {
            **base_config,
            "units": self.units,
            "activation": keras.activations.serialize(self.activation),
        }
# Train a two-layer regressor built only from the custom MyDense layer, with
# logging enabled so the build/call invocations are visible.
keras.backend.clear_session()

model = keras.models.Sequential()
model.add(MyDense(30, activation="relu", input_shape=X_train.shape[1:], print_log=True))
model.add(MyDense(1, print_log=True))

model.compile(loss="mse", optimizer="nadam")
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_valid, y_valid),
)
=== build被调用, shape: (None, 8)
=== call 被调用
=== build被调用, shape: (None, 30)
=== call 被调用
Epoch 1/5
=== call 被调用
=== call 被调用
=== call 被调用
=== call 被调用
330/363 [==========================>...] - ETA: 0s - loss: 4.3489=== call 被调用
=== call 被调用
363/363 [==============================] - 1s 2ms/step - loss: 4.1639 - val_loss: 0.7568
Epoch 2/5
363/363 [==============================] - 1s 2ms/step - loss: 0.6674 - val_loss: 0.6018
Epoch 3/5
363/363 [==============================] - 1s 2ms/step - loss: 0.5172 - val_loss: 0.5092
Epoch 4/5
363/363 [==============================] - 1s 2ms/step - loss: 0.4652 - val_loss: 0.4663
Epoch 5/5
363/363 [==============================] - 1s 2ms/step - loss: 0.4254 - val_loss: 0.4468
model.evaluate(X_test, y_test)
162/162 [==============================] - 0s 1ms/step - loss: 0.4231
0.4230932295322418
# 保存模型
model_name = "custom_layer_model_1.h5"
model.save(model_name)
# 重新加载model
model_reload = keras.models.load_model(model_name, custom_objects={
"MyDense": MyDense
})
model_reload.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
my_dense (MyDense) (None, 30) 270
_________________________________________________________________
my_dense_1 (MyDense) (None, 1) 31
=================================================================
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________
3.3 创建一个多输入多输出的层
class MyMultiLayer(keras.layers.Layer):
    """Layer with two inputs and two outputs: their sum and their product."""

    def call(self, X):
        """Unpack the input pair and return ``[sum, product]``."""
        first, second = X
        summed = first + second
        multiplied = first * second
        return [summed, multiplied]

    def compute_output_shape(self, batch_input_shape):
        """Return the two input shapes unchanged, as a list."""
        print(f"== shape: {batch_input_shape}")
        shape_first, shape_second = batch_input_shape
        return [shape_first, shape_second]
# Wire two 2-feature symbolic inputs through the multi-input/multi-output layer.
keras.backend.clear_session()
input1 = keras.layers.Input(shape=[2])
input2 = keras.layers.Input(shape=[2])
# The layer receives the inputs as one tuple and returns a pair of outputs.
output1, output2 = MyMultiLayer()((input1, input2))
3.4 创建只在训练期间做一些特殊处理的层
class MyGaussianNoise(keras.layers.Layer):
    """Add zero-mean Gaussian noise to the input, but only during training.

    Args:
        stddv: Standard deviation of the noise.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, stddv, **kwargs):
        super().__init__(**kwargs)
        # Stored under the same name that call() reads. The original stored
        # it as `staddv`, which made call() fail with AttributeError whenever
        # training was true.
        self.stddv = stddv

    def call(self, X, training=None):
        """Return ``X`` plus noise when ``training`` is truthy, else ``X``."""
        if training:
            noise = tf.random.normal(tf.shape(X), stddev=self.stddv)
            return X + noise
        return X

    def compute_output_shape(self, batch_input_shape):
        """The layer does not change the shape of its input."""
        return batch_input_shape

    def get_config(self):
        """Return the config dict, including ``stddv``, for model saving."""
        base_config = super().get_config()
        return {**base_config, "stddv": self.stddv}
# NOTE(review): `model` here is still the MyDense model built earlier; no
# model containing MyGaussianNoise is constructed, so the noise layer defined
# just above is never exercised by this training run. Confirm whether a new
# model that includes MyGaussianNoise was intended.
keras.backend.clear_session()
# Recompile and continue training the existing model for 20 more epochs.
model.compile(loss="mse", optimizer="nadam")
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
Epoch 1/20
=== call 被调用
=== call 被调用
=== call 被调用
=== call 被调用
326/363 [=========================>....] - ETA: 0s - loss: 0.3801=== call 被调用
=== call 被调用
363/363 [==============================] - 1s 2ms/step - loss: 0.3824 - val_loss: 0.4380
Epoch 2/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3984 - val_loss: 0.4214
Epoch 3/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3782 - val_loss: 0.4124
Epoch 4/20
363/363 [==============================] - 1s 3ms/step - loss: 0.3996 - val_loss: 0.4117
Epoch 5/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3807 - val_loss: 0.4058
Epoch 6/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3776 - val_loss: 0.4045
Epoch 7/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3565 - val_loss: 0.3990
Epoch 8/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3749 - val_loss: 0.3968
Epoch 9/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3687 - val_loss: 0.3939
Epoch 10/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3550 - val_loss: 0.3909
Epoch 11/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3600 - val_loss: 0.3950
Epoch 12/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3589 - val_loss: 0.3964
Epoch 13/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3580 - val_loss: 0.3848
Epoch 14/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3554 - val_loss: 0.3849
Epoch 15/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3474 - val_loss: 0.3825
Epoch 16/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3462 - val_loss: 0.3815
Epoch 17/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3591 - val_loss: 0.3827
Epoch 18/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3478 - val_loss: 0.3805
Epoch 19/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3469 - val_loss: 0.3835
Epoch 20/20
363/363 [==============================] - 1s 2ms/step - loss: 0.3397 - val_loss: 0.3764
pd.DataFrame(history.history).plot()
plt.grid(True)
plt.show()
model.evaluate(X_test, y_test)
162/162 [==============================] - 0s 1ms/step - loss: 0.3617
0.36171987652778625
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
my_dense (MyDense) (None, 30) 270
_________________________________________________________________
my_dense_1 (MyDense) (None, 1) 31
=================================================================
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________