TensorFlow 2.0: Custom Loss Functions, Custom Layers, tf.function, Graph Structure, tf.GradientTape, etc. (Part 3)

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import os
import sys
import time
import sklearn
from tensorflow import keras

import tensorflow as tf
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.0.0
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf

from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
from sklearn.model_selection import train_test_split
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state = 7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state = 11)
x_train_all.shape, x_test.shape, x_train.shape, x_valid.shape

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

def plot_learning_curves(history):
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()

3-5 Custom Loss Functions

def customized_mse(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_pred - y_true))

model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu', 
                              input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])

model.compile(loss=customized_mse, optimizer='sgd',
              metrics=['mean_squared_error'])
callbacks = [ keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train,
                   validation_data = (x_valid_scaled, y_valid),
                   epochs=10, callbacks = callbacks)
# Note that the custom loss value matches mean_squared_error, as expected
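A custom loss can also carry hyperparameters, for example by building the loss as a closure. This is a hedged sketch, not part of the original notebook; make_scaled_mse and its scale argument are illustrative names:

def make_scaled_mse(scale=1.0):
    # returns a loss function with the (y_true, y_pred) signature expected by compile()
    def scaled_mse(y_true, y_pred):
        return scale * tf.reduce_mean(tf.square(y_pred - y_true))
    return scaled_mse

# usage: model.compile(loss=make_scaled_mse(0.5), optimizer='sgd')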

3-6 Custom Layers (Overriding keras.layers.Layer)

# tf.nn.softplus: log(1 + e^x)
customized_softplus = keras.layers.Lambda(lambda x: tf.nn.softplus(x))
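As a quick illustrative check (not in the original), the Lambda layer can be called directly on a tensor; softplus(0) = log(2) ≈ 0.693:

print(customized_softplus(tf.constant([-10., -5., 0., 5., 10.])))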
# customized dense layer.
class CustomizedDenseLayer(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        self.units = units
        self.activation = keras.layers.Activation(activation)
        super(CustomizedDenseLayer, self).__init__(**kwargs)
    
    def build(self, input_shape):
        """构建所需要的参数"""
        # x * w + b
        #  kernel 就是 w,所以w的shape就是(input_shape[1], self.units) 
        # bias 就是 b,
#         print('input_shape', input_shape) # (None, 8)
        self.kernel = self.add_weight(name = 'kernel',
                                      shape = (input_shape[1], self.units),
                                      initializer = 'uniform',
                                      trainable = True)
        self.bias = self.add_weight(name = 'bias',
                                    shape = (self.units, ),
                                    initializer = 'zeros',
                                    trainable = True)
        super(CustomizedDenseLayer, self).build(input_shape)
    
    def call(self, x):
        """完成正向计算"""
#         print(self.kernel.shape, self.bias.shape)
        return self.activation(x @ self.kernel + self.bias)
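A minimal sanity check of the custom layer (an assumption, not in the original notebook): on an all-zero batch of shape (2, 8) it should build an (8, 5) kernel and a (5,) bias and return a (2, 5) output, just like keras.layers.Dense would:

sample_layer = CustomizedDenseLayer(5, activation='relu')
print(sample_layer(tf.zeros((2, 8))).shape)  # (2, 5)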

model = keras.models.Sequential([
    CustomizedDenseLayer(30, activation='relu',
                         input_shape=x_train.shape[1:]), # x_train.shape[1:] : (8,)
    CustomizedDenseLayer(1),
#     customized_softplus,
#     keras.layers.Dense(1, activation="softplus"),
#     keras.layers.Dense(1), keras.layers.Activation('softplus'),
])
model.summary()
model.compile(loss="mean_squared_error", optimizer="sgd")
callbacks = [keras.callbacks.EarlyStopping(
    patience=5, min_delta=1e-2)]

Now we can train the model:

history = model.fit(x_train_scaled, y_train,
                    validation_data=(x_valid_scaled, y_valid),
                    epochs=20,
                    callbacks=callbacks)

3-7 tf.function: Converting a Python Function to a Graph

def scaled_elu(z, scale=1.0, alpha=1.0):
    # implements the ternary expression: scale * z if z >= 0 else scale * alpha * tf.nn.elu(z)
    # tf.nn.elu(z) is the ELU activation: returns z if z >= 0, otherwise exp(z) - 1
    is_positive = tf.greater(z, 0.0)
    return scale * tf.where(is_positive, z, alpha * tf.nn.elu(z))
print(scaled_elu(tf.constant(0.1)))
print(scaled_elu(tf.constant([0.1, -3.0])))
3-7.1 First way to convert to a tf.function: pass the Python function to tf.function()
tf_scaled_elu = tf.function(scaled_elu) 
3-7.2 Second way: add the @tf.function decorator above the function definition; the input type can be pinned with an input signature, e.g. @tf.function(input_signature=[tf.TensorSpec([None], tf.float32, name='x')]), as sketched below.
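A minimal sketch of the second way (the function name scaled_elu_sig is illustrative; the input_signature pins the argument to a 1-D float32 tensor):

@tf.function(input_signature=[tf.TensorSpec([None], tf.float32, name='x')])
def scaled_elu_sig(z):
    is_positive = tf.greater(z, 0.0)
    return tf.where(is_positive, z, tf.nn.elu(z))

print(scaled_elu_sig(tf.constant([0.1, -3.0])))
# calling it with an incompatible dtype (e.g. an int32 tensor) now raises an error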
# the tf.function (graph) version runs faster
%timeit scaled_elu(tf.random.normal((1000, 1000)))
%timeit tf_scaled_elu(tf.random.normal((1000, 1000)))


# Display the graph code that autograph generates when converting a Python function
def display_tf_code(func):
    code = tf.autograph.to_code(func)
    from IPython.display import display, Markdown
    display(Markdown('```python\n{}\n```'.format(code)))
display_tf_code(scaled_elu)

3-9 Graph Structure

# get_concrete_function -> add an input signature -> SavedModel: this turns a Python function into something that can be exported as a SavedModel
@tf.function
def cube(z):
    return tf.pow(z, 3)
cube(tf.constant([1,2,3]))
cube_func_int32 = cube.get_concrete_function(
    tf.TensorSpec([None], tf.int32, name='a'))
print(cube_func_int32)
Some operations on the graph structure:
cube_func_int32.graph.get_operations()


pow_op = cube_func_int32.graph.get_operations()[2]  # this is the Pow op that implements the cube function we defined
print(pow_op)


print(list(pow_op.inputs))  # the first two operations in get_operations() are the inputs (the placeholder 'a' and the constant exponent)
print(list(pow_op.outputs))


# Look up an operation or tensor by name
cube_func_int32.graph.get_operation_by_name('a')
cube_func_int32.graph.get_tensor_by_name('a:0')
# Dump the information of all nodes in the graph
cube_func_int32.graph.as_graph_def()
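The comment at the top of this section mentions going from get_concrete_function plus an input signature to a SavedModel. A hedged sketch of that last step (the tf.Module wrapper and the export path './signature_to_savedmodel' are illustrative assumptions, not from the original):

# pack the tf.function into a trackable tf.Module and export it as a SavedModel,
# using the int32 concrete function as the serving signature
to_export = tf.Module()
to_export.cube = cube
tf.saved_model.save(to_export, './signature_to_savedmodel',
                    signatures={'serving_default': cube_func_int32})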

3-10 Approximate (Numerical) Differentiation

def f(x):
    return 3. * x ** 2 + 2. * x - 1

def derivate(f, x, eps=1e-3):
    return (f(x+eps) - f(x-eps)) / (2.*eps)

def g(x1, x2):
    return (x1 +5) * (x2 ** 2)

# simulate gradient computation numerically in plain Python
def gradient(x1, x2):
    x1_derivate = derivate(lambda x:g(x,x2), x1)
    x2_derivate = derivate(lambda x:g(x1,x), x2)
    return x1_derivate, x2_derivate

gradient(2., 3.)  
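As a sanity check, the analytic partial derivatives are ∂g/∂x1 = x2^2 and ∂g/∂x2 = 2·x2·(x1 + 5), so at (2, 3) the true gradient is (9, 42); the numeric estimate above matches this up to floating-point error.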

3-11 Basic Usage of tf.GradientTape

x1 = tf.Variable(2.)
x2 = tf.Variable(3.)
with tf.GradientTape() as tape: 
    z = g(x1, x2)
    
dz_x1 = tape.gradient(z, x1)
print(dz_x1)
# the tape is released after one gradient() call, so open a new one to differentiate w.r.t. x2
with tf.GradientTape() as tape: 
    z = g(x1, x2)
dz_x2 = tape.gradient(z, x2)
dz_x2

Each tape above can only be used once. Pass persistent=True to call gradient() multiple times, but then the tape has to be released manually:

with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2)
# we can also take the partial derivatives w.r.t. x1 and x2 in a single call
dz_x1_x2 = tape.gradient(z, [x1, x2])
print(dz_x1_x2)
# release the persistent tape
del tape

Above we differentiated a single function; we can also differentiate several functions with respect to the same x:

x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)  # returns the sum of the gradients: d(3x)/dx + d(x^2)/dx = 3 + 2*5 = 13

Second-order partial derivatives

x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grad = inner_tape.gradient(z, [x1, x2])
outer_grad = [outer_tape.gradient(inner_func, [x1, x2]) for inner_func in inner_grad]
outer_grad  # a 2x2 structure of second derivatives: outer_grad[i][j] differentiates inner_grad[i] w.r.t. x_{j+1}; the top-left entry is None because d^2z/dx1^2 = 0
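For reference, the analytic second derivatives of g(x1, x2) = (x1 + 5)·x2^2 at (2, 3) are: ∂²z/∂x1² = 0 (hence the None), the mixed derivatives ∂²z/∂x1∂x2 = ∂²z/∂x2∂x1 = 2·x2 = 6, and ∂²z/∂x2² = 2·(x1 + 5) = 14.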

Gradient descent by hand

learning_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
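This matches the analytic optimum: f(x) = 3x² + 2x - 1 has f'(x) = 6x + 2, which is zero at x = -1/3 ≈ -0.3333, exactly where gradient descent converged.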

Doing the same update with an optimizer

learning_rate = 0.1
x = tf.Variable(0.0)

optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])  # use the optimizer's apply_gradients to update the variable x
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>

3-12 Combining tf.GradientTape with tf.keras

from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
from sklearn.model_selection import train_test_split
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state = 7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state = 11)
x_train_all.shape, x_test.shape, x_train.shape, x_valid.shape
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

Using metrics

metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.]))
print(metric([0.], [1.]))  # the result accumulates across calls
print(metric.result())

metric.reset_states()  # reset the state so previous calls no longer accumulate
print(metric([0.], [1.]))
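The metric keeps a running mean over all samples it has seen: the first call returns 9.0 ((5 - 2)^2), the second returns 5.0 (the mean of 9 and 1), result() repeats 5.0, and after reset_states() the next call returns 1.0.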


epochs = 5
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()

def random_batch(x, y, batch_size=batch_size):
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]

model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                      input_shape=x_train.shape[1:]),
    keras.layers.Dense(1),
])

# What model.fit does:
# 1. iterate over the training set batch by batch, accumulating the metric
#      1.1 automatic differentiation and gradient updates per batch
# 2. at the end of each epoch, compute the metric on the validation set

# The loop below reproduces fit by hand; the result is usually not quite as good as the built-in fit
for epoch in range(epochs):
    metric.reset_states()
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        with tf.GradientTape() as tape:
            # squeeze the (batch, 1) predictions to (batch,) so they align with y_batch
            y_pred = tf.squeeze(model(x_batch), axis=-1)
            loss = tf.reduce_mean(keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        grads = tape.gradient(loss, model.variables)
        grads_and_vars = zip(grads, model.variables)
        optimizer.apply_gradients(grads_and_vars)  # apply the gradient update
        print("\rEpoch", epoch, " train_mse:",
              metric.result().numpy(), end="")

    y_valid_pred = tf.squeeze(model(x_valid_scaled), axis=-1)
    valid_loss = tf.reduce_mean(keras.losses.mean_squared_error(y_valid, y_valid_pred))
    print("\t", "valid mse", valid_loss.numpy())

