14. TensorFlow 2.0: Gradient Descent, Function Optimization in Practice, the Handwritten-Digit Problem in Practice, and TensorBoard Visualization!

1. Gradient Descent

1.1. What’s Gradient

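For a scalar function $f(x_1, \dots, x_n)$, the gradient is the vector of all partial derivatives:

$$\nabla f = \left(\frac{\partial f}{\partial x_1}, \dots, \frac{\partial f}{\partial x_n}\right)$$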

1.2. What does it mean

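The gradient points in the direction in which the function value increases fastest, and its norm measures how fast that increase is; the negative gradient therefore points toward the steepest local decrease.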

1.3. How to Search

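Gradient descent searches for a minimum by repeatedly stepping against the gradient, with the learning rate controlling the step size:

$$\theta_{t+1} = \theta_t - lr \cdot \nabla f(\theta_t)$$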

1.4. AutoGrad


  • GradientTape — wrap the forward computation in a tape context so TensorFlow records it for automatic differentiation (a minimal sketch follows this list)
  • Persistent GradientTape — pass persistent=True so gradient() can be called more than once on the same tape
  • 2nd-order — nest two tapes to obtain second derivatives (see the example below)
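A minimal sketch of the first two usages, with the toy functions y = w² and z = w³ (the variable names are illustrative):

import tensorflow as tf

w = tf.Variable(2.)

# basic GradientTape: gradient() may only be called once
with tf.GradientTape() as tape:
    y = w ** 2
print(tape.gradient(y, w))  # dy/dw = 2w = 4.0

# persistent GradientTape: gradient() may be called repeatedly
with tf.GradientTape(persistent=True) as tape:
    y = w ** 2
    z = w ** 3
print(tape.gradient(y, w))  # 4.0
print(tape.gradient(z, w))  # dz/dw = 3w^2 = 12.0
del tape  # a persistent tape holds resources until released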

import tensorflow as tf

w = tf.Variable(1.)
b = tf.Variable(2.)
x = tf.Variable(3.)

# nest two tapes: the inner tape yields the first-order gradients,
# the outer tape differentiates those gradients again
with tf.GradientTape() as t1:
    with tf.GradientTape() as t2:
        y = x * w + b
    dy_dw, dy_db = t2.gradient(y, [w, b])
d2y_dw2 = t1.gradient(dy_dw, w)

print(dy_dw, dy_db, d2y_dw2)
assert dy_dw.numpy() == 3.0  # dy/dw = x = 3
assert d2y_dw2 is None       # y is linear in w, so there is no second derivative

2. Activation Functions and Their Gradients

2.1. Sigmoid / Logistic (0~1)

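The sigmoid squashes any real input into (0, 1), and its derivative can be expressed through its own output, which makes backprop cheap:

$$\sigma(x) = \frac{1}{1 + e^{-x}}, \qquad \sigma'(x) = \sigma(x)\,(1 - \sigma(x))$$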

import tensorflow as tf

a = tf.linspace(-10., 10., 10)

with tf.GradientTape() as tape:
    tape.watch(a)  # a comes from tf.linspace and is not a Variable, so the tape must be told to watch it
    y = tf.sigmoid(a)

grads = tape.gradient(y, [a])
print(a)
print(y)
print(grads)  # the gradient vanishes toward both ends of the input range

2.2. tanh (-1~1)

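tanh rescales the sigmoid to the range (-1, 1):

$$\tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}} = 2\,\sigma(2x) - 1, \qquad \tanh'(x) = 1 - \tanh^2(x)$$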

import tensorflow as tf

a = tf.linspace(-5., 5., 10)

with tf.GradientTape() as tape:
    tape.watch(a)
    y = tf.tanh(a)

grads = tape.gradient(y, [a])
print(a)
print(y)
print(grads)

2.3. ReLU (Rectified Linear Unit)

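ReLU is $f(x) = \max(0, x)$; its gradient is 0 for x < 0 and 1 for x > 0, so it does not suffer from the vanishing gradients of sigmoid and tanh. A minimal sketch following the same pattern as the sigmoid and tanh examples above:

import tensorflow as tf

a = tf.linspace(-5., 5., 10)

with tf.GradientTape() as tape:
    tape.watch(a)
    y = tf.nn.relu(a)

grads = tape.gradient(y, [a])
print(grads)  # 0 where a < 0, 1 where a > 0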

3. Loss Functions and Their Gradients

3.1. Mean Squared Error

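For N samples, MSE averages the squared differences between targets and predictions:

$$\text{MSE} = \frac{1}{N} \sum_{i=1}^{N} (y_i - \hat{y}_i)^2$$

so its gradient with respect to a prediction is $\frac{2}{N}(\hat{y}_i - y_i)$.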

  • MSE Gradient
import tensorflow as tf

x = tf.random.normal([2, 4])
w = tf.random.normal([4, 3])
b = tf.zeros([3])
y = tf.constant([2, 0])

with tf.GradientTape() as tape:
    tape.watch([w, b])  # note: without watch here, w and b would have to be defined as tf.Variable
    prob = tf.nn.softmax(x@w+b, axis=1)
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y, depth=3), prob))

grads = tape.gradient(loss, [w, b])
print(grads[0])
print(grads[1])

3.2. Cross Entropy Loss

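Cross entropy measures the distance between a target distribution p and a predicted distribution q:

$$H(p, q) = -\sum_{i} p_i \log q_i$$

With a one-hot target it reduces to $-\log q_{label}$, the negative log-probability assigned to the true class.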

  • What softmax does:
    • maps the logit values into the range 0~1 so that the resulting probabilities sum to 1
    • amplifies differences: the strong get stronger and the weak get weaker (a quick numeric check follows below)

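A quick numeric check of both points (the logit values here are arbitrary):

import tensorflow as tf

logits = tf.constant([2.0, 1.0, 0.1])
probs = tf.nn.softmax(logits)
print(probs.numpy())         # ~[0.659, 0.242, 0.099]
print(tf.reduce_sum(probs))  # sums to 1.0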

  • Cross-entropy gradient
import tensorflow as tf

x = tf.random.normal([2, 4])
w = tf.random.normal([4, 3])
b = tf.zeros([3])
y = tf.constant([2, 0])

with tf.GradientTape() as tape:
    tape.watch([w, b])  # note: without watch here, w and b would have to be defined as tf.Variable
    logits = x@w+b
    loss = tf.reduce_mean(tf.losses.categorical_crossentropy(tf.one_hot(y, depth=3), logits, from_logits=True))

grads = tape.gradient(loss, [w, b])
print(grads[0])
print(grads[1])

4. Optimizing the Himmelblau Function

4.1. Himmelblau function

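The Himmelblau function is a classic two-variable test function for optimization algorithms:

$$f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2$$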

4.2. Minima

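The function has four identical local minima, all with f = 0: (3.0, 2.0), (-2.805118, 3.131312), (-3.779310, -3.283186), and (3.584428, -1.848126). Which one gradient descent reaches depends on the initial point.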

4.3. Gradient Descent

import tensorflow as tf
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt


def himmelblau(x):
    # f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2


x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x, y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X, Y maps: ', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

# try different initial points, e.g. [1., 0.], [-4, 0.], [4, 0.]
x = tf.constant([-4, 0.])

for step in range(200):
    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)

    grads = tape.gradient(y, [x])[0]
    x -= 0.01 * grads  # one gradient descent step with learning rate 0.01

    if step % 20 == 0:
        print('step {}: x = {}, f(x) = {}'
              .format(step, x.numpy(), y.numpy()))

5. FashionMNIST in Practice

import tensorflow as tf
from tensorflow.keras import datasets, Sequential, layers, optimizers


def pre_process(x, y):
    # 在预处理之间, 数据类型已经从numpy类型转换成tensor类型
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y


(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
print(x.shape, y.shape, type(x), type(y), x.dtype, y.dtype)

db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(pre_process).shuffle(10000).batch(128)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(pre_process).batch(128)  # no need to shuffle the test set

# db_iter = iter(db)
# sample = next(db_iter)
# print("batch: ", sample[0].shape, sample[1].shape)

# build the model
model = Sequential([
    layers.Dense(256, activation=tf.nn.relu),  # [b, 784] ==> [b, 256]
    layers.Dense(128, activation=tf.nn.relu),  # [b, 256] ==> [b, 128]
    layers.Dense(64, activation=tf.nn.relu),   # [b, 128] ==> [b, 64]
    layers.Dense(32, activation=tf.nn.relu),   # [b, 64] ==> [b, 32]
    layers.Dense(10),   # [b, 32] ==> [b, 10], logits: no activation, since the loss uses from_logits=True
])

model.build(input_shape=[None, 28*28])
model.summary()

# optimizer
optimizer = optimizers.Adam(learning_rate=1e-3)


def main():
    for epoch in range(30):
        for step, (x, y) in enumerate(db):
            # x: [b, 28, 28] => [b, 784]
            # y: [b]
            # flatten each image into a vector
            x = tf.reshape(x, [-1, 28 * 28])

            with tf.GradientTape() as tape:
                # [b, 784] => [b, 10]
                logits = model(x)
                # one-hot encode the labels
                y_onehot = tf.one_hot(y, depth=10)
                # mean squared error (logged for comparison)
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                # cross-entropy loss (drives the parameter update)
                loss_ec = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True))

            grads = tape.gradient(loss_ec, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                print(epoch, step, 'loss: ', float(loss_ec), float(loss_mse))

        # test
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # x:[b, 28, 28] => [b, 784]
            # y:[b]
            x = tf.reshape(x, [-1, 28*28])
            # [b, 10]
            logits = model(x)
            # logits => prob, [b, 10]
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 10] => [b], int64
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.equal(pred, y)
            correct = tf.reduce_sum(tf.cast(correct, dtype=tf.int32))
            total_correct += int(correct)
            total_num += x.shape[0]

        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)


if __name__ == '__main__':
    main()

6. TensorBoard Visualization


6.1. How It Works

  • listen on a logdir
  • build a summary instance
  • feed data into the summary instance (a minimal sketch of all three steps follows this list)
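A minimal sketch of the three steps; the log directory name 'logs/demo' and the scalar value are placeholders:

import tensorflow as tf

# step 1: start the listener from a shell:
#   tensorboard --logdir logs
# step 2: build a summary writer instance
writer = tf.summary.create_file_writer('logs/demo')
# step 3: feed data into it
with writer.as_default():
    tf.summary.scalar('loss', 0.5, step=1)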

6.2. Step 1: Run the Listener


6.3. Step 2: Build the Summary


6.4. Step 3: Feed Data

  • scalar — tf.summary.scalar
  • single image — tf.summary.image
  • multi image — pass max_outputs to tf.summary.image, or compose a matplotlib grid and log it as a single image
    (the full code in 6.5 demonstrates all three)

6.5. Code

import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
import datetime
from matplotlib import pyplot as plt
import io


def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)

    return x, y


def plot_to_image(figure):
    """Converts the matplotlib plot specified by 'figure' to a PNG image and
    returns it. The supplied figure is closed and inaccessible after this call."""
    # Save the plot to a PNG in memory.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    # Closing the figure prevents it from being displayed directly inside
    # the notebook.
    plt.close(figure)
    buf.seek(0)
    # Convert PNG buffer to TF image
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # Add the batch dimension
    image = tf.expand_dims(image, 0)
    return image


def image_grid(images):
    """Return a 5x5 grid of the MNIST images as a matplotlib figure."""
    # Create a figure to contain the plot.
    figure = plt.figure(figsize=(10, 10))
    for i in range(25):
        # Start next subplot.
        plt.subplot(5, 5, i + 1, title='name')
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i], cmap=plt.cm.binary)

    return figure


batchsz = 128
(x, y), (x_val, y_val) = datasets.mnist.load_data()
print('datasets:', x.shape, y.shape, x.min(), x.max())

db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)

ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_val = ds_val.map(preprocess).batch(batchsz, drop_remainder=True)

network = Sequential([layers.Dense(256, activation='relu'),
                      layers.Dense(128, activation='relu'),
                      layers.Dense(64, activation='relu'),
                      layers.Dense(32, activation='relu'),
                      layers.Dense(10)])
network.build(input_shape=(None, 28 * 28))
network.summary()

optimizer = optimizers.Adam(learning_rate=0.01)

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/' + current_time
summary_writer = tf.summary.create_file_writer(log_dir)

# get x from (x,y)
sample_img = next(iter(db))[0]
# get first image instance
sample_img = sample_img[0]
sample_img = tf.reshape(sample_img, [1, 28, 28, 1])
with summary_writer.as_default():
    tf.summary.image("Training sample:", sample_img, step=0)

for step, (x, y) in enumerate(db):

    with tf.GradientTape() as tape:
        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 28 * 28))
        # [b, 784] => [b, 10]
        out = network(x)
        # [b] => [b, 10]
        y_onehot = tf.one_hot(y, depth=10)
        # [b]
        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))

    grads = tape.gradient(loss, network.trainable_variables)
    optimizer.apply_gradients(zip(grads, network.trainable_variables))

    if step % 100 == 0:
        print(step, 'loss:', float(loss))
        with summary_writer.as_default():
            tf.summary.scalar('train-loss', float(loss), step=step)

    # evaluate on the validation set every 500 steps
    if step % 500 == 0:
        total, total_correct = 0., 0

        for _, (x, y) in enumerate(ds_val):
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28 * 28))
            # [b, 784] => [b, 10]
            out = network(x)
            # [b, 10] => [b]
            pred = tf.argmax(out, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # bool type
            correct = tf.equal(pred, y)
            # bool tensor => int tensor => numpy
            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
            total += x.shape[0]

        print(step, 'Evaluate Acc:', total_correct / total)

        # print(x.shape)
        val_images = x[:25]
        val_images = tf.reshape(val_images, [-1, 28, 28, 1])
        with summary_writer.as_default():
            tf.summary.scalar('test-acc', float(total_correct / total), step=step)
            tf.summary.image("val-onebyone-images:", val_images, max_outputs=25, step=step)

            val_images = tf.reshape(val_images, [-1, 28, 28])
            figure = image_grid(val_images)
            tf.summary.image('val-images:', plot_to_image(figure), step=step)
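
With the script running, start TensorBoard from a shell with tensorboard --logdir logs and open http://localhost:6006 (the default port) to watch the train-loss scalar, test accuracy, and image panels update live.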

7. For the Full Set of Course Videos + PPT + Code, Contact Me

  • Option 1: private-message me on CSDN!
  • Option 2: email 594042358@qq.com, or add me on QQ: 594042358!