1. Regression Problems
1.1 Linear Regression
loss is the error function; the goal is to make this error as small as possible, and the method used is gradient descent.
lr: the learning rate, i.e. the step size of each update.
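Written out (a restatement of the above for the linear model y = wx + b used next):

loss = \sum_i (w x_i + b - y_i)^2, \qquad w' = w - lr \cdot \frac{\partial loss}{\partial w}, \qquad b' = b - lr \cdot \frac{\partial loss}{\partial b}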
1.2 Regression in Practice
import numpy as np

# y = wx + b
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # compute mean-squared-error
        totalError += (y - (w * x + b)) ** 2
    # average loss for each point
    return totalError / float(len(points))

def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # grad_b = 2(wx+b-y)
        b_gradient += (2/N) * ((w_current * x + b_current) - y)
        # grad_w = 2(wx+b-y)*x
        w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
    # update w' = w - lr * gradient
    new_b = b_current - (learningRate * b_gradient)
    new_w = w_current - (learningRate * w_gradient)
    return [new_b, new_w]

def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    # update for several iterations
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]

def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0  # initial y-intercept guess
    initial_w = 0  # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w,
                  compute_error_for_line_given_points(initial_b, initial_w, points))
          )
    print("Running...")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}".
          format(num_iterations, b, w,
                 compute_error_for_line_given_points(b, w, points))
          )

if __name__ == '__main__':
    run()
1.3 Handwritten Digit Problem
MNIST dataset: [28, 28, 1] — 28 rows, 28 columns, 1 (grayscale) channel
Flatten: [28, 28] → [28*28]
Introduce the nonlinear ReLU function
Search for the optimal values of these six parameters (w1, b1, w2, b2, w3, b3); ReLU was applied three times
Summary:
Then the class is identified from the index of the largest output
1.4 Handwritten Digit Problem in Practice
The MNIST dataset in practice:
Step 1 & 2: compute out & loss
Step 3: compute gradients and optimize
Step 4: loop
Source code:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

(x, y), (x_val, y_val) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10)
print(x.shape, y.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(200)

model = keras.Sequential([
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)])
optimizer = optimizers.SGD(learning_rate=0.001)

def train_epoch(epoch):
    # Step 4. loop over batches
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # Step 1. compute output
            # [b, 784] => [b, 10]
            out = model(x)
            # Step 2. compute loss
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step 3. optimize and update w1, w2, w3, b1, b2, b3
        grads = tape.gradient(loss, model.trainable_variables)
        # w' = w - lr * grad
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())

def train():
    for epoch in range(30):
        train_epoch(epoch)

if __name__ == '__main__':
    train()
2. TensorFlow Basic Operations
2.1 Data Types
TensorFlow installation
Install via the Tsinghua mirror (found through Baidu), then proceed with a normal install. In the Anaconda Prompt window, use:
activate tfenv — activate the environment
deactivate tfenv — leave the environment
2.1.1 Commonly Used Containers
list
np.array
tf.Tensor
2.1.2 What Is a Tensor
scalar: 1.1
vector: [1.1], [1.1,2.2,...]
matrix: [[1.1,2.2],[3.3,4.4],[5.5,6.6]]
tensor: rank>2
2.1.3 Data Types in a Scientific Computing Library
int, float, double
bool
string
Code example:
import tensorflow as tf
import numpy as np  # used by the np.arange examples below
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
2.1.4 Creating Tensors
print(tf.constant(1))
# tf.Tensor(1, shape=(), dtype=int32)
print(tf.constant(1.))
# tf.Tensor(1.0, shape=(), dtype=float32)
# print(tf.constant(2.2, dtype=tf.int32))  # raises:
# TypeError: Cannot convert 2.2 to EagerTensor of dtype int32
print(tf.constant(2., dtype=tf.double))
# tf.Tensor(2.0, shape=(), dtype=float64)
print(tf.constant([True, False]))
# tf.Tensor([ True False], shape=(2,), dtype=bool)
print(tf.constant('hello,world.'))
# tf.Tensor(b'hello,world.', shape=(), dtype=string)
2.1.5 Tensor Properties
with tf.device("cpu"):
a = tf.constant([1])
with tf.device('gpu'):
b = tf.range(4)
print(a.device)
# /job:localhost/replica:0/task:0/device:CPU:0 返回当前的设备
print(b.device)
# /job:localhost/replica:0/task:0/device:CPU:0 有gpu应该显示gpu的
aa = a.gpu()
print(aa.device)
# /job:localhost/replica:0/task:0/device:CPU:0 有gpu应该显示gpu的
bb = b.cpu()
print(bb.device)
# /job:localhost/replica:0/task:0/device:CPU:0
print(b.numpy())
# [0 1 2 3]
print(b.ndim) # 1
print(tf.rank(tf.ones([3, 4, 2])))
# tf.Tensor(3, shape=(), dtype=int32)
保证要不都在GPU要不都在CPU,
2.1.6 Checking Data Types
a = tf.constant([1.])
b = tf.constant([True, False])
c = tf.constant('hello,world.')
d = np.arange(4)
print(isinstance(a, tf.Tensor))
# True
print(tf.is_tensor(b))
# True
print(a.dtype, b.dtype, c.dtype)
# <dtype: 'float32'> <dtype: 'bool'> <dtype: 'string'>
print(a.dtype == tf.float32)
# True
print(c.dtype == tf.float32)
# False
2.1.7 Type Conversion
a = np.arange(5)
print(a) # [0 1 2 3 4]
print(a.dtype) # int32
aa = tf.convert_to_tensor(a)
print(aa) # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
aa = tf.convert_to_tensor(a, dtype=tf.int64)
print(aa) # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
a = tf.cast(aa, dtype=tf.float32)
print(a) # tf.Tensor([0. 1. 2. 3. 4.], shape=(5,), dtype=float32)
b = tf.cast(a, dtype=tf.double)
print(b) # tf.Tensor([0. 1. 2. 3. 4.], shape=(5,), dtype=float64)
c = tf.cast(b, dtype=tf.int32)
print(c)
# tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
bool also converts to int (and back):
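A small sketch of the bool ↔ int round trip with tf.cast (the values here are my own):

b = tf.constant([0, 1])
bb = tf.cast(b, dtype=tf.bool)  # [False  True]
print(tf.cast(bb, tf.int32))    # back to [0 1]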
2.1.8 tf.Variable
a = tf.range(5)
print(a)  # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
b = tf.Variable(a)
print(b.dtype)  # <dtype: 'int32'>
b = tf.Variable(a, name='input_data')
print(b.trainable)  # True: b is now a trainable parameter
print(tf.is_tensor(b))  # True
2.1.9 Pulling Data Back Into numpy
a = tf.range(5)
b = tf.Variable(a)
print(a.numpy()) # [0 1 2 3 4]
print(b.numpy()) # [0 1 2 3 4]
a = tf.ones([])
print(a.numpy()) # 1.0
print(int(a))  # 1, converted to a Python int
print(float(a))  # 1.0, converted to a Python float
2.2 Creating Tensors
All of the following can create a Tensor:
2.2.1 From numpy and list
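The notes left this section empty; a minimal sketch (the example values are mine):

import numpy as np
print(tf.convert_to_tensor(np.ones([2, 3])))  # from numpy; dtype float64
print(tf.convert_to_tensor([1, 2]))           # from a list; dtype int32
print(tf.convert_to_tensor([[1], [2.]]))      # nested list; dtype float32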
2.2.2 From tf.zeros
a = tf.zeros_like([])
print(a) # tf.Tensor([], shape=(0,), dtype=float32)
b = tf.zeros([2, 2])
print(b)
# tf.Tensor(
# [[0. 0.]
# [0. 0.]], shape=(2, 2), dtype=float32)
c = tf.zeros([2, 3, 3])
print(c)
# tf.Tensor(
# [[[0. 0. 0.]
# [0. 0. 0.]
# [0. 0. 0.]]
#
# [[0. 0. 0.]
# [0. 0. 0.]
# [0. 0. 0.]]], shape=(2, 3, 3), dtype=float32)
2.2.3 From tf.zeros_like
a = tf.zeros([2, 3, 3])
b = tf.zeros_like(a)
# b has the same shape and dtype as a
2.2.4 From tf.ones
a = tf.ones(1)
print(a) # tf.Tensor([1.], shape=(1,), dtype=float32)
b = tf.ones([2, 3])
print(b)
# tf.Tensor(
# [[1. 1. 1.]
# [1. 1. 1.]], shape=(2, 3), dtype=float32)
c = tf.ones_like(a)
print(c) # tf.Tensor([1.], shape=(1,), dtype=float32)
2.2.5 tf.fill
a = tf.fill([2, 2], 9)
print(a)
# tf.Tensor(
# [[9 9]
# [9 9]], shape=(2, 2), dtype=int32)
2.2.6 Random Initialization: Normal
The standard normal distribution and the truncated normal distribution:
a = tf.random.normal([2, 2], mean=1, stddev=1)
print(a)
# tf.Tensor(
# [[1.7453098 1.6442213 ]
# [2.7475848 0.10199422]], shape=(2, 2), dtype=float32)
b = tf.random.truncated_normal([2, 2], mean=0, stddev=1)
print(b)
# tf.Tensor(
# [[ 1.3680507 -1.1750759 ]
# [ 0.12701823 1.2563375 ]], shape=(2, 2), dtype=float32)
2.2.7 Uniform Distribution
a = tf.random.uniform([2, 2], minval=0, maxval=1)
print(a)
# tf.Tensor(
# [[0.16156495 0.70819294]
# [0.67944014 0.12850523]], shape=(2, 2), dtype=float32)
2.2.8 Random Shuffling (keeping pairs aligned)
idx = tf.range(10)
idx = tf.random.shuffle(idx)
print(idx)  # tf.Tensor([7 4 6 3 8 0 9 2 5 1], shape=(10,), dtype=int32), randomly shuffled
a = tf.random.normal([10, 784])
b = tf.random.uniform([10], maxval=10, dtype=tf.int32)
a = tf.gather(a, idx)
b = tf.gather(b, idx)
print(a)
print(b) # tf.Tensor([7 8 4 8 5 7 8 1 2 4], shape=(10,), dtype=int32)
2.2.9 tf.constant: much like ordinary assignment
a = tf.constant(1)
print(a) # tf.Tensor(1, shape=(), dtype=int32)
b = tf.constant([1])
print(b) # tf.Tensor([1], shape=(1,), dtype=int32)
c = tf.constant([1, 2.])
print(c) # tf.Tensor([1. 2.], shape=(2,), dtype=float32)
2.2.10 Application: Computing a Loss
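No code was captured here; a minimal sketch of the usual loss computation (the shapes [4, 10] are my own choice):

out = tf.random.uniform([4, 10])        # stand-in network output
y = tf.one_hot(tf.range(4), depth=10)   # stand-in one-hot labels
loss = tf.keras.losses.mse(y, out)      # per-sample MSE: shape [4]
loss = tf.reduce_mean(loss)             # scalar loss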
2.2.11 Vector
Dense performs the dimension transform; the kernel is w and the bias is b.
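A quick way to see the kernel and bias (the layer sizes here are arbitrary):

net = tf.keras.layers.Dense(10)
net.build((4, 8))         # input dimension 8
print(net.kernel.shape)   # (8, 10) -> this is w
print(net.bias.shape)     # (10,)   -> this is b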
2.2.12 Matrix
4 photos, each flattened from 48*48, then mapped to 10 dimensions.
2.2.13 3-, 4-, and 5-D Tensors
3-D (text): [how many sentences, words per sentence, encoding length per word]
4-D (images): [how many images, height, width, 3 RGB channels]
5-D (meta-learning): [how many tasks, images per task, height, width, 3 RGB channels]
2.3 Indexing and Slicing
2.3.1 Basic Indexing (not recommended)
2.3.2 numpy-Style Indexing
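No example was captured here; a small sketch (the shape is my own choice):

a = tf.random.normal([4, 28, 28, 3])
print(a[1].shape)        # (28, 28, 3)
print(a[1, 2].shape)     # (28, 3)
print(a[1, 2, 3].shape)  # (3,)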
2.3.3 start:end
The last element has index -1 and the first has index 0.
The slice includes start but excludes end.
a = tf.range(10)
print(a) # tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32)
print(a[-1:]) # tf.Tensor([9], shape=(1,), dtype=int32)
print(a[-2:]) # tf.Tensor([8 9], shape=(2,), dtype=int32)
print(a[:2]) # tf.Tensor([0 1], shape=(2,), dtype=int32)
print(a[:-1]) # tf.Tensor([0 1 2 3 4 5 6 7 8], shape=(9,), dtype=int32)
2.3.4 Slicing with start:end:step
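No notes were taken for this one; a quick sketch of step slicing (the shapes are mine):

a = tf.random.normal([4, 28, 28, 3])
print(a[:, 0:28:2, 0:28:2, :].shape)  # (4, 14, 14, 3)
print(a[:, ::7, ::7, :].shape)        # (4, 4, 4, 3)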
2.3.5 Reversing with ::-1
a = tf.range(4)
print(a) # tf.Tensor([0 1 2 3], shape=(4,), dtype=int32)
print(a[::-1]) # tf.Tensor([3 2 1 0], shape=(4,), dtype=int32)
print(a[::-2])  # tf.Tensor([3 1], shape=(2,), dtype=int32), samples every other element
print(a[2::-2]) # tf.Tensor([2 0], shape=(2,), dtype=int32)
In the last line, the leading 2 is the start index; from there it steps backward by 2.
2.3.6 The Ellipsis (...)
... means take everything along all the remaining dimensions.
a = tf.random.normal([2, 4, 28, 28, 3])
print(a[0].shape) # (4, 28, 28, 3)
print(a[0, ...].shape) # (4, 28, 28, 3)
2.3.7 Selective Indexing: tf.gather, tf.gather_nd, tf.boolean_mask
1. tf.gather
2. tf.gather_nd
Running example: 4 classes, 35 students, 8 subject scores, i.e. shape [4, 35, 8]
3. tf.boolean_mask
axis defaults to 0.
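A combined sketch using the class/students/subjects example (the data is random):

a = tf.random.normal([4, 35, 8])
print(tf.gather(a, axis=0, indices=[2, 3]).shape)     # (2, 35, 8): classes 2 and 3
print(tf.gather(a, axis=1, indices=[2, 3, 7]).shape)  # (4, 3, 8): three chosen students
print(tf.gather_nd(a, [[0, 1], [1, 2]]).shape)        # (2, 8): specific (class, student) pairs
print(tf.boolean_mask(a, mask=[True, True, False, False]).shape)  # (2, 35, 8); axis defaults to 0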
2.4 Dimension Transforms
2.4.1 View and Reshape
[b, 28, 28]
[b, 28*28]
[b, 2, 14*28]  # changes how the image is interpreted; nothing else changes
[b, 28, 28, 1]  # only adds a conceptual dimension; the data is unchanged
a = tf.random.normal([4, 28, 28, 3])
print(a.shape, a.ndim) # (4, 28, 28, 3) 4
print(tf.reshape(a, [4, 784, 3]).shape)  # (4, 784, 3), [b, pixels, c]
print(tf.reshape(a, [4, -1, 3]).shape)  # (4, 784, 3)
print(tf.reshape(a, [4, 784*3]).shape)  # (4, 2352), flattened data points
print(tf.reshape(a, [4, -1]).shape)  # (4, 2352)
reshape can introduce subtle bugs: after a reshape you may no longer know the values of H and W, or their order may have changed, so every reshape should follow a concrete physical meaning.
2.4.2 Transpose: tf.transpose
a = tf.random.normal([4, 3, 2, 1])
print(a.shape) # (4, 3, 2, 1)
print(tf.transpose(a).shape) # (1, 2, 3, 4)
print(tf.transpose(a, perm=[0, 1, 3, 2]).shape)
# (4, 3, 1, 2); perm[i] names the original axis placed at position i
Going from [b, h, w, c] to [b, w, h, c] moves the actual content, so it needs this transpose (a reshape would scramble it).
Converting data between PyTorch's [b, c, h, w] layout and TensorFlow's [b, h, w, c] layout also uses this dimension transform.
2.4.3 Expanding Dimensions: tf.expand_dims
Same example: four classes, 35 students, 8 subjects. A non-negative axis inserts the new dimension before that position; a negative axis inserts it after.
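A sketch with that example shape:

a = tf.random.normal([4, 35, 8])
print(tf.expand_dims(a, axis=0).shape)   # (1, 4, 35, 8): inserted before axis 0
print(tf.expand_dims(a, axis=3).shape)   # (4, 35, 8, 1)
print(tf.expand_dims(a, axis=-1).shape)  # (4, 35, 8, 1): inserted after the last axis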
2.4.4 Squeezing Dimensions: tf.squeeze
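No example was captured; a minimal sketch:

a = tf.zeros([1, 2, 1, 1, 3])
print(tf.squeeze(a).shape)          # (2, 3): all size-1 axes removed
print(tf.squeeze(a, axis=0).shape)  # (2, 1, 1, 3): only the chosen axis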
2.5 Dimension Expansion (Broadcasting)
2.5.1 Broadcasting
Alignment starts from the small (trailing) dimensions; broadcasting saves memory and avoids redundant computation.
(The slides here showed which shape combinations can and cannot broadcast.)
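A sketch of shapes that do and do not broadcast (the examples are my own):

x = tf.random.normal([4, 32, 32, 3])
print((x + tf.random.normal([3])).shape)          # OK: [3] -> [1, 1, 1, 3] -> [4, 32, 32, 3]
print((x + tf.random.normal([32, 32, 1])).shape)  # OK: trailing dims align
# x + tf.random.normal([1, 4, 1, 1])              # fails: 4 cannot match 32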
2.5.2 Explicit Broadcasting: tf.broadcast_to
2.5.3 Broadcast VS Tile
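Nothing was captured under this heading; a minimal comparison sketch:

a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4])                  # virtual expansion; no copy until needed
a2 = tf.tile(tf.expand_dims(a, axis=0), [2, 1, 1])  # same shape, but the data is physically copied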
2.6 Math Operations
+ - * / : element-wise addition, subtraction, multiplication, division
@, tf.matmul: matrix multiplication
reduce_mean/max/min/sum: reductions (mean, max, min, sum)
tf.math.log, tf.exp, tf.pow, tf.sqrt
Example:
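The example itself wasn't captured; a minimal sketch of each operator family:

a = tf.fill([2, 2], 2.)
b = tf.ones([2, 2])
print(a + b, a - b, a * b, a / b)  # element-wise
print(a @ b)                       # same as tf.matmul(a, b)
print(tf.math.log(a))              # natural log
print(tf.exp(a))
print(tf.pow(a, 3))                # same as a ** 3
print(tf.sqrt(a))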
2.7 Forward Propagation (Tensors) in Practice
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# x: [60k, 28, 28]
# y: [60k], one label (0-9) for each of the 60k images
(x, y), _ = datasets.mnist.load_data()  # load the dataset
# x: [0~255] => [0~1.]; convert x and y to tensors
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))
# inspect the min and max of x and y

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
# the full set is 60k images; take 128 at a time instead
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# sample[0] is x, sample[1] is y (a batch of 128 integer labels)

# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]; 784 in, 256 out, mean 0, stddev 0.1
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10):  # iterate over the whole dataset 10 times
    for step, (x, y) in enumerate(train_db):  # each step is one batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:  # the tape records ops on tf.Variable; required for autodiff
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute the loss
            # out: [b, 10]
            # y: [b] => [b, 10], one-hot encoding
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum(y-out)^2), mean squared error
            # [b, 10]; square is element-wise squaring
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute the gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])  # in-place update
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
3. TensorFlow Advanced Operations
3.1 Merging and Splitting
First make sure all the other dimensions match.
concat and stack are merge operations;
unstack and split are splitting operations.
3.1.1 tf.concat: Merge Along an Existing Axis
axis selects the dimension along which to concatenate.
3.1.2 tf.stack: Create a New Axis
3.1.3 tf.unstack: Split an Axis Into Unit Pieces
3.1.4 tf.split: Split With Specified Sizes
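A combined sketch (the shapes follow the class-scores example used earlier):

a = tf.ones([4, 35, 8])
b = tf.ones([2, 35, 8])
print(tf.concat([a, b], axis=0).shape)  # (6, 35, 8): the other dims must match
c = tf.ones([4, 35, 8])
d = tf.stack([a, c], axis=0)            # (2, 4, 35, 8): new axis; shapes must be equal
res = tf.unstack(d, axis=0)             # a list of 2 tensors, each (4, 35, 8)
res = tf.split(d, num_or_size_splits=[2, 2, 4], axis=3)  # pieces of width 2, 2, 4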
3.2 Statistics
L2 norm, infinity norm, L1 norm:
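Their standard definitions, for reference:

\|x\|_2 = \sqrt{\textstyle\sum_i x_i^2}, \qquad \|x\|_1 = \textstyle\sum_i |x_i|, \qquad \|x\|_\infty = \max_i |x_i|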
3.2.1 tf.norm: the L2 Norm
ord=n gives the n-norm.
3.2.2 tf.norm: the L1 Norm
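A small sketch of both:

a = tf.ones([2, 2])
print(tf.norm(a))                 # 2.0: L2 norm by default, sqrt(1+1+1+1)
print(tf.norm(a, ord=2, axis=1))  # row-wise L2: [1.414, 1.414]
print(tf.norm(a, ord=1))          # 4.0: L1 norm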
3.2.3 reduce_min/max/mean
The reduce prefix is there to tell you a dimension-reducing operation is involved.
3.2.4 argmax/argmin: Indices of the Max/Min
Returns index positions; axis selects the dimension to search.
3.2.5 Comparison: tf.equal
Example: computing accuracy.
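A minimal sketch (out and y are stand-ins for network output and labels):

out = tf.random.normal([10, 10])
y = tf.random.uniform([10], maxval=10, dtype=tf.int32)
pred = tf.cast(tf.argmax(out, axis=1), tf.int32)  # argmax returns int64, so cast
correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), tf.int32))
acc = int(correct) / y.shape[0]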
3.2.6 Deduplication: tf.unique
3.3 Tensor Sorting
3.3.1 sort/argsort
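A quick sketch:

a = tf.random.shuffle(tf.range(5))
print(tf.sort(a, direction='DESCENDING'))    # sorted values
idx = tf.argsort(a, direction='DESCENDING')  # indices that would sort a
print(tf.gather(a, idx))                     # same result as tf.sort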
3.3.2 Top-k Accuracy
The largest k entries:
res.indices returns the indices;
res.values returns the values.
Top-1 asks which single class is most likely:
the prediction is 3 but the true label is 2, so accuracy is 0%.
Top-2 asks which two classes are most likely:
the predictions are 3 and 2 and the true label is 2, so accuracy is 100%.
[2, 1, 0] orders the classes from most likely to least likely.
The full top-k computation:
import tensorflow as tf
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.random.set_seed(2467)

def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.shape[0]

    pred = tf.math.top_k(output, maxk).indices
    pred = tf.transpose(pred, perm=[1, 0])
    target_ = tf.broadcast_to(target, pred.shape)
    # [maxk, b]
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        acc = float(correct_k * (100.0 / batch_size))
        res.append(acc)
    return res

output = tf.random.normal([10, 6])  # 10 samples, 6 classes
output = tf.math.softmax(output, axis=1)
target = tf.random.uniform([10], maxval=6, dtype=tf.int32)
print('prob:', output.numpy())
pred = tf.argmax(output, axis=1)
print('pred:', pred.numpy())
print('label:', target.numpy())

acc = accuracy(output, target, topk=(1, 2, 3, 4, 5, 6))
print('top-1-6 acc:', acc)
3.4 Padding and Tiling
3.4.1 tf.pad
In the paddings argument, each pair says how much to add before and after that axis: 0 pads nothing on that side, 1 pads one row/column.
Image padding:
pad two rows/columns on each side, e.g. 28x28 → 32x32.
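A sketch of both cases (the image shapes are the usual MNIST ones):

a = tf.reshape(tf.range(9), [3, 3])
print(tf.pad(a, [[1, 0], [0, 1]]))  # one row of zeros on top, one column on the right
b = tf.random.normal([4, 28, 28, 3])
print(tf.pad(b, [[0, 0], [2, 2], [2, 2], [0, 0]]).shape)  # (4, 32, 32, 3)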
3.4.2 tf.tile
tf.tile(a, [1, 2]): the 1 leaves the first axis uncopied; the 2 repeats the second axis twice.
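A quick sketch:

a = tf.reshape(tf.range(4), [2, 2])
print(tf.tile(a, [1, 2]).shape)  # (2, 4): rows untouched, columns doubled
print(tf.tile(a, [2, 1]).shape)  # (4, 2)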
3.5 Tensor Clipping
3.5.1 clip_by_value
It can implement the ReLU function:
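A sketch:

a = tf.range(10) - 5             # values in [-5, 4]
print(tf.clip_by_value(a, 2, 8)) # clamped into [2, 8]
print(tf.maximum(a, 0))          # exactly ReLU
print(tf.nn.relu(a))             # the built-in equivalent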
3.5.2 clip_by_norm
Clips by norm: the tensor is rescaled so its L2 norm stays within the limit, which preserves its direction. (I didn't fully get this one at first.)
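A sketch that may make it clearer:

a = tf.random.normal([2, 2], mean=10)
print(tf.norm(a))            # some L2 norm, e.g. around 20
aa = tf.clip_by_norm(a, 15)
print(tf.norm(aa))           # 15: same direction, smaller magnitude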
3.5.3 Gradient Clipping
Keeps gradients in a sane range so the network trains better.
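A sketch of the usual pattern (assumes tape, loss, model, and optimizer from a training loop like the ones above):

grads = tape.gradient(loss, model.trainable_variables)
grads, _ = tf.clip_by_global_norm(grads, 15)  # rescale jointly, keeping relative directions
optimizer.apply_gradients(zip(grads, model.trainable_variables))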
3.6 Advanced Operations
mask: obtain a True/False matrix from a comparison
where: obtain the coordinates of the True elements
where with three arguments: selects values from A or B according to the condition
scatter_nd: targeted updates, written onto an all-zeros base tensor
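A combined sketch (random data):

a = tf.random.normal([3, 3])
mask = a > 0
print(tf.boolean_mask(a, mask))             # the positive values
indices = tf.where(mask)                    # their coordinates
print(tf.gather_nd(a, indices))             # same values, via coordinates
print(tf.where(mask, a, tf.zeros_like(a)))  # take from a where True, else 0
# scatter_nd: write updates onto an all-zeros base of the given shape
print(tf.scatter_nd(tf.constant([[4], [3], [1]]),
                    tf.constant([9, 10, 11]),
                    tf.constant([8])))      # [ 0 11  0 10  9  0  0  0]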
meshgrid:
generating contour plots.
import tensorflow as tf
import matplotlib.pyplot as plt

def func(x):
    """
    :param x: [b, 2]
    :return:
    """
    z = tf.math.sin(x[..., 0]) + tf.math.sin(x[..., 1])
    return z

x = tf.linspace(0., 2*3.14, 500)
y = tf.linspace(0., 2*3.14, 500)
# [500, 500]
point_x, point_y = tf.meshgrid(x, y)
# [500, 500, 2]
points = tf.stack([point_x, point_y], axis=2)
# points = tf.reshape(points, [-1, 2])
print('points:', points.shape)
z = func(points)
print('z:', z.shape)

plt.figure('plot 2d func value')
plt.imshow(z, origin='lower', interpolation='none')
plt.colorbar()
plt.figure('plot 2d func contour')
plt.contour(point_x, point_y, z)
plt.colorbar()
plt.show()
4. Neural Networks and Fully Connected Layers
4.1 Data Loading
MNIST: images of the handwritten digits 0-9
CIFAR10/100: small color images in 10 (or 100) classes
4.1.1 tf.data.Dataset
Built specifically for iterating over datasets.
4.1.2 .shuffle
4.1.3 .map
4.1.4 .batch
4.1.5 StopIteration
4.1.6 .repeat()
.repeat(2) iterates the dataset twice; with no argument it repeats indefinitely.
Example:
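A sketch of the whole pipeline (preprocess is my own helper name):

def preprocess(x, y):
    x = tf.cast(x, tf.float32) / 255.
    y = tf.one_hot(tf.cast(y, tf.int32), depth=10)
    return x, y

(x, y), _ = tf.keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.shuffle(10000).map(preprocess).batch(128).repeat(2)
# iterating past the end of a non-repeating dataset raises StopIteration;
# .repeat() with no argument avoids that by looping forever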
4.2 Testing (Tensors) in Practice
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# x: [60k, 28, 28], x_test: [10k, 28, 28]
# y: [60k], y_test: [10k]
(x, y), (x_test, y_test) = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(100):  # iterate over the dataset
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:  # records ops on tf.Variable
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum(y-out)^2)
            # [b, 10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))

    # test/evaluation using the current [w1, b1, w2, b2, w3, b3]
    total_correct, total_num = 0, 0
    for step, (x, y) in enumerate(test_db):
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        # [b, 784] => [b, 256] => [b, 128] => [b, 10]
        h1 = tf.nn.relu(x@w1 + b1)
        h2 = tf.nn.relu(h1@w2 + b2)
        out = h2@w3 + b3

        # out: [b, 10] ~ R
        # prob: [b, 10] ~ [0, 1]
        prob = tf.nn.softmax(out, axis=1)
        # [b, 10] => [b]; note: argmax returns int64!
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype=tf.int32)
        # y: [b], int32
        # print(pred.dtype, y.dtype)
        correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
        correct = tf.reduce_sum(correct)

        total_correct += int(correct)
        total_num += x.shape[0]

    acc = total_correct / total_num
    print('test acc:', acc)
4.3 Fully Connected Layers
A three-level nested structure.
4.3.1 Sequential: Multiple Layers
A multi-layer network in practice:
import tensorflow as tf
from tensorflow import keras

x = tf.random.normal([2, 3])

model = keras.Sequential([
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2)
])
model.build(input_shape=[None, 3])
model.summary()

for p in model.trainable_variables:
    print(p.name, p.shape)
4.4 Output Activations
tf.sigmoid
softmax
tf.tanh
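A quick comparison sketch:

a = tf.linspace(-6., 6., 10)
print(tf.sigmoid(a))     # squashed into (0, 1)
print(tf.nn.softmax(a))  # non-negative and sums to 1: usable as probabilities
print(tf.tanh(a))        # squashed into (-1, 1)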
4.5 Computing the Loss
MSE: mean squared error
The larger the entropy, the more stable/less surprising the distribution (the lottery example).
Cross-Entropy Loss (I didn't fully understand this one at the time).
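For reference, the standard definitions of entropy and cross-entropy:

H(P) = -\sum_i P(i)\log P(i), \qquad H(p, q) = -\sum_i p(i)\log q(i) = H(p) + D_{KL}(p\|q)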
Binary cross-entropy
For a single output:
Multi-class cross-entropy
Example: computing the cross-entropy and the resulting loss values:
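A sketch of the computation (the probability vectors are my own examples):

print(tf.losses.categorical_crossentropy([0, 1, 0, 0], [0.25, 0.25, 0.25, 0.25]))  # ~1.386
print(tf.losses.categorical_crossentropy([0, 1, 0, 0], [0.01, 0.97, 0.01, 0.01]))  # ~0.030: confident and correct, low loss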
Numerical instability: apply the softmax inside the loss rather than by hand.
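A sketch: pass the raw logits with from_logits=True instead of applying softmax yourself:

logits = tf.random.normal([1, 10])
y = tf.one_hot([3], depth=10)
loss = tf.losses.categorical_crossentropy(y, logits, from_logits=True)  # numerically stabler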
5. Gradient Descent
5.1 Introduction to Gradient Descent
TensorFlow's automatic differentiation.
Computing gradients:
Repeated gradient computation: the tape releases its resources after one gradient() call, because automatic differentiation is expensive; use persistent=True if you need to query it repeatedly.
Second-order gradients: almost never needed in practice.
Practice (the code is worth reading):
import tensorflow as tf

w = tf.Variable(1.0)
b = tf.Variable(2.0)
x = tf.Variable(3.0)

with tf.GradientTape() as t1:
    with tf.GradientTape() as t2:
        y = x * w + b
    dy_dw, dy_db = t2.gradient(y, [w, b])
d2y_dw2 = t1.gradient(dy_dw, w)

print(dy_dw)
print(dy_db)
print(d2y_dw2)

assert dy_dw.numpy() == 3.0
assert d2y_dw2 is None
5.2 Activation Functions and Their Gradients
Sigmoid:
Tanh:
ReLU:
tf.nn.relu + tf.nn.leaky_relu
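For reference, their formulas and gradients (standard results, added here):

\sigma(x) = \frac{1}{1+e^{-x}}, \quad \sigma'(x) = \sigma(x)\,(1-\sigma(x)); \qquad \frac{d}{dx}\tanh(x) = 1 - \tanh^2(x); \qquad \mathrm{relu}(x) = \max(0, x), \quad \mathrm{relu}'(x) = \begin{cases} 1 & x > 0 \\ 0 & x \le 0 \end{cases}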
5.3 Loss Functions and Their Gradients (part 1)
5.3.1 MSE Gradient
5.3.2 Softmax
Derivation of its derivative:
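The standard result of that derivation (with p = softmax(a)):

p_i = \frac{e^{a_i}}{\sum_k e^{a_k}}, \qquad \frac{\partial p_i}{\partial a_j} = p_i(\delta_{ij} - p_j)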
5.4 Single-Output Perceptron and Its Gradient
5.5 Multi-Output Perceptron and Its Gradient
5.6 Chain Rule
5.7 Backpropagation (parts 1 and 2 — worth re-listening to)
5.8 Function Optimization in Practice
Gradient descent:
Code for the function-optimization exercise:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import tensorflow as tf

def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2

x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

# try different initial points: [1., 0.], [-4, 0.], [4, 0.]
x = tf.constant([4., 0.])
for step in range(200):
    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)
    grads = tape.gradient(y, [x])[0]
    x -= 0.01 * grads
    if step % 20 == 0:
        print('step {}: x = {}, f(x) = {}'
              .format(step, x.numpy(), y.numpy()))
5.9 Handwritten Digit Problem in Practice (Layers, parts 1-3): the FashionMNIST dataset
See the code file.
5.10 TensorBoard Visualization (parts 1-2)
Feeding a single image
Feeding multiple images
Stitching the images together yourself
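A minimal sketch of the summary-writer pattern (the log directory name and step values are mine):

import tensorflow as tf
summary_writer = tf.summary.create_file_writer('logs')
with summary_writer.as_default():
    tf.summary.scalar('loss', 0.5, step=0)
    # for images: tf.summary.image('sample', imgs, max_outputs=25, step=0),
    # where imgs is a [b, h, w, c] batch you prepared (or stitched together) yourself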
6. Keras High-Level API
6.1 Metrics
Code: see the file.
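A minimal sketch of the metric workflow (the values are arbitrary):

acc_meter = tf.keras.metrics.Accuracy()
acc_meter.update_state([0, 1, 2], [0, 1, 1])  # accumulate
print(acc_meter.result().numpy())             # 0.666...
acc_meter.reset_states()                      # clear before the next epoch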
6.2 Compile & Fit
compile specifies the optimizer, loss, and metrics; fit then drives the whole loop from train to test in one call.
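A sketch, assuming a built network plus db/ds_val datasets as in the course code:

network.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
network.fit(db, epochs=10, validation_data=ds_val, validation_freq=2)
network.evaluate(ds_val)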
6.3 Custom Layers and Networks
6.3.1 keras.Sequential
6.3.2 keras.layers.Layer
Custom Dense layer:
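A sketch of the pattern taught in the course (add_weight inside a Layer subclass):

class MyDense(tf.keras.layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super().__init__()
        # register trainable weights with the layer
        self.kernel = self.add_weight('w', [inp_dim, outp_dim])
        self.bias = self.add_weight('b', [outp_dim])

    def call(self, inputs, training=None):
        return inputs @ self.kernel + self.bias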
Building a five-layer network:
See the code file; a full exercise follows later.
6.4 Saving and Loading Models
Method 1, save/load the weights: see the code file.
Method 2: save the entire model.
Method 3: SavedModel, the most general format.
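Sketches of the three approaches (the file names are mine; network is assumed built):

# 1. weights only
network.save_weights('weights.ckpt')
network.load_weights('weights.ckpt')  # requires rebuilding the same architecture first
# 2. the entire model, architecture included
network.save('model.h5')
network = tf.keras.models.load_model('model.h5')
# 3. SavedModel: language/platform-neutral, suitable for deployment
tf.saved_model.save(network, 'saved_model/')
restored = tf.saved_model.load('saved_model/')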
6.5 Keras in Practice: the CIFAR10 Dataset
Plain custom layers
See the code!!! Quite important: a complete Keras pipeline written out in full.
7. Overfitting and Underfitting
Underfitting: both train and test performance are poor.
Overfitting: the model has also fit some of the noise (train looks good, test does not).
7.1 Cross-Validation
7.1.1 Detecting Overfitting
Split out a validation set and a test set.
See the code file for details.
7.1.2 Mitigating Overfitting
L2-regularization
L1-regularization
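A sketch of adding L2 regularization to a layer (the factor 0.001 is arbitrary):

layer = tf.keras.layers.Dense(
    16,
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
    activation='relu')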
7.1.3 Momentum and Learning-Rate Decay
Momentum is inertia: each update keeps part of the previous step's direction.
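A sketch of both knobs (the values are arbitrary):

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.2, decay_steps=1000, decay_rate=0.96)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)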
7.1.4 Early Stopping and Dropout
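A sketch of dropout (early stopping is done by watching validation accuracy and keeping the best checkpoint):

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),  # active only when called with training=True
    tf.keras.layers.Dense(10)])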