1. Regression Problems
1.1 Linear Regression
loss is the error function; the goal is to make this error as small as possible, and the method used is gradient descent.
lr: the learning rate, i.e. the step size of each update.
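Written out (a restatement of the above for the linear model y = wx + b used next):

loss = \sum_i (w x_i + b - y_i)^2, \qquad w' = w - lr \cdot \frac{\partial loss}{\partial w}, \qquad b' = b - lr \cdot \frac{\partial loss}{\partial b}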
1.2 Regression in Practice
import numpy as np

# y = wx + b
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # compute mean-squared-error
        totalError += (y - (w * x + b)) ** 2
    # average loss for each point
    return totalError / float(len(points))

def step_gradient(b_current, w_current, points, learningRate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # grad_b = 2(wx+b-y)
        b_gradient += (2/N) * ((w_current * x + b_current) - y)
        # grad_w = 2(wx+b-y)*x
        w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
    # update w' = w - lr * gradient
    new_b = b_current - (learningRate * b_gradient)
    new_w = w_current - (learningRate * w_gradient)
    return [new_b, new_w]

def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    # update for several iterations
    for i in range(num_iterations):
        b, w = step_gradient(b, w, np.array(points), learning_rate)
    return [b, w]

def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.0001
    initial_b = 0  # initial y-intercept guess
    initial_w = 0  # initial slope guess
    num_iterations = 1000
    print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w,
                  compute_error_for_line_given_points(initial_b, initial_w, points))
          )
    print("Running...")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, w = {2}, error = {3}".
          format(num_iterations, b, w,
                 compute_error_for_line_given_points(b, w, points))
          )

if __name__ == '__main__':
    run()
1.3 Handwritten Digit Problem
MNIST dataset: [28, 28, 1] — 28 rows, 28 columns, 1 (grayscale) channel
Flatten: [28, 28] → [28*28]
Introduce the nonlinear ReLU function
Search for the optimal values of these six parameters (w1, b1, w2, b2, w3, b3); ReLU was applied three times
Summary:
Then the class is identified from the index of the largest output
1.4 Handwritten Digit Problem in Practice
The MNIST dataset in practice:
Step 1 & 2: compute out & loss
Step 3: compute gradients and optimize
Step 4: loop
Source code:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

(x, y), (x_val, y_val) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10)
print(x.shape, y.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.batch(200)

model = keras.Sequential([
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)])
optimizer = optimizers.SGD(learning_rate=0.001)

def train_epoch(epoch):
    # Step 4. loop over batches
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # Step 1. compute output
            # [b, 784] => [b, 10]
            out = model(x)
            # Step 2. compute loss
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step 3. optimize and update w1, w2, w3, b1, b2, b3
        grads = tape.gradient(loss, model.trainable_variables)
        # w' = w - lr * grad
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())

def train():
    for epoch in range(30):
        train_epoch(epoch)

if __name__ == '__main__':
    train()
2. TensorFlow Basic Operations
2.1 Data Types
TensorFlow installation
Install via the Tsinghua mirror (found through Baidu), then proceed with a normal install. In the Anaconda Prompt window, use:
activate tfenv — activate the environment
deactivate tfenv — leave the environment
2.1.1 Commonly Used Containers
list
np.array
tf.Tensor
2.1.2 What Is a Tensor
scalar: 1.1
vector: [1.1], [1.1,2.2,...]
matrix: [[1.1,2.2],[3.3,4.4],[5.5,6.6]]
tensor: rank>2
2.1.3 Data Types in a Scientific Computing Library
int, float, double
bool
string
Code example:
import tensorflow as tf
import numpy as np  # used by the np.arange examples below
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
2.1.4 Creating Tensors
print(tf.constant(1))
# tf.Tensor(1, shape=(), dtype=int32)
print(tf.constant(1.))
# tf.Tensor(1.0, shape=(), dtype=float32)
# print(tf.constant(2.2, dtype=tf.int32))  # raises:
# TypeError: Cannot convert 2.2 to EagerTensor of dtype int32
print(tf.constant(2., dtype=tf.double))
# tf.Tensor(2.0, shape=(), dtype=float64)
print(tf.constant([True, False]))
# tf.Tensor([ True False], shape=(2,), dtype=bool)
print(tf.constant('hello,world.'))
# tf.Tensor(b'hello,world.', shape=(), dtype=string)
2.1.5 Tensor Properties
with tf.device("cpu"):
a = tf.constant([1])
with tf.device('gpu'):
b = tf.range(4)
print(a.device)
# /job:localhost/replica:0/task:0/device:CPU:0 返回当前的设备
print(b.device)
# /job:localhost/replica:0/task:0/device:CPU:0 有gpu应该显示gpu的
aa = a.gpu()
print(aa.device)
# /job:localhost/replica:0/task:0/device:CPU:0 有gpu应该显示gpu的
bb = b.cpu()
print(bb.device)
# /job:localhost/replica:0/task:0/device:CPU:0
print(b.numpy())
# [0 1 2 3]
print(b.ndim) # 1
print(tf.rank(tf.ones([3, 4, 2])))
# tf.Tensor(3, shape=(), dtype=int32)
保证要不都在GPU要不都在CPU,
2.1.6 Checking Data Types
a = tf.constant([1.])
b = tf.constant([True, False])
c = tf.constant('hello,world.')
d = np.arange(4)
print(isinstance(a, tf.Tensor))
# True
print(tf.is_tensor(b))
# True
print(a.dtype, b.dtype, c.dtype)
# <dtype: 'float32'> <dtype: 'bool'> <dtype: 'string'>
print(a.dtype == tf.float32)
# True
print(c.dtype == tf.float32)
# False
2.1.7 Type Conversion
a = np.arange(5)
print(a) # [0 1 2 3 4]
print(a.dtype) # int32
aa = tf.convert_to_tensor(a)
print(aa) # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
aa = tf.convert_to_tensor(a, dtype=tf.int64)
print(aa) # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
a = tf.cast(aa, dtype=tf.float32)
print(a) # tf.Tensor([0. 1. 2. 3. 4.], shape=(5,), dtype=float32)
b = tf.cast(a, dtype=tf.double)
print(b) # tf.Tensor([0. 1. 2. 3. 4.], shape=(5,), dtype=float64)
c = tf.cast(b, dtype=tf.int32)
print(c)
# tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
bool also converts to int (and back):
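A small sketch of the bool ↔ int round trip with tf.cast (the values here are my own):

b = tf.constant([0, 1])
bb = tf.cast(b, dtype=tf.bool)  # [False  True]
print(tf.cast(bb, tf.int32))    # back to [0 1]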
2.1.8 tf.Variable
a = tf.range(5)
print(a)  # tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int32)
b = tf.Variable(a)
print(b.dtype)  # <dtype: 'int32'>
b = tf.Variable(a, name='input_data')
print(b.trainable)  # True: b is now a trainable parameter
print(tf.is_tensor(b))  # True
2.1.9 Pulling Data Back Into numpy
a = tf.range(5)
b = tf.Variable(a)
print(a.numpy()) # [0 1 2 3 4]
print(b.numpy()) # [0 1 2 3 4]
a = tf.ones([])
print(a.numpy()) # 1.0
print(int(a))  # 1, converted to a Python int
print(float(a))  # 1.0, converted to a Python float
2.2 Creating Tensors
All of the following can create a Tensor:
2.2.1 From numpy and list
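The notes left this section empty; a minimal sketch (the example values are mine):

import numpy as np
print(tf.convert_to_tensor(np.ones([2, 3])))  # from numpy; dtype float64
print(tf.convert_to_tensor([1, 2]))           # from a list; dtype int32
print(tf.convert_to_tensor([[1], [2.]]))      # nested list; dtype float32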
2.2.2 From tf.zeros
a = tf.zeros_like([])
print(a) # tf.Tensor([], shape=(0,), dtype=float32)
b = tf.zeros([2, 2])
print(b)
# tf.Tensor(
# [[0. 0.]
# [0. 0.]], shape=(2, 2), dtype=float32)
c = tf.zeros([2, 3, 3])
print(c)
# tf.Tensor(
# [[[0. 0. 0.]
# [0. 0. 0.]
# [0. 0. 0.]]
#
# [[0. 0. 0.]
# [0. 0. 0.]
# [0. 0. 0.]]], shape=(2, 3, 3), dtype=float32)
2.2.3 From tf.zeros_like
a = tf.zeros([2, 3, 3])
b = tf.zeros_like(a)
# b has the same shape and dtype as a
2.2.4 From tf.ones
a = tf.ones(1)
print(a) # tf.Tensor([1.], shape=(1,), dtype=float32)
b = tf.ones([2, 3])
print(b)
# tf.Tensor(
# [[1. 1. 1.]
# [1. 1. 1.]], shape=(2, 3), dtype=float32)
c = tf.ones_like(a)
print(c) # tf.Tensor([1.], shape=(1,), dtype=float32)
2.2.5 tf.fill
a = tf.fill([2, 2], 9)
print(a)
# tf.Tensor(
# [[9 9]
# [9 9]], shape=(2, 2), dtype=int32)
2.2.6 Random Initialization: Normal
The standard normal distribution and the truncated normal distribution:
a = tf.random.normal([2, 2], mean=1, stddev=1)
print(a)
# tf.Tensor(
# [[1.7453098 1.6442213 ]
# [2.7475848 0.10199422]], shape=(2, 2), dtype=float32)
b = tf.random.truncated_normal([2, 2], mean=0, stddev=1)
print(b)
# tf.Tensor(
# [[ 1.3680507 -1.1750759 ]
# [ 0.12701823 1.2563375 ]], shape=(2, 2), dtype=float32)
2.2.7 Uniform Distribution
a = tf.random.uniform([2, 2], minval=0, maxval=1)
print(a)
# tf.Tensor(
# [[0.16156495 0.70819294]
# [0.67944014 0.12850523]], shape=(2, 2), dtype=float32)
2.2.8 Random Shuffling (keeping pairs aligned)
idx = tf.range(10)
idx = tf.random.shuffle(idx)
print(idx)  # tf.Tensor([7 4 6 3 8 0 9 2 5 1], shape=(10,), dtype=int32), randomly shuffled
a = tf.random.normal([10, 784])
b = tf.random.uniform([10], maxval=10, dtype=tf.int32)
a = tf.gather(a, idx)
b = tf.gather(b, idx)
print(a)
print(b) # tf.Tensor([7 8 4 8 5 7 8 1 2 4], shape=(10,), dtype=int32)
2.2.9 tf.constant: much like ordinary assignment
a = tf.constant(1)
print(a) # tf.Tensor(1, shape=(), dtype=int32)
b = tf.constant([1])
print(b) # tf.Tensor([1], shape=(1,), dtype=int32)
c = tf.constant([1, 2.])
print(c) # tf.Tensor([1. 2.], shape=(2,), dtype=float32)
2.2.10 Application: Computing a Loss
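No code was captured here; a minimal sketch of the usual loss computation (the shapes [4, 10] are my own choice):

out = tf.random.uniform([4, 10])        # stand-in network output
y = tf.one_hot(tf.range(4), depth=10)   # stand-in one-hot labels
loss = tf.keras.losses.mse(y, out)      # per-sample MSE: shape [4]
loss = tf.reduce_mean(loss)             # scalar loss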
2.2.11 Vector
Dense performs the dimension transform; the kernel is w and the bias is b.
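A quick way to see the kernel and bias (the layer sizes here are arbitrary):

net = tf.keras.layers.Dense(10)
net.build((4, 8))         # input dimension 8
print(net.kernel.shape)   # (8, 10) -> this is w
print(net.bias.shape)     # (10,)   -> this is b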
2.2.12 Matrix
4 photos, each flattened from 48*48, then mapped to 10 dimensions.
2.2.13 3-, 4-, and 5-D Tensors
3-D (text): [how many sentences, words per sentence, encoding length per word]
4-D (images): [how many images, height, width, 3 RGB channels]
5-D (meta-learning): [how many tasks, images per task, height, width, 3 RGB channels]
2.3 Indexing and Slicing
2.3.1 Basic Indexing (not recommended)
2.3.2 numpy-Style Indexing
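No example was captured here; a small sketch (the shape is my own choice):

a = tf.random.normal([4, 28, 28, 3])
print(a[1].shape)        # (28, 28, 3)
print(a[1, 2].shape)     # (28, 3)
print(a[1, 2, 3].shape)  # (3,)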
2.3.3 start:end
The last element has index -1 and the first has index 0.
The slice includes start but excludes end.
a = tf.range(10)
print(a) # tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32)
print(a[-1:]) # tf.Tensor([9], shape=(1,), dtype=int32)
print(a[-2:]) # tf.Tensor([8 9], shape=(2,), dtype=int32)
print(a[:2]) # tf.Tensor([0 1], shape=(2,), dtype=int32)
print(a[:-1]) # tf.Tensor([0 1 2 3 4 5 6 7 8], shape=(9,), dtype=int32)
2.3.4 Slicing with start:end:step
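No notes were taken for this one; a quick sketch of step slicing (the shapes are mine):

a = tf.random.normal([4, 28, 28, 3])
print(a[:, 0:28:2, 0:28:2, :].shape)  # (4, 14, 14, 3)
print(a[:, ::7, ::7, :].shape)        # (4, 4, 4, 3)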
2.3.5 Reversing with ::-1
a = tf.range(4)
print(a) # tf.Tensor([0 1 2 3], shape=(4,), dtype=int32)
print(a[::-1]) # tf.Tensor([3 2 1 0], shape=(4,), dtype=int32)
print(a[::-2])  # tf.Tensor([3 1], shape=(2,), dtype=int32), samples every other element
print(a[2::-2]) # tf.Tensor([2 0], shape=(2,), dtype=int32)
In the last line, the leading 2 is the start index; from there it steps backward by 2.
2.3.6 The Ellipsis (...)
... means take everything along all the remaining dimensions.
a = tf.random.normal([2, 4, 28, 28, 3])
print(a[0].shape) # (4, 28, 28, 3)
print(a[0, ...].shape) # (4, 28, 28, 3)
2.3.7 Selective Indexing: tf.gather, tf.gather_nd, tf.boolean_mask
1. tf.gather
2. tf.gather_nd
Running example: 4 classes, 35 students, 8 subject scores, i.e. shape [4, 35, 8]
3. tf.boolean_mask
axis defaults to 0.
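A combined sketch using the class/students/subjects example (the data is random):

a = tf.random.normal([4, 35, 8])
print(tf.gather(a, axis=0, indices=[2, 3]).shape)     # (2, 35, 8): classes 2 and 3
print(tf.gather(a, axis=1, indices=[2, 3, 7]).shape)  # (4, 3, 8): three chosen students
print(tf.gather_nd(a, [[0, 1], [1, 2]]).shape)        # (2, 8): specific (class, student) pairs
print(tf.boolean_mask(a, mask=[True, True, False, False]).shape)  # (2, 35, 8); axis defaults to 0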
2.4 Dimension Transforms
2.4.1 View and Reshape
[b, 28, 28]
[b, 28*28]
[b, 2, 14*28]  # changes how the image is interpreted; nothing else changes
[b, 28, 28, 1]  # only adds a conceptual dimension; the data is unchanged
a = tf.random.normal([4, 28, 28, 3])
print(a.shape, a.ndim) # (4, 28, 28, 3) 4
print(tf.reshape(a, [4, 784, 3]).shape)  # (4, 784, 3), [b, pixels, c]
print(tf.reshape(a, [4, -1, 3]).shape)  # (4, 784, 3)
print(tf.reshape(a, [4, 784*3]).shape)  # (4, 2352), flattened data points
print(tf.reshape(a, [4, -1]).shape)  # (4, 2352)
reshape can introduce subtle bugs: after a reshape you may no longer know the values of H and W, or their order may have changed, so every reshape should follow a concrete physical meaning.
2.4.2 Transpose: tf.transpose
a = tf.random.normal([4, 3, 2, 1])
print(a.shape) # (4, 3, 2, 1)
print(tf.transpose(a).shape) # (1, 2, 3, 4)
print(tf.transpose(a, perm=[0, 1, 3, 2]).shape)
# (4, 3, 1, 2); perm[i] names the original axis placed at position i
Going from [b, h, w, c] to [b, w, h, c] moves the actual content, so it needs this transpose (a reshape would scramble it).
Converting data between PyTorch's [b, c, h, w] layout and TensorFlow's [b, h, w, c] layout also uses this dimension transform.
2.4.3 Expanding Dimensions: tf.expand_dims
Same example: four classes, 35 students, 8 subjects. A non-negative axis inserts the new dimension before that position; a negative axis inserts it after.
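A sketch with that example shape:

a = tf.random.normal([4, 35, 8])
print(tf.expand_dims(a, axis=0).shape)   # (1, 4, 35, 8): inserted before axis 0
print(tf.expand_dims(a, axis=3).shape)   # (4, 35, 8, 1)
print(tf.expand_dims(a, axis=-1).shape)  # (4, 35, 8, 1): inserted after the last axis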
2.4.4 Squeezing Dimensions: tf.squeeze
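No example was captured; a minimal sketch:

a = tf.zeros([1, 2, 1, 1, 3])
print(tf.squeeze(a).shape)          # (2, 3): all size-1 axes removed
print(tf.squeeze(a, axis=0).shape)  # (2, 1, 1, 3): only the chosen axis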
2.5 Dimension Expansion (Broadcasting)
2.5.1 Broadcasting
Alignment starts from the small (trailing) dimensions; broadcasting saves memory and avoids redundant computation.
(The slides here showed which shape combinations can and cannot broadcast.)
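A sketch of shapes that do and do not broadcast (the examples are my own):

x = tf.random.normal([4, 32, 32, 3])
print((x + tf.random.normal([3])).shape)          # OK: [3] -> [1, 1, 1, 3] -> [4, 32, 32, 3]
print((x + tf.random.normal([32, 32, 1])).shape)  # OK: trailing dims align
# x + tf.random.normal([1, 4, 1, 1])              # fails: 4 cannot match 32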
2.5.2 Explicit Broadcasting: tf.broadcast_to
2.5.3 Broadcast VS Tile
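Nothing was captured under this heading; a minimal comparison sketch:

a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4])                  # virtual expansion; no copy until needed
a2 = tf.tile(tf.expand_dims(a, axis=0), [2, 1, 1])  # same shape, but the data is physically copied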
2.6 Math Operations
+ - * / : element-wise addition, subtraction, multiplication, division
@, tf.matmul: matrix multiplication
reduce_mean/max/min/sum: reductions (mean, max, min, sum)
tf.math.log, tf.exp, tf.pow, tf.sqrt
Example:
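The example itself wasn't captured; a minimal sketch of each operator family:

a = tf.fill([2, 2], 2.)
b = tf.ones([2, 2])
print(a + b, a - b, a * b, a / b)  # element-wise
print(a @ b)                       # same as tf.matmul(a, b)
print(tf.math.log(a))              # natural log
print(tf.exp(a))
print(tf.pow(a, 3))                # same as a ** 3
print(tf.sqrt(a))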
2.7 Forward Propagation (Tensors) in Practice
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# x: [60k, 28, 28]
# y: [60k], one label (0-9) for each of the 60k images
(x, y), _ = datasets.mnist.load_data()  # load the dataset
# x: [0~255] => [0~1.]; convert x and y to tensors
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))
# inspect the min and max of x and y

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
# the full set is 60k images; take 128 at a time instead
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# sample[0] is x, sample[1] is y (a batch of 128 integer labels)

# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]; 784 in, 256 out, mean 0, stddev 0.1
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10):  # iterate over the whole dataset 10 times
    for step, (x, y) in enumerate(train_db):  # each step is one batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:  # the tape records ops on tf.Variable; required for autodiff
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute the loss
            # out: [b, 10]
            # y: [b] => [b, 10], one-hot encoding
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum(y-out)^2), mean squared error
            # [b, 10]; square is element-wise squaring
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute the gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])  # in-place update
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
3. TensorFlow Advanced Operations
3.1 Merging and Splitting
First make sure all the other dimensions match.
concat and stack are merge operations;
unstack and split are splitting operations.
3.1.1 tf.concat: Merge Along an Existing Axis
axis selects the dimension along which to concatenate.
3.1.2 tf.stack: Create a New Axis
3.1.3 tf.unstack: Split an Axis Into Unit Pieces
3.1.4 tf.split: Split With Specified Sizes
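A combined sketch (the shapes follow the class-scores example used earlier):

a = tf.ones([4, 35, 8])
b = tf.ones([2, 35, 8])
print(tf.concat([a, b], axis=0).shape)  # (6, 35, 8): the other dims must match
c = tf.ones([4, 35, 8])
d = tf.stack([a, c], axis=0)            # (2, 4, 35, 8): new axis; shapes must be equal
res = tf.unstack(d, axis=0)             # a list of 2 tensors, each (4, 35, 8)
res = tf.split(d, num_or_size_splits=[2, 2, 4], axis=3)  # pieces of width 2, 2, 4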
3.2 Statistics
L2 norm, infinity norm, L1 norm:
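Their standard definitions, for reference:

\|x\|_2 = \sqrt{\textstyle\sum_i x_i^2}, \qquad \|x\|_1 = \textstyle\sum_i |x_i|, \qquad \|x\|_\infty = \max_i |x_i|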
3.2.1 tf.norm: the L2 Norm
ord=n gives the n-norm.
3.2.2 tf.norm: the L1 Norm
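A small sketch of both:

a = tf.ones([2, 2])
print(tf.norm(a))                 # 2.0: L2 norm by default, sqrt(1+1+1+1)
print(tf.norm(a, ord=2, axis=1))  # row-wise L2: [1.414, 1.414]
print(tf.norm(a, ord=1))          # 4.0: L1 norm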
3.2.3 reduce_min/max/mean
The reduce prefix is there to tell you a dimension-reducing operation is involved.
3.2.4 argmax/argmin: Indices of the Max/Min
Returns index positions; axis selects the dimension to search.
3.2.5 Comparison: tf.equal
Example: computing accuracy.
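A minimal sketch (out and y are stand-ins for network output and labels):

out = tf.random.normal([10, 10])
y = tf.random.uniform([10], maxval=10, dtype=tf.int32)
pred = tf.cast(tf.argmax(out, axis=1), tf.int32)  # argmax returns int64, so cast
correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), tf.int32))
acc = int(correct) / y.shape[0]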
3.2.6 Deduplication: tf.unique
3.3 Tensor Sorting
3.3.1 sort/argsort
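A quick sketch:

a = tf.random.shuffle(tf.range(5))
print(tf.sort(a, direction='DESCENDING'))    # sorted values
idx = tf.argsort(a, direction='DESCENDING')  # indices that would sort a
print(tf.gather(a, idx))                     # same result as tf.sort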
3.3.2 Top-k Accuracy
The largest k entries:
res.indices returns the indices;
res.values returns the values.
Top-1 asks which single class is most likely:
the prediction is 3 but the true label is 2, so accuracy is 0%.
Top-2 asks which two classes are most likely:
the predictions are 3 and 2 and the true label is 2, so accuracy is 100%.
[2, 1, 0] orders the classes from most likely to least likely.
The full top-k computation:
import tensorflow as tf
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.random.set_seed(2467)

def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.shape[0]

    pred = tf.math.top_k(output, maxk).indices
    pred = tf.transpose(pred, perm=[1, 0])
    target_ = tf.broadcast_to(target, pred.shape)
    # [maxk, b]
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
        correct_k = tf.reduce_sum(correct_k)
        acc = float(correct_k * (100.0 / batch_size))
        res.append(acc)
    return res

output = tf.random.normal([10, 6])  # 10 samples, 6 classes
output = tf.math.softmax(output, axis=1)
target = tf.random.uniform([10], maxval=6, dtype=tf.int32)
print('prob:', output.numpy())
pred = tf.argmax(output, axis=1)
print('pred:', pred.numpy())
print('label:', target.numpy())

acc = accuracy(output, target, topk=(1, 2, 3, 4, 5, 6))
print('top-1-6 acc:', acc)
3.4 Padding and Tiling
3.4.1 tf.pad
In the paddings argument, each pair says how much to add before and after that axis: 0 pads nothing on that side, 1 pads one row/column.
Image padding:
pad two rows/columns on each side, e.g. 28x28 → 32x32.
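A sketch of both cases (the image shapes are the usual MNIST ones):

a = tf.reshape(tf.range(9), [3, 3])
print(tf.pad(a, [[1, 0], [0, 1]]))  # one row of zeros on top, one column on the right
b = tf.random.normal([4, 28, 28, 3])
print(tf.pad(b, [[0, 0], [2, 2], [2, 2], [0, 0]]).shape)  # (4, 32, 32, 3)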
3.4.2 tf.tile
tf.tile(a, [1, 2]): the 1 leaves the first axis uncopied; the 2 repeats the second axis twice.
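A quick sketch:

a = tf.reshape(tf.range(4), [2, 2])
print(tf.tile(a, [1, 2]).shape)  # (2, 4): rows untouched, columns doubled
print(tf.tile(a, [2, 1]).shape)  # (4, 2)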
3.5 Tensor Clipping
3.5.1 clip_by_value
It can implement the ReLU function:
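A sketch:

a = tf.range(10) - 5             # values in [-5, 4]
print(tf.clip_by_value(a, 2, 8)) # clamped into [2, 8]
print(tf.maximum(a, 0))          # exactly ReLU
print(tf.nn.relu(a))             # the built-in equivalent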
3.5.2 clip_by_norm
Clips by norm: the tensor is rescaled so its L2 norm stays within the limit, which preserves its direction. (I didn't fully get this one at first.)
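A sketch that may make it clearer:

a = tf.random.normal([2, 2], mean=10)
print(tf.norm(a))            # some L2 norm, e.g. around 20
aa = tf.clip_by_norm(a, 15)
print(tf.norm(aa))           # 15: same direction, smaller magnitude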
3.5.3 Gradient Clipping
Keeps gradients in a sane range so the network trains better.
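A sketch of the usual pattern (assumes tape, loss, model, and optimizer from a training loop like the ones above):

grads = tape.gradient(loss, model.trainable_variables)
grads, _ = tf.clip_by_global_norm(grads, 15)  # rescale jointly, keeping relative directions
optimizer.apply_gradients(zip(grads, model.trainable_variables))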
3.6 Advanced Operations
mask: obtain a True/False matrix from a comparison
where: obtain the coordinates of the True elements
where with three arguments: selects values from A or B according to the condition
scatter_nd: targeted updates, written onto an all-zeros base tensor
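A combined sketch (random data):

a = tf.random.normal([3, 3])
mask = a > 0
print(tf.boolean_mask(a, mask))             # the positive values
indices = tf.where(mask)                    # their coordinates
print(tf.gather_nd(a, indices))             # same values, via coordinates
print(tf.where(mask, a, tf.zeros_like(a)))  # take from a where True, else 0
# scatter_nd: write updates onto an all-zeros base of the given shape
print(tf.scatter_nd(tf.constant([[4], [3], [1]]),
                    tf.constant([9, 10, 11]),
                    tf.constant([8])))      # [ 0 11  0 10  9  0  0  0]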
meshgrid:
generating contour plots.
import tensorflow as tf
import matplotlib.pyplot as plt

def func(x):
    """
    :param x: [b, 2]
    :return:
    """
    z = tf.math.sin(x[..., 0]) + tf.math.sin(x[..., 1])
    return z

x = tf.linspace(0., 2*3.14, 500)
y = tf.linspace(0., 2*3.14, 500)
# [500, 500]
point_x, point_y = tf.meshgrid(x, y)
# [500, 500, 2]
points = tf.stack([point_x, point_y], axis=2)
# points = tf.reshape(points, [-1, 2])
print('points:', points.shape)
z = func(points)
print('z:', z.shape)

plt.figure('plot 2d func value')
plt.imshow(z, origin='lower', interpolation='none')
plt.colorbar()
plt.figure('plot 2d func contour')
plt.contour(point_x, point_y, z)
plt.colorbar()
plt.show()
4. Neural Networks and Fully Connected Layers
4.1 Data Loading
MNIST: images of the handwritten digits 0-9
CIFAR10/100: small color images in 10 (or 100) classes
4.1.1 tf.data.Dataset
Built specifically for iterating over datasets.
4.1.2 .shuffle
4.1.3 .map
4.1.4 .batch
4.1.5 StopIteration
4.1.6 .repeat()
.repeat(2) iterates the dataset twice; with no argument it repeats indefinitely.
Example:
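A sketch of the whole pipeline (preprocess is my own helper name):

def preprocess(x, y):
    x = tf.cast(x, tf.float32) / 255.
    y = tf.one_hot(tf.cast(y, tf.int32), depth=10)
    return x, y

(x, y), _ = tf.keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.shuffle(10000).map(preprocess).batch(128).repeat(2)
# iterating past the end of a non-repeating dataset raises StopIteration;
# .repeat() with no argument avoids that by looping forever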
4.2 Testing (Tensors) in Practice
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# x: [60k, 28, 28], x_test: [10k, 28, 28]
# y: [60k], y_test: [10k]
(x, y), (x_test, y_test) = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))

train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(100):  # iterate over the dataset
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:  # records ops on tf.Variable
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)
            # [b, 128] => [b, 10]
            out = h2@w3 + b3

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
            # mse = mean(sum(y-out)^2)
            # [b, 10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))

    # test/evaluation using the current [w1, b1, w2, b2, w3, b3]
    total_correct, total_num = 0, 0
    for step, (x, y) in enumerate(test_db):
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        # [b, 784] => [b, 256] => [b, 128] => [b, 10]
        h1 = tf.nn.relu(x@w1 + b1)
        h2 = tf.nn.relu(h1@w2 + b2)
        out = h2@w3 + b3

        # out: [b, 10] ~ R
        # prob: [b, 10] ~ [0, 1]
        prob = tf.nn.softmax(out, axis=1)
        # [b, 10] => [b]; note: argmax returns int64!
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype=tf.int32)
        # y: [b], int32
        # print(pred.dtype, y.dtype)
        correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
        correct = tf.reduce_sum(correct)

        total_correct += int(correct)
        total_num += x.shape[0]

    acc = total_correct / total_num
    print('test acc:', acc)
4.3 Fully Connected Layers
A three-level nested structure.
4.3.1 Sequential: Multiple Layers
A multi-layer network in practice:
import tensorflow as tf
from tensorflow import keras

x = tf.random.normal([2, 3])

model = keras.Sequential([
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2, activation='relu'),
    keras.layers.Dense(2)
])
model.build(input_shape=[None, 3])
model.summary()

for p in model.trainable_variables:
    print(p.name, p.shape)
4.4 Output Activations
tf.sigmoid
softmax
tf.tanh
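A quick comparison sketch:

a = tf.linspace(-6., 6., 10)
print(tf.sigmoid(a))     # squashed into (0, 1)
print(tf.nn.softmax(a))  # non-negative and sums to 1: usable as probabilities
print(tf.tanh(a))        # squashed into (-1, 1)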
4.5 Computing the Loss
MSE: mean squared error
The larger the entropy, the more stable/less surprising the distribution (the lottery example).
Cross-Entropy Loss (I didn't fully understand this one at the time).
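For reference, the standard definitions of entropy and cross-entropy:

H(P) = -\sum_i P(i)\log P(i), \qquad H(p, q) = -\sum_i p(i)\log q(i) = H(p) + D_{KL}(p\|q)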
Binary cross-entropy
For a single output:
Multi-class cross-entropy
Example: computing the cross-entropy and the resulting loss values:
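A sketch of the computation (the probability vectors are my own examples):

print(tf.losses.categorical_crossentropy([0, 1, 0, 0], [0.25, 0.25, 0.25, 0.25]))  # ~1.386
print(tf.losses.categorical_crossentropy([0, 1, 0, 0], [0.01, 0.97, 0.01, 0.01]))  # ~0.030: confident and correct, low loss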
Numerical instability: apply the softmax inside the loss rather than by hand.
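A sketch: pass the raw logits with from_logits=True instead of applying softmax yourself:

logits = tf.random.normal([1, 10])
y = tf.one_hot([3], depth=10)
loss = tf.losses.categorical_crossentropy(y, logits, from_logits=True)  # numerically stabler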
5. Gradient Descent
5.1 Introduction to Gradient Descent
TensorFlow's automatic differentiation.
Computing gradients:
Repeated gradient computation: the tape releases its resources after one gradient() call, because automatic differentiation is expensive; use persistent=True if you need to query it repeatedly.
Second-order gradients: almost never needed in practice.
Practice (the code is worth reading):
import tensorflow as tf

w = tf.Variable(1.0)
b = tf.Variable(2.0)
x = tf.Variable(3.0)

with tf.GradientTape() as t1:
    with tf.GradientTape() as t2:
        y = x * w + b
    dy_dw, dy_db = t2.gradient(y, [w, b])
d2y_dw2 = t1.gradient(dy_dw, w)

print(dy_dw)
print(dy_db)
print(d2y_dw2)

assert dy_dw.numpy() == 3.0
assert d2y_dw2 is None
5.2 Activation Functions and Their Gradients
Sigmoid:
Tanh:
ReLU:
tf.nn.relu + tf.nn.leaky_relu
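For reference, their formulas and gradients (standard results, added here):

\sigma(x) = \frac{1}{1+e^{-x}}, \quad \sigma'(x) = \sigma(x)\,(1-\sigma(x)); \qquad \frac{d}{dx}\tanh(x) = 1 - \tanh^2(x); \qquad \mathrm{relu}(x) = \max(0, x), \quad \mathrm{relu}'(x) = \begin{cases} 1 & x > 0 \\ 0 & x \le 0 \end{cases}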
5.3 Loss Functions and Their Gradients (part 1)
5.3.1 MSE Gradient
5.3.2 Softmax
Derivation of its derivative:
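The standard result of that derivation (with p = softmax(a)):

p_i = \frac{e^{a_i}}{\sum_k e^{a_k}}, \qquad \frac{\partial p_i}{\partial a_j} = p_i(\delta_{ij} - p_j)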
5.4 Single-Output Perceptron and Its Gradient
5.5 Multi-Output Perceptron and Its Gradient
5.6 Chain Rule
5.7 Backpropagation (parts 1 and 2 — worth re-listening to)
5.8 Function Optimization in Practice
Gradient descent:
Code for the function-optimization exercise:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import tensorflow as tf

def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2

x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y)
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])

fig = plt.figure('himmelblau')
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, Z)
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

# try different initial points: [1., 0.], [-4, 0.], [4, 0.]
x = tf.constant([4., 0.])
for step in range(200):
    with tf.GradientTape() as tape:
        tape.watch([x])
        y = himmelblau(x)
    grads = tape.gradient(y, [x])[0]
    x -= 0.01 * grads
    if step % 20 == 0:
        print('step {}: x = {}, f(x) = {}'
              .format(step, x.numpy(), y.numpy()))
5.9 Handwritten Digit Problem in Practice (Layers, parts 1-3): the FashionMNIST dataset
See the code file.
5.10 TensorBoard Visualization (parts 1-2)
Feeding a single image
Feeding multiple images
Stitching the images together yourself
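A minimal sketch of the summary-writer pattern (the log directory name and step values are mine):

import tensorflow as tf
summary_writer = tf.summary.create_file_writer('logs')
with summary_writer.as_default():
    tf.summary.scalar('loss', 0.5, step=0)
    # for images: tf.summary.image('sample', imgs, max_outputs=25, step=0),
    # where imgs is a [b, h, w, c] batch you prepared (or stitched together) yourself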
6. Keras High-Level API
6.1 Metrics
Code: see the file.
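A minimal sketch of the metric workflow (the values are arbitrary):

acc_meter = tf.keras.metrics.Accuracy()
acc_meter.update_state([0, 1, 2], [0, 1, 1])  # accumulate
print(acc_meter.result().numpy())             # 0.666...
acc_meter.reset_states()                      # clear before the next epoch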
6.2 Compile & Fit
compile specifies the optimizer, loss, and metrics; fit then drives the whole loop from train to test in one call.
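A sketch, assuming a built network plus db/ds_val datasets as in the course code:

network.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
network.fit(db, epochs=10, validation_data=ds_val, validation_freq=2)
network.evaluate(ds_val)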
6.3 Custom Layers and Networks
6.3.1 keras.Sequential
6.3.2 keras.layers.Layer
Custom Dense layer:
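A sketch of the pattern taught in the course (add_weight inside a Layer subclass):

class MyDense(tf.keras.layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super().__init__()
        # register trainable weights with the layer
        self.kernel = self.add_weight('w', [inp_dim, outp_dim])
        self.bias = self.add_weight('b', [outp_dim])

    def call(self, inputs, training=None):
        return inputs @ self.kernel + self.bias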
Building a five-layer network:
See the code file; a full exercise follows later.
6.4 Saving and Loading Models
Method 1, save/load the weights: see the code file.
Method 2: save the entire model.
Method 3: SavedModel, the most general format.
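Sketches of the three approaches (the file names are mine; network is assumed built):

# 1. weights only
network.save_weights('weights.ckpt')
network.load_weights('weights.ckpt')  # requires rebuilding the same architecture first
# 2. the entire model, architecture included
network.save('model.h5')
network = tf.keras.models.load_model('model.h5')
# 3. SavedModel: language/platform-neutral, suitable for deployment
tf.saved_model.save(network, 'saved_model/')
restored = tf.saved_model.load('saved_model/')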
6.5 Keras in Practice: the CIFAR10 Dataset
Plain custom layers
See the code!!! Quite important: a complete Keras pipeline written out in full.
7. Overfitting and Underfitting
Underfitting: both train and test performance are poor.
Overfitting: the model has also fit some of the noise (train looks good, test does not).
7.1 Cross-Validation
7.1.1 Detecting Overfitting
Split out a validation set and a test set.
See the code file for details.
7.1.2 Mitigating Overfitting
L2-regularization
L1-regularization
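A sketch of adding L2 regularization to a layer (the factor 0.001 is arbitrary):

layer = tf.keras.layers.Dense(
    16,
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
    activation='relu')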
7.1.3 Momentum and Learning-Rate Decay
Momentum is inertia: each update keeps part of the previous step's direction.
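A sketch of both knobs (the values are arbitrary):

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.2, decay_steps=1000, decay_rate=0.96)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)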
7.1.4 Early Stopping and Dropout
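A sketch of dropout (early stopping is done by watching validation accuracy and keeping the best checkpoint):

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),  # active only when called with training=True
    tf.keras.layers.Dense(10)])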