07_5_随机梯度下降_手写数字问题实战(层)&TensorBoard可视化
FashionMNIST实战(手写数字问题实战(层))
FashionMNIST
数据集的大小和数据类型与MNIST相同,只是图片内容换成了帽子、鞋子等共10类。
图片为黑白(灰度)图,尺寸为28*28。
代码实现
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
#可以和之前的41_前向传播_forward.py一一对应(使用手写和keras)
import tensorflow as tf
#keras新建层
from tensorflow import keras
#datasets用于数据集的管理;layers用于Dense层;optimizers优化器;Sequential容器;metrics测试的度量器
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
#预处理函数
def preprocess(x, y):
    """Scale one sample's pixels to [0, 1] and cast its label to int32.

    Used with ``Dataset.map``. Only ``tf.cast`` is applied here; the
    original notes state that ``from_tensor_slices`` already yields
    tensors, so no explicit ``tf.convert_to_tensor`` call is made.

    Returns:
        Tuple ``(x, y)``: ``x`` as float32 divided by 255, ``y`` as int32.
    """
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    label = tf.cast(y, dtype=tf.int32)
    return scaled, label
# Load the Fashion-MNIST train / test splits.
(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
print(x.shape, y.shape)

# The dataset is small, so a fairly large batch size is used.
batchsz = 128

# Training pipeline: apply preprocess to every (x, y) pair, shuffle, batch.
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(10000).batch(batchsz)

# Test pipeline: same preprocessing; evaluation does not need shuffling.
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess).batch(batchsz)

# Peek at one batch: iter() creates an iterator, next() fetches a batch.
db_iter = iter(db)
sample = next(db_iter)
# Expected output per the original notes: batch: (128, 28, 28) (128,)
print('batch:', sample[0].shape, sample[1].shape)

# Data is ready at this point; now build the network.
# Sequential is a container: the list of layers passed to it is the network.
# One Dense layer with relu corresponds to the hand-written version:
#     h1 = x @ w1 + b1        # plus bias => [b, 256]
#     h1 = tf.nn.relu(h1)     # non-linearity
# Hidden widths are arbitrary; they shrink step by step because, for images,
# the input dimension is far larger than the number of classes.
model = Sequential([
    # No input dimension is given on the layers; it is set by build() below.
    layers.Dense(256, activation=tf.nn.relu),  # [b, 784] -> [b, 256]
    layers.Dense(128, activation=tf.nn.relu),  # [b, 256] -> [b, 128]
    layers.Dense(64, activation=tf.nn.relu),   # [b, 128] -> [b, 64]
    layers.Dense(32, activation=tf.nn.relu),   # [b, 64] -> [b, 32]
    # The last layer outputs raw logits, so it has no activation.
    layers.Dense(10),  # [b, 32] -> [b, 10], 330 = 32*10 (w) + 10 (b)
])
# Give the model an input shape so that the weights get created.
model.build(input_shape=[None, 28*28])
# Print the network structure.
model.summary()
# Model: "sequential"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #
# =================================================================
# dense (Dense)                multiple                  200960
# _________________________________________________________________
# dense_1 (Dense)              multiple                  32896
# _________________________________________________________________
# dense_2 (Dense)              multiple                  8256
# _________________________________________________________________
# dense_3 (Dense)              multiple                  2080
# _________________________________________________________________
# dense_4 (Dense)              multiple                  330
# =================================================================
# Total params: 244,522
# Trainable params: 244,522
# Non-trainable params: 0
# _________________________________________________________________
# About 244k parameters at 4 bytes each (float32) is roughly 978,000 bytes,
# i.e. about 1 MB.

# The optimizer applies the update step (conceptually w = w - lr * grad).
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = optimizers.Adam(learning_rate=1e-3)
def main():
    """Train for 30 epochs and print the test accuracy after each epoch.

    Uses the module-level ``db``, ``db_test``, ``model`` and ``optimizer``.
    The cross-entropy loss drives the gradient step; the MSE loss is computed
    only so that it can be printed next to it.
    """
    flat_dim = 28 * 28
    for epoch in range(30):
        for step, (images, labels) in enumerate(db):
            # images: [b, 28, 28] => [b, 784]; labels: [b]
            images = tf.reshape(images, [-1, flat_dim])

            # Record the forward pass so that gradients can be taken.
            with tf.GradientTape() as tape:
                # Forward pass: [b, 784] -> [b, 10]
                logits = model(images)
                targets = tf.one_hot(labels, depth=10)
                # MSE gives one value per sample; reduce to a scalar.
                loss_mse = tf.reduce_mean(tf.losses.MSE(targets, logits))
                # Cross-entropy is also per sample, so take the mean.
                per_sample_ce = tf.losses.categorical_crossentropy(
                    targets, logits, from_logits=True)
                loss_ce = tf.reduce_mean(per_sample_ce)

            # Differentiate w.r.t. every trainable parameter of the model
            # (loss_mse could be used here instead, per the original notes).
            params = model.trainable_variables
            grads = tape.gradient(loss_ce, params)
            # Pair each gradient with its variable and apply the update.
            optimizer.apply_gradients(zip(grads, params))

            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss_ce), float(loss_mse))

        # Evaluate on the test set once per epoch; no gradients needed here.
        hits = 0
        seen = 0
        for images, labels in db_test:
            # images: [b, 28, 28] => [b, 784]; labels: [b]
            images = tf.reshape(images, [-1, flat_dim])
            # logits [b, 10] -> probabilities in 0~1 that sum to 1 per row
            probs = tf.nn.softmax(model(images), axis=1)
            # [b, 10] => [b]; argmax returns int64, labels are int32
            pred = tf.cast(tf.argmax(probs, axis=1), dtype=tf.int32)
            # bool matches -> int, summed to the number of correct samples
            matches = tf.cast(tf.equal(pred, labels), dtype=tf.int32)
            hits += int(tf.reduce_sum(matches))
            seen += images.shape[0]

        acc = hits / seen
        print(epoch, 'test acc:', acc)


# The original notes report about 0.84 test accuracy after the first epoch
# and attribute it to (1) the deeper network and (2) using Adam instead of
# the plain w = w - lr * grad update.
# Compared with the hand-written tensor version (lesson 41):
#   - there is no need to create and manage many Tensors by hand;
#   - the forward pass is a single model() call;
#   - all parameters are available through trainable_variables.
if __name__ == "__main__":
    main()
TensorBoard可视化
Tensor Flow(引言)
可视化图中的数据,并用曲线的方式展示。
- TensorBoard:主要用于TensorFlow
- Visdom:主要用于PyTorch
TensorBoard
- Installation
- Curves (loss/acc的监听)
- Image Visualization (图片的可视化)
Installation
pip install tensorboard
(默认已经安装)
Principle(工作原理)
- Listen logdir (监听目录)
- build summary instance (新建summary instance)
- feed data into summary instance (喂数据给summary instance)
CPU会将一些更新的数据(比如loss)写入磁盘(Disk),比如logs目录。有一个listener(监听器)监听logs目录,只要目录有变化,就会把数据更新到web页面上。
Step1.run listener
(tfEnvi) D:\tfCCode\79_可视化>tensorboard --logdir logs
2021-03-21 14:00:33.287413: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.3.0 at http://localhost:6006/ (Press CTRL+C to quit)
Step2.build summary
在代码中进行,新建summary,这个是接口,具体不需要关心
tf.summary.create_file_writer(log_dir) #log_dir为监听路径
Step3.feed scalar(喂标量数据)
tf.summary.scalar(...)
step 默认作为x轴;float() 用于将Tensor格式转换为np或具体数值格式。
名字为loss,数值更新为float(loss)。
Step3.feed single Image(喂图片数据)
tf.summary.image(...)
[1,28,28,1]:1张图片,28*28,1个通道
step:可以随便取值
结果:
Step3.feed multi-images(喂多张图片)
tf.summary.image(...)
reshape:把图片集转化成[b,28,28,1]
结果:
不太美观。
自己设计:
image_grid():把多张图片组合成1张图片(例如[16,28,28];下面的代码实现中取25张,拼成5*5的网格)
效果:
代码实现
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
import datetime
from matplotlib import pyplot as plt
import io
#预处理函数
def preprocess(x, y):
    """Scale one sample's pixels to [0, 1] and cast its label to int32.

    Returns:
        Tuple ``(x, y)``: ``x`` as float32 divided by 255, ``y`` as int32.
    """
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    label = tf.cast(y, dtype=tf.int32)
    return scaled, label
#把多张图片拼接成1张图片,plot_to_image和image_grid
def plot_to_image(figure):
    """Convert a matplotlib figure to a PNG image tensor with a batch axis.

    The figure is rendered to an in-memory PNG, closed, and decoded with
    4 channels. The supplied figure is closed and inaccessible after this
    call.

    Args:
        figure: The matplotlib figure to render.

    Returns:
        The decoded image with a leading batch dimension of size 1.
    """
    # Save the plot to a PNG in memory. Render the figure that was passed in
    # rather than whichever figure pyplot currently considers active.
    buf = io.BytesIO()
    figure.savefig(buf, format='png')
    # Closing the figure prevents it from being displayed directly inside
    # the notebook.
    plt.close(figure)
    buf.seek(0)
    # Convert the PNG buffer to a TF image.
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # Add the batch dimension.
    image = tf.expand_dims(image, 0)
    return image
def image_grid(images):
    """Return a 5x5 grid of the given images as a matplotlib figure.

    At most the first 25 images are drawn. If fewer are supplied, the
    remaining grid cells are left empty instead of raising an IndexError.

    Args:
        images: Indexable collection of 2-D images that supports ``len()``.

    Returns:
        The matplotlib figure containing the grid.
    """
    # Create a figure to contain the plot.
    figure = plt.figure(figsize=(10, 10))
    shown = min(25, len(images))
    for i in range(shown):
        # Start the next subplot, with ticks and grid lines hidden.
        plt.subplot(5, 5, i + 1, title='name')
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i], cmap=plt.cm.binary)
    return figure
# Load the Fashion-MNIST train / test splits.
(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()

batchsz = 128

# Training pipeline: preprocess, shuffle, batch, and repeat 10 times so the
# single step-based loop in main() covers 10 passes over the data.
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)

# Test pipeline: same preprocessing; the incomplete final batch is dropped.
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess).batch(batchsz, drop_remainder=True)

# Build the network.
model = Sequential([
    layers.Dense(256, activation=tf.nn.relu),  # [b, 784] -> [b, 256]
    layers.Dense(128, activation=tf.nn.relu),  # [b, 256] -> [b, 128]
    layers.Dense(64, activation=tf.nn.relu),   # [b, 128] -> [b, 64]
    layers.Dense(32, activation=tf.nn.relu),   # [b, 64] -> [b, 32]
    layers.Dense(10),  # [b, 32] -> [b, 10], 330 = 32*10 (w) + 10 (b)
])
# Give the model an input shape so that the weights get created.
model.build(input_shape=[None, 28*28])
# Print the network structure.
model.summary()

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = optimizers.Adam(learning_rate=0.01)

# Create the summary writer; each run logs to its own timestamped directory
# under logs/.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = 'logs/' + current_time
summary_writter = tf.summary.create_file_writer(log_dir)

# Take the image part of the first training batch, keep its first image, and
# reshape it to [1, 28, 28, 1] (one image, one channel) for tf.summary.image.
sample_img = next(iter(db))[0]
sample_img = sample_img[0]
sample_img = tf.reshape(sample_img, [1, 28, 28, 1])
with summary_writter.as_default():
    tf.summary.image("Training sample:", sample_img, step=0)
def main():
    """Train over ``db`` and log loss, accuracy and images to TensorBoard.

    Uses the module-level ``db``, ``db_test``, ``model``, ``optimizer`` and
    ``summary_writter``. Every 100 steps the training loss is printed and
    logged. Every 500 steps the model is evaluated on ``db_test``, and the
    accuracy plus up to 25 images from the last test batch are logged, both
    one by one and as a single grid image.
    """
    flat_dim = 28 * 28
    for step, (images, labels) in enumerate(db):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            images = tf.reshape(images, [-1, flat_dim])
            logits = model(images)
            targets = tf.one_hot(labels, depth=10)
            per_sample_ce = tf.losses.categorical_crossentropy(
                targets, logits, from_logits=True)
            loss_ce = tf.reduce_mean(per_sample_ce)

        params = model.trainable_variables
        grads = tape.gradient(loss_ce, params)
        optimizer.apply_gradients(zip(grads, params))

        if step % 100 == 0:
            print(step, 'loss:', float(loss_ce))
            # Feed the loss value to the summary writer.
            with summary_writter.as_default():
                tf.summary.scalar('train-loss', float(loss_ce), step=step)

        # Evaluate on the test set.
        if step % 500 == 0:
            hits = 0
            seen = 0
            for test_x, test_y in db_test:
                test_x = tf.reshape(test_x, [-1, flat_dim])
                probs = tf.nn.softmax(model(test_x), axis=1)
                pred = tf.cast(tf.argmax(probs, axis=1), dtype=tf.int32)
                matches = tf.cast(tf.equal(pred, test_y), dtype=tf.int32)
                hits += int(tf.reduce_sum(matches))
                seen += test_x.shape[0]
            acc = hits / seen
            print(step, 'Evaluate Acc:', acc)

            # Take the first 25 images of the last evaluated batch and
            # restore them to [n, 28, 28, 1] for the image summary.
            preview = tf.reshape(test_x[:25], [-1, 28, 28, 1])
            with summary_writter.as_default():
                # Feed the accuracy.
                tf.summary.scalar('test-acc', float(acc), step=step)
                # Feed the images one by one.
                tf.summary.image("val-onebyone-images:", preview,
                                 max_outputs=25, step=step)
                # Feed the images as one combined grid picture.
                preview = tf.reshape(preview, [-1, 28, 28])
                figure = image_grid(preview)
                tf.summary.image('val-images:', plot_to_image(figure),
                                 step=step)


if __name__ == "__main__":
    main()