手写数字体识别
手写数字体识别属于监督学习中的分类问题。图像分类(Image Classification)的经典数据集ImageNet解决的就是这类图像分类问题
数据集:MNIST
对线性回归方程引入一个非线性因子ReLu函数,使得线性回归方程变成非线性方程。
一层的非线性模型过于简单,因此通过多层非线性函数叠加:将上一层的输出作为下一层的输入。增加模型的非线性因子和复杂度。
数据预处理
train/test splitting 分割数据集
把数据集分为Train(训练集)和Test(测试集)两部分。图片大小为28*28,每个像素点都是一个[0-255]的灰度值:手写数字体的每一张图片数据格式为[28,28,1]。输入时将图片打平为784维向量
Input and Output
- 输入为:x:[b,784]
输出分类用One-Hot Encoding(独热编码)表示
输出结果为置信度的最大的类别,所有类别置信度之和为1
神经网络的训练
- step1 Compute [h1,h2,out]
- step2 Compute Loss
- step3 Compute Gradient and update [w1', b1', w2', b2', w3', b3']
- step4 loop
- prediction
Computation Graph
- out = x@w + b
- x:[1,784]
- W:[784,10]
- b:[10]
Loss
Loss = MSE(out,label)
MSE计算输出值与真实值之间的欧式距离
- minimize loss 更新w`和b`
step 0 x and y
# Step 0: load x and y and wrap them in a tf.data pipeline.
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers

# Load MNIST; only the training split is kept here, the test split is discarded.
(xs, ys), _ = datasets.mnist.load_data()
print('datasets:', xs.shape, ys.shape)

# Scale pixel values from [0, 255] into [0, 1] for stable training.
xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.

# BUG FIX: was `td.data.Dataset...` — `td` is undefined; the module is `tf.data`.
db = tf.data.Dataset.from_tensor_slices((xs, ys))
for step, (x, y) in enumerate(db):
    print(step, x.shape, y, y.shape)
model & optimizers
# Three-layer fully connected network: 784 -> 512 -> 256 -> 10.
# BUG FIX: was `tf.kearas.Sequential` (typo) — the package is `tf.keras`.
model = tf.keras.Sequential([
    layers.Dense(512, activation="relu"),  # Dense: fully connected layer
    layers.Dense(256, activation="relu"),
    layers.Dense(10)])                     # 10 raw logits, one per digit class

# SGD (stochastic gradient descent) performs the parameter updates.
# NOTE(review): this rebinds `optimizers`, shadowing the imported module;
# the name is kept because the later snippets call `optimizers.apply_gradients`.
optimizers = optimizers.SGD(learning_rate=0.001)
step1 & step2 Compute out & loss
# Steps 1 & 2: forward pass and loss, recorded on a gradient tape.
with tf.GradientTape() as tape:
    # Flatten each image: [b, 28, 28] => [b, 784].
    x = tf.reshape(x, (-1, 28 * 28))
    # Step 1: forward pass, [b, 784] => [b, 10] logits.
    out = model(x)
    # Step 2: mean squared error between logits and one-hot labels —
    # the squared Euclidean distance averaged over the batch.
    # BUG FIX: the original trailing note lacked a leading `#`,
    # making the snippet a SyntaxError; it is now a comment.
    loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
step3 loss & update
# Step 3: differentiate the loss w.r.t. every trainable parameter
# (w1, b1, w2, b2, w3, b3) and apply one SGD step.
trainables = model.trainable_variables
grads = tape.gradient(loss, trainables)
# The optimizer performs w' = w - lr * grad for each (grad, var) pair.
optimizers.apply_gradients(zip(grads, trainables))
# 根据grads梯度值自动对模型参数进行更新
loop
def train_epoch(epoch):
    """Run one full pass over `train_dataset`, updating `model` with SGD.

    Args:
        epoch: epoch index, used only for progress printing.

    Relies on the module-level `train_dataset`, `model`, and `optimizers`
    (the SGD instance). Prints the batch loss every 100 steps.
    """
    # FIX(review): the original listing lost all indentation and was not
    # valid Python; the structure below is reconstructed conventionally.
    # Step 4: loop over mini-batches.
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # Flatten each image: [b, 28, 28] => [b, 784].
            x = tf.reshape(x, (-1, 28 * 28))
            # Step 1: forward pass, [b, 784] => [b, 10] logits.
            out = model(x)
            # Step 2: mean squared error against the one-hot labels.
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step 3: gradients for w1, w2, w3, b1, b2, b3; w' = w - lr * grad.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizers.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, loss.numpy())
ALL code
环境
- anaconda3
- tensorflow2
- python3.7
import os

# Silence TensorFlow's informational and warning log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

# Load MNIST: training split plus a held-out validation split.
(x, y), (x_val, y_val) = datasets.mnist.load_data()

# Scale pixels into [0, 1] and one-hot encode the labels over 10 classes.
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.one_hot(tf.convert_to_tensor(y, dtype=tf.int32), depth=10)
print(x.shape, y.shape)

# Pair images with labels and group them into mini-batches of 200.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(200)
# Fully connected classifier: 784 -> 512 -> 256 -> 10.
dense_stack = [
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10),  # raw logits for the 10 digit classes
]
model = keras.Sequential(dense_stack)

# Plain stochastic gradient descent with a fixed learning rate.
optimizer = optimizers.SGD(learning_rate=0.001)
def train_epoch(epoch):
    """Train `model` for one full pass over `train_dataset`.

    Args:
        epoch: epoch index, used only for progress printing.

    Uses the module-level `model`, `optimizer`, and `train_dataset`.
    Prints the batch loss every 100 steps.
    """
    # FIX(review): the original listing lost all indentation and was not
    # valid Python; the structure below is reconstructed conventionally.
    # Step 4: loop over mini-batches.
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]: flatten each image to a vector.
            x = tf.reshape(x, (-1, 28 * 28))
            # Step 1: forward pass, [b, 784] => [b, 10] logits.
            out = model(x)
            # Step 2: mean squared error against the one-hot labels.
            # NOTE(review): softmax + cross-entropy is the usual choice for
            # classification; MSE is kept here to match the lesson text.
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step 3: gradients for w1, w2, w3, b1, b2, b3; w' = w - lr * grad.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())
def train():
    """Run 30 training epochs over the MNIST training set."""
    for epoch_idx in range(30):
        train_epoch(epoch_idx)