Forward Propagation (Tensors): Hands-On
What we have learned
creating tensors, indexing and slicing, reshape and broadcasting, math operations (a quick recap sketch follows below)
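A minimal recap sketch of these four operation groups; the tensor values here are illustrative only, not taken from the lecture.

import tensorflow as tf

# create tensors
a = tf.constant([[1., 2.], [3., 4.]])
b = tf.ones([2, 2])

# indexing and slicing
row0 = a[0]       # first row  -> shape (2,)
col1 = a[:, 1]    # second column -> shape (2,)

# reshape and broadcasting
flat = tf.reshape(a, [4])               # (2, 2) -> (4,)
shifted = a + tf.constant([10., 20.])   # (2,) broadcast against (2, 2)

# math operations
s = a + b
p = a @ b                 # matrix multiplication
m = tf.reduce_mean(a)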
Recap
$out = \text{relu}\{\text{relu}\{\text{relu}[X@W_1 + b_1]@W_2 + b_2\}@W_3 + b_3\}$
$pred = \arg\max(out)$
$loss = \text{MSE}(out, label)$
minimize $loss$ with respect to $[W_1', b_1', W_2', b_2', W_3', b_3']$
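The pred and loss steps of the recap map directly onto TensorFlow ops. A minimal sketch, where out and label are placeholders standing in for the network output and the integer labels:

import tensorflow as tf

out = tf.random.normal([4, 10])      # placeholder network output, batch of 4
label = tf.constant([3, 1, 7, 0])    # placeholder integer labels

pred = tf.argmax(out, axis=1)                        # pred = argmax(out)
y_onehot = tf.one_hot(label, depth=10)
loss = tf.reduce_mean(tf.square(y_onehot - out))     # loss = MSE(out, label)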
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# load the MNIST training split; x: (60000, 28, 28) pixel images, y: (60000,) integer labels
(x, y), _ = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)
(60000, 28, 28) (60000,) <dtype: 'float32'> <dtype: 'int32'>
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))
tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(255.0, shape=(), dtype=float32)
tf.Tensor(0, shape=(), dtype=int32) tf.Tensor(9, shape=(), dtype=int32)
# scale the raw pixel values from [0, 255] down to [0, 1]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.0
print(tf.reduce_min(x), tf.reduce_max(x))
tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
# wrap the tensors in a tf.data pipeline and draw batches of 128 samples
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
batch: (128, 28, 28) (128,)
batch: (128, 28, 28) (128,)
# layer dimensions: 784 -> 256 -> 128 -> 10
w1 = tf.random.truncated_normal([784, 256], stddev=0.1)
b1 = tf.zeros([256])
w2 = tf.random.truncated_normal([256, 128], stddev=0.1)
b2 = tf.zeros([128])
w3 = tf.random.truncated_normal([128, 10], stddev=0.1)
b3 = tf.zeros([10])
b3
<tf.Tensor: shape=(10,), dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>
tf.GradientTape()
Records operations for automatic differentiation. By default the tape only tracks tf.Variable objects, so the weight tensors need to be wrapped as tf.Variable.
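A minimal sketch of that default behavior (the scalar values are arbitrary): a tf.Variable is tracked automatically, while a plain tensor only yields a gradient if the tape is told to watch it.

import tensorflow as tf

v = tf.Variable(3.0)    # tracked by the tape automatically
c = tf.constant(3.0)    # not tracked by default

with tf.GradientTape() as tape:
    y = v * v + c * c

dv, dc = tape.gradient(y, [v, c])
print(dv)   # tf.Tensor(6.0, ...)
print(dc)   # None, because c was never watched

with tf.GradientTape() as tape:
    tape.watch(c)               # explicitly watch the plain tensor
    y = c * c
print(tape.gradient(y, c))      # tf.Tensor(6.0, ...)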
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
lr = 1e-3

for epoch in range(10):
    for step, (x, y) in enumerate(train_db):
        # flatten each 28x28 image into a 784-dim vector
        x = tf.reshape(x, [-1, 28 * 28])

        with tf.GradientTape() as tape:
            # forward pass: 784 -> 256 -> 128 -> 10
            h1 = x @ w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3

            # MSE loss against one-hot labels
            y_onehot = tf.one_hot(y, depth=10)
            loss = tf.square(y_onehot - out)
            loss = tf.reduce_mean(loss)

        # gradients w.r.t. all six parameters, then a plain SGD update
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
0 0 loss: 0.4980630874633789
0 100 loss: 0.20007649064064026
0 200 loss: 0.1879170686006546
0 300 loss: 0.1585090458393097
0 400 loss: 0.17265315353870392
1 0 loss: 0.15226225554943085
1 100 loss: 0.14862820506095886
1 200 loss: 0.15506379306316376
1 300 loss: 0.13298840820789337
1 400 loss: 0.14317986369132996
2 0 loss: 0.12775573134422302
2 100 loss: 0.13045187294483185
2 200 loss: 0.13609960675239563
2 300 loss: 0.11735637485980988
2 400 loss: 0.1252080500125885
3 0 loss: 0.11262925714254379
3 100 loss: 0.11829018592834473
3 200 loss: 0.12328527122735977
3 300 loss: 0.10690398514270782
3 400 loss: 0.11292298138141632
4 0 loss: 0.10224197059869766
4 100 loss: 0.10958822816610336
4 200 loss: 0.11386322975158691
4 300 loss: 0.0994718074798584
4 400 loss: 0.10404429584741592
5 0 loss: 0.09465127438306808
5 100 loss: 0.10307874530553818
5 200 loss: 0.10661985725164413
5 300 loss: 0.09376838058233261
5 400 loss: 0.09735797345638275
6 0 loss: 0.08888636529445648
6 100 loss: 0.09798622876405716
6 200 loss: 0.1008676290512085
6 300 loss: 0.08918877691030502
6 400 loss: 0.09213654696941376
7 0 loss: 0.08434510231018066
7 100 loss: 0.09376645088195801
7 200 loss: 0.0961538702249527
7 300 loss: 0.08544725924730301
7 400 loss: 0.08790375292301178
8 0 loss: 0.0806211456656456
8 100 loss: 0.09019575268030167
8 200 loss: 0.09219495952129364
8 300 loss: 0.0822930559515953
8 400 loss: 0.08442799001932144
9 0 loss: 0.07753437757492065
9 100 loss: 0.08713443577289581
9 200 loss: 0.08879637718200684
9 300 loss: 0.07959921658039093
9 400 loss: 0.08146005868911743
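The loop above only tracks the training loss; the pred = argmax(out) step from the recap can be used to check accuracy once training is done. A hedged sketch that reuses train_db and the trained variables (in practice a separate test split would be evaluated, which this section does not load):

total_correct, total_num = 0, 0
for x, y in train_db:
    x = tf.reshape(x, [-1, 28 * 28])
    # same forward pass as in the training loop
    h1 = tf.nn.relu(x @ w1 + b1)
    h2 = tf.nn.relu(h1 @ w2 + b2)
    out = h2 @ w3 + b3

    # pred = argmax(out); cast to int32 to compare against the labels
    pred = tf.cast(tf.argmax(out, axis=1), dtype=tf.int32)
    correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
    total_correct += int(correct)
    total_num += x.shape[0]

print('accuracy:', total_correct / total_num)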