Classification Problem
1. X and Y
2. out = relu{relu[relu(X@W1 + b1)@W2 + b2]@W3 + b3}
3. Compute out & loss
4. Compute gradient and optimize
5. Loop
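The nested formula in step 2 is three affine layers with a ReLU after each. A minimal sketch of that forward pass (the layer sizes 512/256/10 are borrowed from Code 1 below; note the Keras model there leaves its final layer linear):

import tensorflow as tf

def forward(X, W1, b1, W2, b2, W3, b3):
    h1 = tf.nn.relu(X @ W1 + b1)     # [b, 784] => [b, 512]
    h2 = tf.nn.relu(h1 @ W2 + b2)    # [b, 512] => [b, 256]
    return tf.nn.relu(h2 @ W3 + b3)  # [b, 256] => [b, 10], relu as written in step 2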
Code 1
forward_layer.py (forward network)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# When os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0', logs show: INFO + WARNING + ERROR + FATAL
# When os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1', logs show: WARNING + ERROR + FATAL
# When os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2', logs show: ERROR + FATAL
# When os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3', logs show: FATAL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets
# how the data is prepared affects computation speed
(x, y), (x_val, y_val) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
y = tf.one_hot(y, depth=10)
print(x.shape, y.shape)
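# A quick illustration of one-hot encoding (hypothetical values):
# tf.one_hot([0, 2], depth=3) => [[1., 0., 0.],
#                                 [0., 0., 1.]]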
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
# from_tensor_slices is one of the core Dataset constructors: it slices the given
# tuple/list/tensor data along the outermost dimension, yielding one element per slice.
# When several tensors are combined into a tuple, each slice takes the matching
# outermost entry from every tensor, so each label lines up with its features,
# e.g. [f11, f12] [t1], where f11 and f12 are the first sample's two features and
# t1 is its label. Here every element is one (image, one-hot label) pair; drawing
# batch_size elements at a time is done by batch() below, not by from_tensor_slices.
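# A toy illustration of the slicing (hypothetical values):
# features = tf.constant([[1, 2], [3, 4], [5, 6]])  # shape [3, 2]
# labels = tf.constant([0, 1, 1])                   # shape [3]
# ds = tf.data.Dataset.from_tensor_slices((features, labels))
# for f, t in ds:
#     print(f.numpy(), t.numpy())  # [1 2] 0, then [3 4] 1, then [5 6] 1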
# batch size affects model performance; a somewhat smaller batch often works better
train_dataset = train_dataset.batch(200)
# model design affects model performance
model = keras.Sequential([
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(10)
    # Dense: a fully connected layer; each entry adds one layer
    # keras.layers.Dense(units, activation=None, ...)
])
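# Dense layers create their weights lazily on the first call; a quick probe
# (illustrative only, not part of the training script):
# probe = layers.Dense(512, activation='relu')
# _ = probe(tf.zeros([1, 784]))  # builds the layer for input dim 784
# print(probe.kernel.shape)      # (784, 512)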
optimizer = optimizers.SGD(learning_rate=0.001)
def train_epoch(epoch):
    for step, (x, y) in enumerate(train_dataset):
        # operations on trainable_variables recorded by the tape can be
        # differentiated automatically
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # Step1. compute output
            # [b, 784] => [b, 10]
            out = model(x)
            # Step2. compute loss, averaged per sample so its magnitude does not
            # depend on the batch size
            loss = tf.reduce_sum(tf.square(out - y)) / x.shape[0]
        # Step3. optimize and update w1, w2, w3, b1, b2, b3
        # compute the gradients
        grads = tape.gradient(loss, model.trainable_variables)
        # w' = w - lr * grad
        # these become the parameters used in the next iteration
        # zip() pairs corresponding elements of several iterables into tuples:
        # >>> a = [1, 2, 3]
        # >>> b = [4, 5, 6]
        # >>> c = [4, 5, 6, 7, 8]
        # >>> list(zip(a, b))
        # [(1, 4), (2, 5), (3, 6)]
        # >>> list(zip(a, c))  # pairs up to the shortest iterable
        # [(1, 4), (2, 5), (3, 6)]
        # Note: plain SGD with a fixed learning_rate does not adjust the step size
        # across epochs; adaptive optimizers or schedules are needed for that.
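        # Sketch of what apply_gradients does here for plain SGD (an
        # illustrative equivalent, not the library's actual internals):
        # for g, w in zip(grads, model.trainable_variables):
        #     w.assign_sub(0.001 * g)  # learning_rate = 0.001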
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())
def train():
    for epoch in range(30):
        train_epoch(epoch)

if __name__ == '__main__':
    train()
Output:
D:\anaconda\python.exe E:/code/Deep-Learning-with-TensorFlow-book-master/Deep-Learning-with-TensorFlow-book-master/ch03-分类问题/forward_layer.py
(60000, 28, 28) (60000, 10)
0 0 loss: 1.7215357
0 100 loss: 0.9395142
0 200 loss: 0.74100405
1 0 loss: 0.66707647
1 100 loss: 0.68889314
1 200 loss: 0.5701578
2 0 loss: 0.5418442
2 100 loss: 0.6035171
2 200 loss: 0.500142
3 0 loss: 0.48093933
3 100 loss: 0.5532955
3 200 loss: 0.45778134
4 0 loss: 0.44225442
4 100 loss: 0.5177527
4 200 loss: 0.4276848
...
Code 2
forward_tensor.py
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import matplotlib
from matplotlib import pyplot as plt
# modify matplotlib's loaded configuration through the rcParams dictionary
matplotlib.rcParams['font.size'] = 20
matplotlib.rcParams['figure.titlesize'] = 20
matplotlib.rcParams['figure.figsize'] = [9, 7]
matplotlib.rcParams['font.family'] = ['STKaiTi']
matplotlib.rcParams['axes.unicode_minus'] = False
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
# x: [60k, 28, 28],
# y: [60k]
(x, y), _ = datasets.mnist.load_data()
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)
# tf.reduce_min / tf.reduce_max compute the minimum / maximum of a tensor's elements
print(tf.reduce_min(x), tf.reduce_max(x))
print(tf.reduce_min(y), tf.reduce_max(y))
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
# wrap train_db in an iterator object
# Iterable objects fall into two groups:
#   1. containers such as list, tuple, dict, set, str
#   2. generators (including generator functions with yield), which are already Iterators
# A generator works in a for loop and can also be advanced with next(); any object
# that next() can repeatedly call to get the next value is an Iterator.
# Containers like list and dict must be converted with iter() first.
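# A minimal illustration:
# it = iter([1, 2, 3])
# next(it)  # 1
# next(it)  # 2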
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
# tf.random.truncated_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)
# draws random values from a truncated normal distribution; shape is the output
# shape and stddev the standard deviation.
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
# tf.zeros(shape, dtype=tf.float32, name=None); shape gives the dimensions:
# tf.zeros([1])    => [0.]
# tf.zeros([2])    => [0. 0.]
# tf.zeros([2, 4]) => [[0. 0. 0. 0.] [0. 0. 0. 0.]]
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
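# truncated_normal re-draws any sample farther than two standard deviations
# from the mean, so every initial weight above lies within +/- 2 * stddev.
# A quick check (illustrative only):
# sample = tf.random.truncated_normal([10000], stddev=0.1)
# print(float(tf.reduce_max(tf.abs(sample))))  # <= 0.2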
lr = 1e-3
losses = []

for epoch in range(20):  # iterate over the dataset for 20 epochs
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x: [128, 28, 28]
        # y: [128]
        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:
            # x: [b, 28*28]
            # h1 = relu(x@w1 + b1)
            # [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b, 256] + [b, 256]
            h1 = x @ w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            # (the explicit broadcast_to is for illustration; x @ w1 + b1
            # broadcasts automatically, as the next two layers show)
            h1 = tf.nn.relu(h1)  # the ReLU nonlinearity from the formula in step 2
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3
            # mean squared error against the one-hot labels
            y_onehot = tf.one_hot(y, depth=10)
            loss = tf.square(y_onehot - out)
            loss = tf.reduce_mean(loss)
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        w1.assign_sub(lr * grads[0])  # in-place update: w1 <- w1 - lr * grad
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
    # record one loss value per epoch for the plot below
    losses.append(float(loss))
plt.figure()
plt.plot(losses, color='C0', marker='s', label='train')  # label sets the legend entry
plt.xlabel('Epoch')
plt.legend()  # display the legend
plt.ylabel('MSE')
plt.savefig('forward.svg')
plt.show()
Output:
D:\anaconda\python.exe E:/code/Deep-Learning-with-TensorFlow-book-master/Deep-Learning-with-TensorFlow-book-master/ch03-分类问题/forward_tensor.py
(60000, 28, 28) (60000,) <dtype: 'float32'> <dtype: 'int32'>
tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(0, shape=(), dtype=int32) tf.Tensor(9, shape=(), dtype=int32)
batch: (128, 28, 28) (128,)
0 0 loss: 0.6105233430862427
0 100 loss: 0.22369380295276642
0 200 loss: 0.18364770710468292
0 300 loss: 0.1623571515083313
0 400 loss: 0.17271950840950012
1 0 loss: 0.15685400366783142
1 100 loss: 0.1645067036151886
1 200 loss: 0.14837318658828735
1 300 loss: 0.13333944976329803
1 400 loss: 0.14556176960468292
Code 3
main.py
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
# configure GPU usage
# get the list of physical GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # enable memory growth so TensorFlow allocates GPU memory on demand
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # print the exception
        print(e)
(xs, ys), _ = datasets.mnist.load_data()
print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())
batch_size = 32
xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
db = tf.data.Dataset.from_tensor_slices((xs, ys))
db = db.batch(batch_size).repeat(30)
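# .repeat(30) chains 30 passes over the data, so the single enumerate(db)
# loop below effectively trains for 30 epochs.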
model = Sequential([layers.Dense(256, activation='relu'),
                    layers.Dense(128, activation='relu'),
                    layers.Dense(10)])
model.build(input_shape=(4, 28*28))
model.summary()
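# (model.build above uses a dummy batch size of 4; only the feature dimension
# 28*28 matters for building the weights.)
# The parameter counts in the summary are kernel + bias sizes:
# dense:   784*256 + 256 = 200,960
# dense_1: 256*128 + 128 = 32,896
# dense_2: 128*10  + 10  = 1,290   (total: 235,146)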
optimizer = optimizers.SGD(learning_rate=0.01)
acc_meter = metrics.Accuracy()
for step, (x, y) in enumerate(db):
    with tf.GradientTape() as tape:
        # flatten, [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 28*28))
        # Step1. compute the model output, [b, 784] => [b, 10]
        out = model(x)
        # [b] => [b, 10]
        y_onehot = tf.one_hot(y, depth=10)
        # squared error, [b, 10]
        loss = tf.square(out - y_onehot)
        # sum over the batch and average per sample, giving a scalar
        loss = tf.reduce_sum(loss) / x.shape[0]
    # running accuracy: compare the predicted class with the integer label
    acc_meter.update_state(tf.argmax(out, axis=1), y)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if step % 200 == 0:
        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())
        acc_meter.reset_states()
Output:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) multiple 200960
_________________________________________________________________
dense_1 (Dense) multiple 32896
_________________________________________________________________
dense_2 (Dense) multiple 1290
=================================================================
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________
0 loss: 2.227904796600342 acc: 0.125
200 loss: 0.4319009780883789 acc: 0.69171876
400 loss: 0.38161996006965637 acc: 0.84328127
600 loss: 0.3747447431087494 acc: 0.86515623
800 loss: 0.2814728319644928 acc: 0.88921875
1000 loss: 0.3378041982650757 acc: 0.89453125
1200 loss: 0.31174880266189575 acc: 0.9060938
1400 loss: 0.21993900835514069 acc: 0.915625
1600 loss: 0.24299892783164978 acc: 0.90625
1800 loss: 0.2220946103334427 acc: 0.9265625
2000 loss: 0.23573870956897736 acc: 0.93859375
...