import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import Sequential,losses,optimizers,layers,datasets
# Training hyperparameters.
lr = 0.01      # learning rate handed to the optimizer
batchsz = 512  # number of samples per mini-batch

# Small CNN: two Conv -> BatchNorm -> MaxPool -> ReLU stages followed by a
# dense head.  The final Dense layer has no activation, so the network
# outputs raw scores for the 10 classes.
network = Sequential([
    layers.Conv2D(6, kernel_size=3, strides=1),   # conv 1: six 3x3 kernels
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=2, strides=2),  # halves height and width
    layers.ReLU(),
    layers.Conv2D(16, kernel_size=3, strides=1),  # conv 2: sixteen 3x3 kernels
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=2, strides=2),  # halves height and width
    layers.ReLU(),
    layers.Flatten(),                             # flatten for the dense layers
    layers.Dense(120, activation='relu'),         # fully connected, 120 units
    layers.BatchNormalization(),
    layers.Dense(84, activation='relu'),          # fully connected, 84 units
    layers.BatchNormalization(),
    layers.Dense(10),                             # output layer, 10 units
])
# Build with an unspecified batch dimension: the weights only depend on the
# per-sample shape (28, 28, 1), and the script feeds batches of `batchsz`
# samples (plus a smaller final batch), not the 4 that used to be hard-coded.
network.build(input_shape=(None, 28, 28, 1))
# Uncomment to print the layer-by-layer summary.
# network.summary()
# Load the MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Training pipeline: slice into (image, label) pairs, shuffle through a
# 10000-element buffer so batches are not seen in a fixed order, then group
# the samples into mini-batches of `batchsz`.
train_db = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(batchsz)
)
def propress(x, y):
    """Preprocess a batch of images and labels for the network.

    Args:
        x: image data; cast to float32, divided by 255 and reshaped to
            ``(-1, 28, 28)``.
        y: integer class labels; cast to int32 and one-hot encoded with
            depth 10.

    Returns:
        The ``(images, one_hot_labels)`` pair.
    """
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    images = tf.reshape(scaled, [-1, 28, 28])
    one_hot_labels = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
    return images, one_hot_labels
# Preprocess every batch, then iterate over the training set 20 times.
train_db = train_db.map(propress).repeat(20)

# Gradient-descent optimizer using the learning rate configured in `lr`.
optimizer = optimizers.SGD(learning_rate=lr)

# Categorical cross-entropy; from_logits=True because the network's last
# layer applies no softmax.
criteon = losses.CategoricalCrossentropy(from_logits=True)
# Evaluation pipeline: batch the test split and apply the same `propress`
# preprocessing as the training data (the function is defined earlier in this
# file, so it is not redefined here).
#
# The data is neither shuffled nor repeated: accuracy does not depend on
# sample order, and repeating the set 20 times only made every evaluation
# pass 20x slower while producing exactly the same correct/total ratio.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.batch(batchsz).map(propress)
# Metric histories; one entry is appended every 100 training steps.
# NOTE: this list rebinds the name `losses`, hiding the keras `losses` module
# imported at the top of the file.  That only works because `criteon` was
# created before this point; the name is kept because the plotting code that
# follows reads it.
losses = []
acc = []

for step, (x, y) in enumerate(train_db):
    # Append the channel axis: (batch, 28, 28) -> (batch, 28, 28, 1).
    x = tf.expand_dims(x, axis=3)
    with tf.GradientTape() as tape:
        # training=True tells the BatchNormalization layers that this is a
        # training pass rather than inference.
        out = network(x, training=True)
        loss = criteon(y, out)  # y is already one-hot encoded by propress
    grads = tape.gradient(loss, network.trainable_variables)
    optimizer.apply_gradients(zip(grads, network.trainable_variables))

    if step % 100 == 0:
        loss_value = float(loss)
        print(step, 'loss:{}'.format(loss_value))
        losses.append(loss_value)

        # Measure accuracy on the test set.  The loop uses its own variable
        # names so it clobbers neither the current training batch nor the
        # module-level `y_test` array.
        correct, total = 0, 0
        for x_eval, y_eval in test_db:
            x_eval = tf.expand_dims(x_eval, axis=3)
            # training=False keeps the BatchNormalization layers in
            # inference mode.
            logits = network(x_eval, training=False)
            pred = tf.argmax(logits, axis=1)
            y_true = tf.argmax(y_eval, axis=1)
            hits = tf.reduce_sum(tf.cast(tf.equal(pred, y_true), dtype=tf.int32))
            correct += int(hits.numpy())
            total += x_eval.shape[0]
        test_acc = correct / total
        print(step, "test_acc:{}".format(test_acc))
        acc.append(test_acc)
# The training loop records one loss/accuracy value every 100 steps, so the
# i-th recorded value belongs to step i * 100 (the previous factor of 5 put
# the curves on the wrong x-axis scale).
steps = list(range(0, len(losses) * 100, 100))

# Training-loss curve.  The `label` is only displayed if a legend is added.
plt.figure()
plt.plot(steps, losses, color='C0', marker='s', label='训练')
plt.xlabel('step')
plt.ylabel('losses')
plt.show()

# Test-accuracy curve, on its own figure so it can never be drawn on top of
# the loss curve when show() leaves the previous figure open.
plt.figure()
plt.plot(steps, acc, color='C0', marker='s', label='c测试')
plt.xlabel('step')
plt.ylabel('acc')
plt.show()
0 loss:2.755319118499756
0 test_acc:0.1306
100 loss:0.45182928442955017
100 test_acc:0.2676
200 loss:0.33671215176582336
200 test_acc:0.4734
300 loss:0.2975693643093109
300 test_acc:0.7508
400 loss:0.21543249487876892
400 test_acc:0.9093
500 loss:0.2360231578350067
500 test_acc:0.9453
600 loss:0.17904561758041382
600 test_acc:0.9547
700 loss:0.1409425437450409
700 test_acc:0.9591
800 loss:0.1463257521390915
800 test_acc:0.9625
900 loss:0.12103059887886047
900 test_acc:0.9632
1000 loss:0.14103813469409943
1000 test_acc:0.9664
1100 loss:0.11426682770252228
1100 test_acc:0.9665
1200 loss:0.1260887235403061
1200 test_acc:0.9682
1300 loss:0.08206073939800262
1300 test_acc:0.9692
1400 loss:0.12712940573692322
1400 test_acc:0.9708
1500 loss:0.11015598475933075
1500 test_acc:0.9706
1600 loss:0.09226851165294647
1600 test_acc:0.9723
1700 loss:0.11278203129768372
1700 test_acc:0.9726
1800 loss:0.08437538146972656
1800 test_acc:0.9733
1900 loss:0.0878986120223999
1900 test_acc:0.9749
2000 loss:0.0847143679857254
2000 test_acc:0.9756
2100 loss:0.08022576570510864
2100 test_acc:0.9763
2200 loss:0.10397559404373169
2200 test_acc:0.9763
2300 loss:0.07622624933719635
2300 test_acc:0.9775