import os

# Must be set BEFORE tensorflow is imported, otherwise the C++ runtime has
# already initialized its logger and the setting is ignored.
# '2' suppresses INFO and WARNING messages from the TF C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Trainable parameters of the 3-layer MLP: 784 -> 256 -> 128 -> 10.
# Weights use a truncated normal draw; biases start at zero.
# NOTE: the three truncated_normal calls are issued in the same order as
# before so the (unseeded) random stream is consumed identically.
_INIT_STDDEV = 0.1

w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=_INIT_STDDEV))  # layer 1 weights
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=_INIT_STDDEV))  # layer 2 weights
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=_INIT_STDDEV))   # output weights
b1 = tf.Variable(tf.zeros([256]))  # layer 1 bias
b2 = tf.Variable(tf.zeros([128]))  # layer 2 bias
b3 = tf.Variable(tf.zeros([10]))   # output bias
# Load MNIST (downloads/caches via Keras). Images arrive as uint8 in [0, 255].
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)/255. # convert to tensor, scale pixels to [0, 1]
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)
x_train = tf.reshape(x_train, [-1, 28*28])  # flatten 28x28 images to 784-dim rows
# One forward pass (outside any tape, so this is illustrative only).
# [60000, 256] = [60000, 784] @ [784, 256] + [256]
net1 = x_train@w1 + tf.broadcast_to(b1, [x_train.shape[0], 256])  # b1 would also broadcast automatically
out1 = tf.nn.relu(net1)
net2 = out1@w2 + b2
out2 = tf.nn.relu(net2)
# Keep the raw logits separate: softmax_cross_entropy_with_logits applies
# softmax internally, so feeding it already-softmaxed values (as before)
# double-applies softmax and yields a wrong, nearly-flat loss.
logits = out2@w3 + b3
out3 = tf.nn.softmax(logits)  # class probabilities, for inspection
y_train = tf.one_hot(y_train, depth=10)  # one-hot labels, reused by the training loop below
loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits)  # cross-entropy on raw logits
loss = tf.reduce_mean(loss)
# Backpropagation demo: one taped forward pass and gradient computation.
# (These grads are not applied; the training loop below recomputes them.)
with tf.GradientTape() as tape:
    tape.watch([w1, b1, w2, b2, w3, b3])  # plain Variables are watched by default; explicit for clarity
    # Raw logits only — do NOT softmax before softmax_cross_entropy_with_logits,
    # which applies softmax internally (the old code double-softmaxed).
    logits = tf.nn.relu(tf.nn.relu(x_train@w1 + b1)@w2 + b2)@w3 + b3
    out3 = tf.nn.softmax(logits)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits))
grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
# Parameter update by plain gradient descent: w <- w - lr * grad
lr = 0.01
All_loss = []  # per-step scalar losses, for plotting
params = [w1, b1, w2, b2, w3, b3]
for step in range(1001):
    with tf.GradientTape() as tape:
        tape.watch(params)
        # Raw logits go into the loss: softmax_cross_entropy_with_logits
        # applies softmax itself. The previous code softmaxed first, which
        # flattens gradients and is why the loss barely decreased.
        logits = tf.nn.relu(tf.nn.relu(x_train @ w1 + b1) @ w2 + b2) @ w3 + b3
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits))
    # Store a plain float, not the tensor, so the tape/graph can be freed.
    All_loss.append(float(loss))
    grads = tape.gradient(loss, params)
    # Apply the update to every parameter in lockstep with its gradient.
    for var, g in zip(params, grads):
        var.assign_sub(lr * g)
    if step % 100 == 0:
        print(step, 'loss:', float(loss))
plt.plot(All_loss)
plt.show()
# Evaluate the trained model on the test set.
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)/255.  # same [0, 1] scaling as training data
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
x_test = tf.reshape(x_test, [-1, 28*28])
# Softmax is monotonic, so argmax over raw logits gives the same prediction.
logits = tf.nn.relu(tf.nn.relu(x_test@w1 + b1)@w2 + b2)@w3 + b3
y_predict = tf.math.argmax(logits, axis=-1)  # int64 predicted class per row
y_test = tf.cast(y_test, tf.int64)  # match argmax dtype for the comparison
# Accuracy = mean of the 0/1 correctness mask; works for any test-set size
# (the old code hard-coded a denominator of 10000).
r = tf.reduce_mean(tf.cast(tf.math.equal(y_predict, y_test), tf.float32))
print(r)
# Expected output:
# 0 loss: 2.2990057468414307
# 100 loss: 2.274552583694458
# 200 loss: 2.254326581954956
# 300 loss: 2.235755443572998
# 400 loss: 2.214735269546509
# 500 loss: 2.1878538131713867
# 600 loss: 2.1503565311431885
# 700 loss: 2.100029230117798
# 800 loss: 2.043839693069458
# 900 loss: 1.9901422262191772
# 1000 loss: 1.9435820579528809
# tf.Tensor(0.6674, shape=(), dtype=float64)