# Python implementation: MNIST dataset hands-on practice (heavily commented)

# Article link: https://blog.csdn.net/shengxing_stu/article/details/110919247
import tensorflow as tf
from tensorflow.keras import datasets
from matplotlib import pyplot as plt
import os
import pysnooper

# Ask TensorFlow's native (C++) logger to hide messages below level 2.
# NOTE(review): this runs after `import tensorflow` above; the variable may
# need to be set before that import to silence start-up messages -- confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"


# Custom preprocessing function, applied to dataset elements via Dataset.map
def preprocess(x, y):
    """Normalise and flatten images, and one-hot encode their labels.

    Args:
        x: Image tensor. It is cast to float32, divided by 255 and reshaped
            to ``[-1, 784]`` (the -1 lets the number of images vary).
            Presumably arrives as ``[b, 28, 28]`` pixel values in 0-255,
            per the original author's note -- verify against the caller.
        y: Label tensor. It is cast to int32 and one-hot encoded with
            depth 10.

    Returns:
        A ``(images, labels)`` tuple that replaces the incoming ``(x, y)``
        pair in the dataset.
    """
    # Cast first, then scale, so the division happens in float32.
    pixels = tf.cast(x, dtype=tf.float32)
    scaled = pixels / 255.
    # Flatten every 28x28 image into a 784-element row.
    flat_images = tf.reshape(scaled, [-1, 28 * 28])
    # tf.one_hot needs integer class indices.
    class_ids = tf.cast(y, dtype=tf.int32)
    one_hot_labels = tf.one_hot(class_ids, depth=10)
    return flat_images, one_hot_labels


# Load MNIST as (train images, train labels), (test images, test labels).
(x, y), (x_test, y_test) = datasets.mnist.load_data()

# Training pipeline, built as one chain:
#   1. slice the arrays into (image, label) pairs;
#   2. shuffle with a 1000-element buffer (pairs stay together);
#   3. group into batches of 128;
#   4. run preprocess on every batch;
#   5. repeat the whole dataset 20 times.
train_db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(1000)
    .batch(128)
    .map(preprocess)
    .repeat(20)
)

# Test pipeline: the same steps, but without repetition.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(1000)
test_db = test_db.batch(128)
test_db = test_db.map(preprocess)

# Pull one preprocessed training batch; this rebinds the module-level x and y.
x, y = next(iter(train_db))
# print(train_db)

# print('train sample:', x.shape, y.shape)


def _forward(x, params):
    """Run the three-layer perceptron and return the raw output scores.

    Args:
        x: Input batch, expected to be shaped ``[b, 784]``.
        params: Sequence ``[w1, b1, w2, b2, w3, b3]`` of layer weights/biases.

    Returns:
        Tensor of shape ``[b, 10]`` (no activation on the output layer).
    """
    w1, b1, w2, b2, w3, b3 = params
    h1 = tf.nn.relu(x @ w1 + b1)
    h2 = tf.nn.relu(h1 @ w2 + b2)
    return h2 @ w3 + b3


def _evaluate(params):
    """Return the classification accuracy of the network over ``test_db``."""
    total, total_correct = 0, 0
    for x_batch, y_batch in test_db:
        out = _forward(x_batch, params)
        # [b, 10] => [b]: predicted class is the index of the largest score.
        pred = tf.argmax(out, axis=1)
        # Labels are one-hot encoded; convert them back to class indices.
        labels = tf.argmax(y_batch, axis=1)
        correct = tf.equal(pred, labels)
        # bool tensor => int tensor => Python int
        total_correct += int(tf.reduce_sum(tf.cast(correct, dtype=tf.int32)))
        total += x_batch.shape[0]
    return total_correct / total


# @pysnooper.snoop(watch='step')
def main():
    """Train a 784-256-128-10 perceptron on ``train_db`` and plot its progress.

    The network is trained with plain gradient descent on a mean-squared-error
    loss between the one-hot labels and the raw network outputs. Every
    ``log_every`` steps the training loss is printed and recorded, and the
    accuracy over ``test_db`` is measured, printed and recorded. After
    training, two figures are shown: training loss vs. step and test accuracy
    vs. step.
    """
    lr = 1e-2
    # Logging/evaluation interval; also used to build the plot's x axis so
    # that the plotted steps match the steps at which values were recorded.
    log_every = 50
    acce, losses = [], []
    # 784 -> 256
    w1, b1 = tf.Variable(tf.random.normal([784, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
    # 256 -> 128
    w2, b2 = tf.Variable(tf.random.normal([256, 128], stddev=0.1)), tf.Variable(tf.zeros([128]))
    # 128 -> 10
    w3, b3 = tf.Variable(tf.random.normal([128, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
    params = [w1, b1, w2, b2, w3, b3]

    # train_db already repeats the data, so one pass over it covers all epochs.
    for step, (x, y) in enumerate(train_db):
        # Ensure a flat [b, 784] input even if the batch arrives as [b, 28, 28].
        x = tf.reshape(x, (-1, 784))
        with tf.GradientTape() as tape:
            out = _forward(x, params)
            # Mean squared error against the one-hot labels.
            loss = tf.reduce_mean(tf.square(y - out))
        grads = tape.gradient(loss, params)
        # Plain gradient-descent update, in place on each variable.
        for p, q in zip(params, grads):
            p.assign_sub(lr * q)

        if step % log_every == 0:
            loss_value = float(loss)
            print(step, 'loss:', loss_value)
            losses.append(loss_value)

            acc = _evaluate(params)
            print(step, 'Evaluate Acc:', acc)
            acce.append(acc)

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False
    # Entry i of losses/acce was recorded at step i * log_every.
    steps = [i * log_every for i in range(len(losses))]

    plt.figure()
    plt.plot(steps, losses, label='训练')
    plt.ylabel('MSE')
    plt.xlabel('Step')
    plt.legend()
    # plt.savefig('train.svg')
    plt.show()

    plt.figure()
    plt.plot(steps, acce, label='测试')
    plt.ylabel('准确率')
    plt.xlabel('Step')
    plt.legend()
    # plt.savefig('test.svg')
    plt.show()


if __name__ == '__main__':
    main()