深度学习-加载MNIST数据集

最新推荐文章于 2024-05-07 14:33:04 发布
Big Whale
最新推荐文章于 2024-05-07 14:33:04 发布
阅读量1k
点赞数
文章标签： python 机器学习 深度学习
本文链接：https://blog.csdn.net/weixin_52860733/article/details/126629824
版权
'''import numpy as np
# 1. Sample the data
def _sample_point():
    """Draw one noisy [x, y] sample from the line y = 1.477*x + 0.089."""
    x = np.random.uniform(-10., 10.)       # input x, drawn uniformly
    eps = np.random.normal(0., 0.01)       # Gaussian observation noise
    return [x, 1.477*x + 0.089 + eps]      # model output with the noise added


# Draw 100 samples and stack them into a 2-D NumPy array, one row per sample.
data = np.array([_sample_point() for _ in range(100)])
# 2. Compute the error
def mse(b, w, points):
    """Return the mean squared error of the line ``y = w*x + b`` over ``points``.

    ``points`` is indexed as a 2-D array: column 0 holds the inputs x and
    column 1 holds the observed outputs y.
    """
    # Accumulate the squared residuals sample by sample, in row order.
    squared_error_sum = sum(
        (y - (w*x + b)) ** 2
        for x, y in zip(points[:, 0], points[:, 1])
    )
    # Dividing by the sample count turns the sum into the mean.
    return squared_error_sum/float(len(points))
# 3. Compute the gradient
def step_gradient(b_current, w_current, points, lr):
    """Apply one gradient-descent update to ``b`` and ``w`` for the MSE loss.

    Args:
        b_current: current intercept b.
        w_current: current slope w.
        points: 2-D array; column 0 holds the inputs x, column 1 the targets y.
        lr: learning rate.

    Returns:
        The updated parameters as the list ``[new_b, new_w]``.
    """
    b_gradient = 0
    w_gradient = 0
    M = float(len(points))                 # total number of samples
    for x, y in zip(points[:, 0], points[:, 1]):
        residual = (w_current * x + b_current) - y
        # Derivative of the loss w.r.t. b: grad_b = 2(wx+b-y), averaged over M
        b_gradient += (2/M) * residual
        # Derivative of the loss w.r.t. w: grad_w = 2(wx+b-y)*x, averaged over M
        w_gradient += (2/M) * x * residual
    # Gradient-descent update of b and w, scaled by the learning rate lr
    new_b = b_current - (lr * b_gradient)
    new_w = w_current - (lr * w_gradient)
    return [new_b, new_w]


# Backward-compatible alias for the original, misspelled function name.
step_grdient = step_gradient
# 4. Gradient updates
def gradient_descent(points, starting_b, starting_w, lr, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return ``[b, w]``.

    Args:
        points: sample set; converted once to a NumPy array whose column 0
            holds the inputs x and column 1 the targets y.
        starting_b: initial value of b.
        starting_w: initial value of w.
        lr: learning rate passed to every update step.
        num_iterations: number of update steps to perform.

    Returns:
        The parameters after the last step, as the list ``[b, w]``.
    """
    b = starting_b
    w = starting_w
    # The samples never change between steps, so convert them only once.
    points = np.array(points)
    for step in range(num_iterations):
        # Compute the gradient and update the parameters once
        [b, w] = step_gradient(b, w, points, lr)
        if step % 50 == 0:
            # The loss is only needed for progress reporting, so it is
            # evaluated only on the steps that print it.
            loss = mse(b, w, points)
            print(f"iteration:{step}, loss:{loss}, w:{w}, b:{b}")
    return [b, w]
# 主训练函数实现如下：
def main():
    # 加载训练集数据，这些数据是通过真实模拟添加观测误差采样得到的
    lr = 0.01                         # 学习率
    initial_b = 0                     # 初始化b为0
    initial_w = 0                     # 初始化w为0
    num_iterations = 1000
    # 训练优化1000次，返回最优w*，b*和训练Loss的下降过程
    [b,w] = gradient_descent(data, initial_b, initial_w, lr, num_iterations)
    loss = mse(b,w,data)              # 计算最优数值解w,b的均方差
    print(f'Final loss:{loss},w:{w},b:{b}')'''
# one-hot编码
'''import tensorflow as tf                               # 导入TF库
# Encode the class indices 0-4 as one-hot vectors of length 10.
y = tf.one_hot(tf.constant([0, 1, 2, 3, 4]), depth=10)
print(y)'''


import os
import tensorflow as tf                               # 导入TF库
from tensorflow import keras                            # 导入TF子库keras
from tensorflow.keras import layers, optimizers, datasets        # 导入TF子库
# Load the MNIST dataset: (training images, labels), (validation images, labels).
(x, y), (x_val, y_val) = datasets.mnist.load_data()
# Convert the images to a float32 tensor, then rescale the values to -1~1.
x = tf.convert_to_tensor(x, dtype=tf.float32)
x = 2 * x / 255. - 1
# Convert the labels to an int32 tensor and one-hot encode them with depth 10.
y = tf.one_hot(tf.convert_to_tensor(y, dtype=tf.int32), depth=10)
print(x.shape, y.shape)
# Build the dataset object from the (image, label) pairs, in batches of 512.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(512)

# Create a single fully connected layer with 256 output nodes and ReLU activation.
# This is a standalone demonstration; the layer object is not used by the model below.
layers.Dense(256, activation='relu')
# Wrap 3 layers in a Sequential container; each layer's output is fed to the next layer.
model = keras.Sequential([                                   # 3 stacked layers
    layers.Dense(256, activation='relu'),                    # hidden layer 1
    layers.Dense(128, activation='relu'),                    # hidden layer 2
    layers.Dense(10)])                                       # output layer, 10 output nodes
# Plain SGD optimizer that applies w' = w - lr*grad.
optimizer = optimizers.SGD(learning_rate=0.01)
with tf.GradientTape() as tape:                              # record operations for autodiff
    # Flatten the images, [b,28,28] => [b,784], because Dense layers act on the last axis.
    x_flat = tf.reshape(x, [-1, 28*28])
    # Forward pass, [b,784] => [b,10]
    out = model(x_flat)
    # y was already one-hot encoded to [b,10] when the dataset was loaded,
    # so it must not be passed through tf.one_hot a second time.
    y_onehot = y
    # Squared difference per output node, [b,10]
    loss = tf.square(out - y_onehot)
    # Sum of squared errors divided by the number of samples => scalar
    loss = tf.reduce_sum(loss) / x.shape[0]
# Compute the gradients of the loss w.r.t. every trainable parameter. The tape is
# not persistent, so gradient() may only be called once.
grads = tape.gradient(loss, model.trainable_variables)
# w' = w - lr*grad, update the network parameters
optimizer.apply_gradients(zip(grads, model.trainable_variables))
# Every layer's tensors are optimized, so they are created as tf.Variable objects.
# Weights are initialized from a truncated normal distribution; biases start at 0.
# Layer 1 parameters
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
# Layer 2 parameters
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
# Layer 3 parameters
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
params = [w1, b1, w2, b2, w3, b3]
lr = 0.01                                  # learning rate of the manual update below
# Reshape the view, [b,28,28] => [b,28*28]
x = tf.reshape(x, [-1, 28*28])
# The forward pass must run inside a GradientTape, otherwise no operations are
# recorded and no gradients can be computed for the variables.
with tf.GradientTape() as tape:
    # Layer 1, [b,784]@[784,256]+[256] => [b,256]+[256] => [b,256]+[b,256]
    h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])
    h1 = tf.nn.relu(h1)                    # apply the activation function
    # Layer 2, [b,256] => [b,128]
    h2 = h1@w2 + b2
    h2 = tf.nn.relu(h2)
    # Output layer, [b,128] => [b,10]
    out = h2@w3 + b3
    # Squared error between the labels and the network output, [b,10].
    # y was already one-hot encoded to [b,10] when the dataset was loaded.
    loss = tf.square(y - out)
    # Reduce to a scalar: mean over all elements
    loss = tf.reduce_mean(loss)
# Automatic differentiation w.r.t. [w1,b1,w2,b2,w3,b3]
grads = tape.gradient(loss, params)
# Gradient update: assign_sub subtracts lr*grad from each variable in place
for param, grad in zip(params, grads):
    param.assign_sub(lr * grad)