MNIST手写数字数据库的训练集为60,000个示例,而测试集为10,000个示例。
一共4个文件,训练集、训练集标签、测试集、测试集标签,这些数据直接可以用mnist = tf.keras.datasets.mnist导入
1.调用神经网络API代码如下:
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# 对数据进行归一化
x_train, x_test = x_train/225.0, x_test/255.0
# 调用API搭建神经网络
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.Dense(10, activation='softmax')
])
# 设置损失函数和梯度下降
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# 开始训练模型
model.fit(x_train, y_train, epochs=5)
# 查看准确率
model.evaluate(x_test, y_test, verbose=2)
2.手写简单神经网络代码如下:
import tensorflow as tf
import numpy as np
# 获取数据
from tensorflow.keras import datasets
(x_train, y_train),(x_test, y_test) = datasets.mnist.load_data()
# 查看数据的基本情况
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
print(x_train[0], type(x_train[0]), y_train[0], type(y_train[0]))
# 对数据进行归一化
x_train, x_test = x_train/225.0, x_test/255.0
# 将转化数据类型
x_train = tf.cast(x_train,dtype=tf.float32)
y_train = tf.cast(y_train,dtype=tf.int32)
x_test = tf.cast(x_test,dtype=tf.float32)
y_test = tf.cast(y_test,dtype=tf.int32)
# 通过tf.data.Dataset.from_tensor_slices函数将特征值和目标值拼接起来,组成样本数据
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
print(train_db)
print(test_db)
# 根据公式创建权重和偏置
'''60000个样本,28 X 28 = 784个特征,所以有784个输入神经元,因为是数字识别,输出是0-9的数字图片,输出神经元是10个
,所以权重个数是[784 X 10], [60000,784] dot [784,10] ==> [60000,10]'''
weight = tf.Variable(initial_value=tf.random.truncated_normal([784,10], stddev=0.1))
print(weight)
# 10个输出神经元,所以有10个偏置
bias = tf.Variable(initial_value=(tf.zeros([10])))
print(bias)
# 批量抓取数据 x = (60000,28,28)
for epoch in range(10):# 60000图循环10次
for step, (x,y) in enumerate(train_db):
# 将数据从三维转化为二维
x = tf.reshape(x, [-1, 28 * 28])
with tf.GradientTape() as taps:
# [128,784] dot [784,10] ==> [128,10]
y_predict = tf.add(tf.matmul(x,weight), bias)
# 把预测值转化为0-9的概率
y_predict = tf.nn.softmax(y_predict)
# 因为现在真是值是一个值,所以要进行one-hot编码转化为类别
y = tf.one_hot(y, depth=10)
# 交叉信息熵:信息熵越小则说明误差越小,正确率越高
loss = tf.reduce_mean(tf.reduce_sum(- (y * tf.math.log(y_predict))))
# 构建模型,并计算梯度下降
grade = taps.gradient(loss,[weight,bias])
# 定义学习率
lr = 0.001
# w = w-lr * grade_w
weight.assign_sub(lr * grade[0])
bias.assign_sub(lr * grade[1])
if step % 100 == 0:
print(f'第第{epoch}迭代的第{step}次都loss为:{loss}')
# 上面循环完毕代表6w张图训练集已训练完成,接下来要通过测试集来查看正确率
total_correct, total_num = 0, 0
for setp, (x, y) in enumerate(test_db):
x = tf.reshape(x, [-1, 28* 28])
# 拿着训练好的权重和偏置进行验证
y_test_predict = tf.add(tf.matmul(x,weight),bias)
# 把预测值输出到0-1之间
prob = tf.nn.softmax(y_test_predict,axis=1)
# 获取概率最大值的索引位置
pred = tf.argmax(prob,axis=1)
pred = tf.cast(pred, dtype=tf.int32)
correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
correct = tf.reduce_sum(correct)
# 把正确率的数量添加到total_correct
total_correct += int(correct)
total_num += x.shape[0]
acc = total_correct / total_num
print('测试集的正确率为:', acc)
3.手写深度神经网络代码如下:
其实深度神经网络只是在简单神经网络的基础上添加了多个权重和偏置
import tensorflow as tf
import numpy as np
# 获取数据
from tensorflow.keras import datasets
(x_train, y_train),(x_test, y_test) = datasets.mnist.load_data()
# 对数据进行归一化
x_train, x_test = x_train/225.0, x_test/255.0
# 将转化数据类型
x_train = tf.cast(x_train,dtype=tf.float32)
y_train = tf.cast(y_train,dtype=tf.int32)
x_test = tf.cast(x_test,dtype=tf.float32)
y_test = tf.cast(y_test,dtype=tf.int32)
# 将特征值和目标值拼接起来,组成样本数据
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
print(train_db)
print(test_db)
# 根据公式创建权重和偏置
'''输入的特征是784个,我们希望通过输入层后的第一个隐藏层是500个神经元,500个神经元,所以有500个偏置'''
# 第一层隐藏层 [128,784] dot [784,500] ==> [128,500]
w1 = tf.Variable(initial_value=tf.random.truncated_normal([784,500], stddev=0.1))
b1 = tf.Variable(initial_value=(tf.zeros([500])))
# 第二个隐藏层 [128,500] dot [500,200] ==> [128,200]
w2 = tf.Variable(initial_value=tf.random.truncated_normal([500,200], stddev=0.1))
b2 = tf.Variable(initial_value=(tf.zeros([200])))
# 输出层 [128,200] dot [200,10] ==> [128,10]
w3 = tf.Variable(initial_value=tf.random.truncated_normal([200,10], stddev=0.1))
b3 = tf.Variable(initial_value=(tf.zeros([10])))
# 批量抓取数据 x = (60000,28,28)
for epoch in range(10):# 60000图循环10次
for step, (x,y) in enumerate(train_db):
# 将数据从三维转化为二维
x = tf.reshape(x, [-1, 28 * 28])
with tf.GradientTape() as taps:
# [128,784] dot [784,500] ==> [128,500], 输入层通过w1和b1时,输出到第一隐藏层(500个神经元)的结果r1
r1 = tf.add(tf.matmul(x,w1), b1)
# 添加激活函数
r1 = tf.nn.relu(r1)
# 第二隐藏层 [128,500] dot [500,200] ==> [128,200]
r2 = tf.nn.relu(r1 @ w2 + b2)
# 输出层 [128,200] dot [200,10] ==> [128,10]
y_predict = r2 @ w3 + b3
# 把预测值转化为0-9的概率
y_predict = tf.nn.softmax(y_predict)
# 因为现在真是值是一个值,所以要进行one-hot编码转化为类别
y = tf.one_hot(y, depth=10)
# 交叉信息熵:信息熵越小则说明误差越小,正确率越高
loss = tf.reduce_mean(tf.reduce_sum(- (y * tf.math.log(y_predict))))
# 构建模型,并计算梯度下降
grade = taps.gradient(loss,[w1, b1, w2, b2, w3, b3])
# 定义学习率
lr = 0.001
# w = w-lr * grade_w
w1.assign_sub(lr * grade[0])
b1.assign_sub(lr * grade[1])
w2.assign_sub(lr * grade[2])
b2.assign_sub(lr * grade[3])
w3.assign_sub(lr * grade[4])
b3.assign_sub(lr * grade[5])
if step % 100 == 0:
print(f'第第{epoch}迭代的第{step}次都loss为:{loss}')
# 上面循环完毕代表6w张图训练集已训练完成,接下来要通过测试集来查看正确率
total_correct, total_num = 0, 0
for setp, (x, y) in enumerate(test_db):
x = tf.reshape(x, [-1, 28* 28])
# 拿着训练好的权重和偏置进行验证
r1 = tf.nn.relu(tf.add(tf.matmul(x,w1), b1))
r2 = tf.nn.relu(r1 @ w2 + b2)
y_test_predict = r2 @ w3 + b3
# 把预测值输出到0-1之间
prob = tf.nn.softmax(y_test_predict,axis=1)
# 获取概率最大值的索引位置
pred = tf.argmax(prob,axis=1)
pred = tf.cast(pred, dtype=tf.int32)
correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
correct = tf.reduce_sum(correct)
# 把正确率的数量添加到total_correct
total_correct += int(correct)
total_num += x.shape[0]
acc = total_correct / total_num
print('测试集的正确率为:', acc)