Procedure
1. Import libraries and load the dataset
Here we use the MNIST handwritten digit dataset, one of the datasets bundled with Keras in tf.keras.datasets. There are many other datasets in there worth experimenting with; see the link below:
Keras Datasets
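Any of the other bundled datasets loads the same way. For instance (a minimal sketch; Fashion-MNIST is another dataset shipped in tf.keras.datasets):
import tensorflow as tf
# Fashion-MNIST loads exactly like MNIST and has the same 28x28 image layout
fashion = tf.keras.datasets.fashion_mnist
(f_train_x, f_train_y), (f_test_x, f_test_y) = fashion.load_data()
print(f_train_x.shape)  # (60000, 28, 28)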
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# Load the dataset (np is imported here because the plotting code in step 6 uses np.reshape)
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Display an image
def display_image(image):
    plt.imshow(image.reshape(28, 28), cmap="binary")
    plt.show()
display_image(train_images[0])
2. Prepare the dataset
One-hot encoding represents a value with 0s and 1s: the position of the correct class is set to 1 and every other position is 0. For example, with depth 10 the one-hot encoding of 6 is 0000001000.
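A quick way to check this with tf.one_hot (a minimal sketch, assuming TensorFlow 2.x):
import tensorflow as tf
# One-hot encode the digit 6 with depth 10: only position 6 becomes 1
print(tf.one_hot(6, depth=10).numpy())
# [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]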
# Reshape the training images into 2-D (N, 784) as x_data
x_data = tf.cast(train_images.reshape(-1, 28*28), dtype=tf.float32)
# Split the data into a training set and a validation set
valid_percent = 0.2
train_num = int(x_data.shape[0] * (1 - valid_percent))
# Training set
x_train = x_data[:train_num]
y_train = train_labels[:train_num]
# Validation set
x_valid = x_data[train_num:]
y_valid = train_labels[train_num:]
# Reshape the test images into 2-D as x_test
x_test = tf.cast(test_images.reshape(-1, 28*28), dtype=tf.float32)
y_test = test_labels
# Normalize x so that pixel values lie in [0, 1]
x_train = x_train / 255.0
x_valid = x_valid / 255.0
x_test = x_test / 255.0
# One-hot encode the labels y with depth 10: the correct class is 1, all others are 0
y_train = tf.one_hot(y_train, depth=10)
y_valid = tf.one_hot(y_valid, depth=10)
y_test = tf.one_hot(y_test, depth=10)
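A quick shape check confirms the split (the counts follow from MNIST's 60000 training and 10000 test images with valid_percent = 0.2):
print(x_train.shape, y_train.shape)  # (48000, 784) (48000, 10)
print(x_valid.shape, y_valid.shape)  # (12000, 784) (12000, 10)
print(x_test.shape, y_test.shape)    # (10000, 784) (10000, 10)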
3. Define the parameters and functions
# Define the parameters W and b
W = tf.Variable(tf.random.normal(shape=(28*28, 10)))
b = tf.Variable(tf.zeros(10))
# Define the model with three arguments w, x, b;
# softmax turns the linear logits into a probability distribution over the 10 classes
def model(w, x, b):
    logits = tf.matmul(x, w) + b
    return tf.nn.softmax(logits)
# Define the loss function from the predictions and the true labels
def loss(w, x, b, y):
    pred = model(w, x, b)
    loss_ = tf.keras.losses.categorical_crossentropy(y_true=y, y_pred=pred)
    return tf.reduce_mean(loss_)
# Compute gradients of the loss via tf.GradientTape automatic differentiation
def grad(w, x, b, y):
    with tf.GradientTape() as tape:
        loss_ = loss(w, x, b, y)
    return tape.gradient(loss_, [w, b])
# Accuracy
def accuracy(w, x, b, y):
    pred = model(w, x, b)
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
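Before training, a useful sanity check is that the randomly initialized model scores near chance level (a minimal sketch; the exact number varies with the random initialization of W):
# With a random W, predictions are essentially guesses among 10 classes,
# so accuracy should come out close to 0.1
print(accuracy(W, x_valid, b, y_valid).numpy())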
4. Train the network
Several lists are defined here to make the visualization in the next step easier.
# Training setup
training_epochs = 20
learning_rate = 0.001
batch_size = 50
total_step = train_num // batch_size
optimizer = tf.keras.optimizers.Adam(learning_rate)  # optimizer
train_loss_list = []  # training loss per epoch
valid_loss_list = []  # validation loss per epoch
train_acc_list = []   # training accuracy per epoch
valid_acc_list = []   # validation accuracy per epoch
# Two nested loops: the outer loop over epochs, the inner loop over the steps within one epoch
for epoch in range(training_epochs):
    for step in range(total_step):
        batch_xs = x_train[step*batch_size:(step+1)*batch_size]
        batch_ys = y_train[step*batch_size:(step+1)*batch_size]
        # Differentiate the loss function
        grads = grad(W, batch_xs, b, batch_ys)
        # Apply the optimizer update
        optimizer.apply_gradients(zip(grads, [W, b]))
    train_loss = loss(W, x_train, b, y_train).numpy()
    valid_loss = loss(W, x_valid, b, y_valid).numpy()
    train_acc = accuracy(W, x_train, b, y_train).numpy()
    valid_acc = accuracy(W, x_valid, b, y_valid).numpy()
    train_acc_list.append(train_acc)
    valid_acc_list.append(valid_acc)
    train_loss_list.append(train_loss)
    valid_loss_list.append(valid_loss)
    print(f"epoch {epoch+1}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
          f"valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}")
5. Visualize the loss and accuracy
# Visualize the loss (plt.show() finishes this figure so the accuracy curves below do not overlap it)
plt.plot(train_loss_list, 'r', label='train')
plt.plot(valid_loss_list, 'g', label='valid')
plt.legend(loc=1)
plt.show()
# Visualize the accuracy
plt.plot(train_acc_list, 'r', label='train')
plt.plot(valid_acc_list, 'g', label='valid')
plt.legend(loc=2)
plt.show()
6. Predict and visualize
# Test-set accuracy
acc_test = accuracy(W, x_test, b, y_test).numpy()
print(f"test accuracy: {acc_test:.4f}")
# Define the prediction function
def predict(w, x, b):
    pred = model(w, x, b)
    return tf.argmax(pred, 1).numpy()
pred_test = predict(W, x_test, b)
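It can also be instructive to count how many test images the model gets wrong (a minimal sketch; pred_test and test_labels are both NumPy arrays here, so a plain elementwise comparison works):
# Indices of the misclassified test samples
errors = np.where(pred_test != test_labels)[0]
print(f"{len(errors)} of {len(test_labels)} test images misclassified")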
# Define the visualization function
def plot_image_labels_prediction(images,   # list of images
                                 labels,   # list of labels
                                 preds,    # list of predictions
                                 index=0,  # start from the index-th sample
                                 num=10):  # show 10 images by default
    fig = plt.gcf()             # get the current figure
    fig.set_size_inches(10, 4)  # figure size in inches
    if num > 10:
        num = 10                # show at most 10 subplots
    for i in range(0, num):
        ax = plt.subplot(2, 5, i+1)
        ax.imshow(np.reshape(images[index], (28, 28)), cmap='binary')  # show the index-th image
        title = 'label=' + str(labels[index])  # title for this subplot
        if len(preds) > 0:
            title += ',predict=' + str(preds[index])
        ax.set_title(title, fontsize=10)
        ax.set_xticks([])  # hide the axis ticks
        ax.set_yticks([])
        index = index + 1
    plt.show()
plot_image_labels_prediction(test_images, test_labels, pred_test, 9000, 20)
Complete code
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# Load the dataset
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Display an image
def display_image(image):
    plt.imshow(image.reshape(28, 28), cmap="binary")
    plt.show()
display_image(train_images[0])
# Reshape the training images into 2-D (N, 784) as x_data
x_data = tf.cast(train_images.reshape(-1, 28*28), dtype=tf.float32)
# Split the data into a training set and a validation set
valid_percent = 0.2
train_num = int(x_data.shape[0] * (1 - valid_percent))
# Training set
x_train = x_data[:train_num]
y_train = train_labels[:train_num]
# Validation set
x_valid = x_data[train_num:]
y_valid = train_labels[train_num:]
# Reshape the test images into 2-D as x_test
x_test = tf.cast(test_images.reshape(-1, 28*28), dtype=tf.float32)
y_test = test_labels
# Normalize x so that pixel values lie in [0, 1]
x_train = x_train / 255.0
x_valid = x_valid / 255.0
x_test = x_test / 255.0
# One-hot encode the labels y with depth 10: the correct class is 1, all others are 0
y_train = tf.one_hot(y_train, depth=10)
y_valid = tf.one_hot(y_valid, depth=10)
y_test = tf.one_hot(y_test, depth=10)
# Define the parameters W and b
W = tf.Variable(tf.random.normal(shape=(28*28, 10)))
b = tf.Variable(tf.zeros(10))
# Define the model with three arguments w, x, b;
# softmax turns the linear logits into a probability distribution over the 10 classes
def model(w, x, b):
    logits = tf.matmul(x, w) + b
    return tf.nn.softmax(logits)
# Define the loss function from the predictions and the true labels
def loss(w, x, b, y):
    pred = model(w, x, b)
    loss_ = tf.keras.losses.categorical_crossentropy(y_true=y, y_pred=pred)
    return tf.reduce_mean(loss_)
# Compute gradients of the loss via tf.GradientTape automatic differentiation
def grad(w, x, b, y):
    with tf.GradientTape() as tape:
        loss_ = loss(w, x, b, y)
    return tape.gradient(loss_, [w, b])
# Accuracy
def accuracy(w, x, b, y):
    pred = model(w, x, b)
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Training setup
training_epochs = 20
learning_rate = 0.001
batch_size = 50
total_step = train_num // batch_size
optimizer = tf.keras.optimizers.Adam(learning_rate)  # optimizer
train_loss_list = []  # training loss per epoch
valid_loss_list = []  # validation loss per epoch
train_acc_list = []   # training accuracy per epoch
valid_acc_list = []   # validation accuracy per epoch
# Two nested loops: the outer loop over epochs, the inner loop over the steps within one epoch
for epoch in range(training_epochs):
    for step in range(total_step):
        batch_xs = x_train[step*batch_size:(step+1)*batch_size]
        batch_ys = y_train[step*batch_size:(step+1)*batch_size]
        # Differentiate the loss function
        grads = grad(W, batch_xs, b, batch_ys)
        # Apply the optimizer update
        optimizer.apply_gradients(zip(grads, [W, b]))
    train_loss = loss(W, x_train, b, y_train).numpy()
    valid_loss = loss(W, x_valid, b, y_valid).numpy()
    train_acc = accuracy(W, x_train, b, y_train).numpy()
    valid_acc = accuracy(W, x_valid, b, y_valid).numpy()
    train_acc_list.append(train_acc)
    valid_acc_list.append(valid_acc)
    train_loss_list.append(train_loss)
    valid_loss_list.append(valid_loss)
    print(f"epoch {epoch+1}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
          f"valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}")
# Visualize the loss
plt.plot(train_loss_list, 'r', label='train')
plt.plot(valid_loss_list, 'g', label='valid')
plt.legend(loc=1)
plt.show()
# Visualize the accuracy
plt.plot(train_acc_list, 'r', label='train')
plt.plot(valid_acc_list, 'g', label='valid')
plt.legend(loc=2)
plt.show()
# Test-set accuracy
acc_test = accuracy(W, x_test, b, y_test).numpy()
print(f"test accuracy: {acc_test:.4f}")
# Define the prediction function
def predict(w, x, b):
    pred = model(w, x, b)
    return tf.argmax(pred, 1).numpy()
pred_test = predict(W, x_test, b)
# Define the visualization function
def plot_image_labels_prediction(images,   # list of images
                                 labels,   # list of labels
                                 preds,    # list of predictions
                                 index=0,  # start from the index-th sample
                                 num=10):  # show 10 images by default
    fig = plt.gcf()             # get the current figure
    fig.set_size_inches(10, 4)  # figure size in inches
    if num > 10:
        num = 10                # show at most 10 subplots
    for i in range(0, num):
        ax = plt.subplot(2, 5, i+1)
        ax.imshow(np.reshape(images[index], (28, 28)), cmap='binary')  # show the index-th image
        title = 'label=' + str(labels[index])  # title for this subplot
        if len(preds) > 0:
            title += ',predict=' + str(preds[index])
        ax.set_title(title, fontsize=10)
        ax.set_xticks([])  # hide the axis ticks
        ax.set_yticks([])
        index = index + 1
    plt.show()
plot_image_labels_prediction(test_images, test_labels, pred_test, 9000, 20)