import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Trainable parameters of a single dense layer: 784 inputs -> 10 outputs.
w = tf.Variable(tf.random.normal([784, 10], mean=0.0, stddev=1.0, dtype=tf.float32))
b = tf.Variable(tf.zeros(10), dtype=tf.float32)

# Load MNIST through Keras.
mnist = tf.keras.datasets.mnist
(train_image, train_lables), (test_image, test_lables) = mnist.load_data()

# Hold out the last 20% of the training set for validation.
valid_spilt = 0.2
train_num = int(len(train_image) * (1 - valid_spilt))

train_x, valid_x = train_image[:train_num], train_image[train_num:]
train_y, valid_y = train_lables[:train_num], train_lables[train_num:]
test_x, test_y = test_image, test_lables

# One-hot encode the labels over 10 classes.
train_y, valid_y, test_y = (
    tf.one_hot(labels, depth=10) for labels in (train_y, valid_y, test_y)
)

# Flatten every image to a 784-vector, divide by 255.0 and cast to float32
# (presumably 8-bit pixel values, which this scales into [0, 1]).
train_x, valid_x, test_x = (
    tf.cast(images.reshape(-1, 784) / 255.0, tf.float32)
    for images in (train_x, valid_x, test_x)
)
def model(x, w, b):
    """Return the softmax of the affine map ``x @ w + b``.

    Args:
        x: Input tensor that can be matrix-multiplied with ``w``.
        w: Weight matrix.
        b: Bias added to the product ``x @ w``.

    Returns:
        The result of ``tf.nn.softmax`` applied to ``x @ w + b``.
    """
    logits = tf.matmul(x, w) + b
    probabilities = tf.nn.softmax(logits)
    return probabilities
def loss(x, y, w, b):
    """Return the mean categorical cross-entropy of the model on ``(x, y)``.

    Args:
        x: Model inputs, forwarded to ``model``.
        y: Target distribution passed as ``y_true`` (one-hot labels in this
            script).
        w: Weights forwarded to ``model``.
        b: Biases forwarded to ``model``.

    Returns:
        A scalar tensor: the cross-entropy averaged over all elements.
    """
    per_sample = tf.keras.losses.categorical_crossentropy(
        y_true=y,
        y_pred=model(x, w, b),
    )
    return tf.reduce_mean(per_sample)
def grad(x, y, w, b):
    """Return the gradients of the loss with respect to ``w`` and ``b``.

    Args:
        x: Inputs for the batch.
        y: Targets for the batch.
        w: Weight variable to differentiate with respect to.
        b: Bias variable to differentiate with respect to.

    Returns:
        A list ``[d_loss/d_w, d_loss/d_b]`` as produced by
        ``tf.GradientTape.gradient``.
    """
    # The loss must be evaluated while the tape is recording.
    with tf.GradientTape() as recorder:
        batch_loss = loss(x, y, w, b)
    params = [w, b]
    return recorder.gradient(batch_loss, params)
def accurancy(x, y, w, b):
    """Return the fraction of samples whose predicted class matches ``y``.

    Args:
        x: Model inputs.
        y: Targets; the true class is taken as the argmax along axis 1.
        w: Weights forwarded to ``model``.
        b: Biases forwarded to ``model``.

    Returns:
        A scalar float32 tensor: the mean of the per-sample match indicator.
    """
    probabilities = model(x, w, b)
    # The predicted class is the index of the largest probability in each row.
    predicted_class = tf.argmax(probabilities, 1)
    true_class = tf.argmax(y, 1)
    hits = tf.cast(tf.equal(true_class, predicted_class), tf.float32)
    return tf.reduce_mean(hits)
# Training hyper-parameters.
train_epochs = 20
learning_rate = 0.001
batch_size = 50
# Number of full mini-batches per epoch; a trailing partial batch is dropped.
total_size = train_num // batch_size
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Per-epoch metric history, used for the loss plot below.
loss_train_list = []
loss_valid_list = []
acc_train_list = []
acc_valid_list = []

for epoch in range(train_epochs):
    # One pass over the training split in consecutive mini-batches.
    for step in range(total_size):
        xs = train_x[batch_size * step:batch_size * (step + 1)]
        ys = train_y[batch_size * step:batch_size * (step + 1)]
        grads = grad(xs, ys, w, b)
        optimizer.apply_gradients(zip(grads, [w, b]))

    # Evaluate on the full training and validation splits once per epoch.
    loss_train = loss(train_x, train_y, w, b)
    loss_valid = loss(valid_x, valid_y, w, b)
    acc_train = accurancy(train_x, train_y, w, b)
    acc_valid = accurancy(valid_x, valid_y, w, b)
    loss_train_list.append(loss_train)
    loss_valid_list.append(loss_valid)
    acc_train_list.append(acc_train)
    acc_valid_list.append(acc_valid)
    # The original format string was missing "=" after valid_acc.
    print("epoch = %3d,train_loss=%4f,train_acc=%4f,valid_loss=%4f,valid_acc=%4f" % (
        epoch + 1, loss_train, acc_train, loss_valid, acc_valid))

# Plot the training and validation loss per epoch.
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(loss_train_list, "blue", label="Train loss")
plt.plot(loss_valid_list, "red", label="Valid loss")
plt.legend(loc=1)  # loc=1 places the legend in the upper right
# NOTE(review): the call below is commented out in the original, so this
# figure is not displayed at this point -- confirm whether that is intended.
# plt.show()
def predict(x, w, b):
    """Return the predicted class index for each row of ``x``.

    Args:
        x: Model inputs.
        w: Weights forwarded to ``model``.
        b: Biases forwarded to ``model``.

    Returns:
        A NumPy array holding the argmax along axis 1 of the model output.
    """
    return tf.argmax(model(x, w, b), 1).numpy()
# Predicted class index for every test image, using the trained w and b.
pred_test = predict(test_x, w, b)
# Define the visualization function
def plot_images(images, labels, preds, index=0, num=10):
    """Draw up to ten consecutive images in a 2x5 grid on the current figure.

    Args:
        images: Indexable collection of images; each item must support
            ``.reshape(28, 28)``.
        labels: Indexable collection of true labels aligned with ``images``.
        preds: Predictions aligned with ``images``. Pass an empty sequence to
            show only the true labels (useful before any prediction exists).
        index: Position of the first image to draw.
        num: Number of images to draw; values above 10 are treated as 10.
    """
    canvas = plt.gcf()
    canvas.set_size_inches(10, 4)  # figure width and height, in inches
    shown = min(num, 10)  # the 2x5 grid holds at most ten images

    # `slot` selects the subplot cell, `pos` the image being drawn.
    for slot, pos in enumerate(range(index, index + shown), start=1):
        axis = plt.subplot(2, 5, slot)
        axis.imshow(images[pos].reshape(28, 28), cmap='binary')

        caption = "label=" + str(labels[pos])
        # Predictions are optional, so only add them when provided.
        if len(preds) > 0:
            caption += ",predict=" + str(preds[pos])
        axis.set_title(caption, fontsize=10)

        # Hide the tick marks on both axes.
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()
# Show ten test images starting at index 1000 with their true and predicted labels.
plot_images(test_image, test_lables, pred_test, 1000, 10)
好久没有更新CSDN了 emmmmm晚安吧
更新:添加了一个隐藏层(64 个神经元)之后,准确率达到了 98%。
有一个地方需要注意:凡是求梯度,或者用梯度对 W 和 B 进行优化的地方,
因为 W=[w1,w2]、B=[b1,b2] 都是列表,
所以要把原先的 [w,b](或者 [W,B])改为 W+B(列表拼接,得到扁平的变量列表 [w1,w2,b1,b2])。
好 接下来贴上代码
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Width of the hidden layer.
sjy_num = 64

# Layer 1: 784 inputs -> sjy_num hidden units.
w1 = tf.Variable(tf.random.normal([784, sjy_num], mean=0.0, stddev=1.0, dtype=tf.float32))
b1 = tf.Variable(tf.zeros([sjy_num]), dtype=tf.float32)
# Layer 2: sjy_num hidden units -> 10 outputs.
w2 = tf.Variable(tf.random.normal([sjy_num, 10], mean=0.0, stddev=1.0, dtype=tf.float32))
b2 = tf.Variable(tf.zeros([10]), dtype=tf.float32)

# Parameters grouped per kind; W + B yields the flat list [w1, w2, b1, b2].
W = [w1, w2]
B = [b1, b2]

# Load MNIST through Keras.
mnist = tf.keras.datasets.mnist
(train_image, train_lables), (test_image, test_lables) = mnist.load_data()

# Hold out the last 20% of the training set for validation.
valid_spilt = 0.2
train_num = int(len(train_image) * (1 - valid_spilt))

train_x, valid_x = train_image[:train_num], train_image[train_num:]
train_y, valid_y = train_lables[:train_num], train_lables[train_num:]
test_x, test_y = test_image, test_lables

# One-hot encode the labels over 10 classes.
train_y, valid_y, test_y = (
    tf.one_hot(labels, depth=10) for labels in (train_y, valid_y, test_y)
)

# Flatten every image to a 784-vector, divide by 255.0 and cast to float32
# (presumably 8-bit pixel values, which this scales into [0, 1]).
train_x, valid_x, test_x = (
    tf.cast(images.reshape(-1, 784) / 255.0, tf.float32)
    for images in (train_x, valid_x, test_x)
)
def model(x, w, b):
    """Return class probabilities from a network with one ReLU hidden layer.

    Computes ``softmax(relu(x @ w[0] + b[0]) @ w[1] + b[1])``.

    The parameters passed in are used directly. The previous body ignored
    ``w`` and ``b`` and read the module-level ``W`` and ``B`` instead, so
    calling it with any other parameters silently had no effect.

    Args:
        x: Input tensor that can be matrix-multiplied with ``w[0]``.
        w: Sequence of two weight matrices ``[hidden, output]``.
        b: Sequence of two biases ``[hidden, output]``.

    Returns:
        The result of ``tf.nn.softmax`` applied to the output layer.
    """
    hidden = tf.nn.relu(tf.matmul(x, w[0]) + b[0])
    logits = tf.matmul(hidden, w[1]) + b[1]
    return tf.nn.softmax(logits)
def loss(x, y, w, b):
    """Return the mean categorical cross-entropy of the model on ``(x, y)``.

    Args:
        x: Model inputs, forwarded to ``model``.
        y: Target distribution passed as ``y_true`` (one-hot labels in this
            script).
        w: Weights forwarded to ``model``.
        b: Biases forwarded to ``model``.

    Returns:
        A scalar tensor: the cross-entropy averaged over all elements.
    """
    per_sample = tf.keras.losses.categorical_crossentropy(
        y_true=y,
        y_pred=model(x, w, b),
    )
    return tf.reduce_mean(per_sample)
def grad(x, y, w, b):
    """Return the gradients of the loss with respect to every parameter.

    Args:
        x: Inputs for the batch.
        y: Targets for the batch.
        w: Collection of weight variables (a list in this script).
        b: Collection of bias variables (a list in this script).

    Returns:
        Gradients from ``tf.GradientTape.gradient``, ordered like ``w + b``:
        all weights first, then all biases.
    """
    # The loss must be evaluated while the tape is recording.
    with tf.GradientTape() as recorder:
        batch_loss = loss(x, y, w, b)
    # `w + b` concatenates the two collections into one flat sequence.
    trainable = w + b
    return recorder.gradient(batch_loss, trainable)
def accurancy(x, y, w, b):
    """Return the fraction of samples whose predicted class matches ``y``.

    Args:
        x: Model inputs.
        y: Targets; the true class is taken as the argmax along axis 1.
        w: Weights forwarded to ``model``.
        b: Biases forwarded to ``model``.

    Returns:
        A scalar float32 tensor: the mean of the per-sample match indicator.
    """
    probabilities = model(x, w, b)
    # The predicted class is the index of the largest probability in each row.
    predicted_class = tf.argmax(probabilities, 1)
    true_class = tf.argmax(y, 1)
    hits = tf.cast(tf.equal(true_class, predicted_class), tf.float32)
    return tf.reduce_mean(hits)
# Training hyper-parameters.
train_epochs = 1
learning_rate = 0.01
batch_size = 50
# Number of full mini-batches per epoch; a trailing partial batch is dropped.
total_size = train_num // batch_size
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Per-epoch metric history, used for the loss plot below.
loss_train_list = []
loss_valid_list = []
acc_train_list = []
acc_valid_list = []

for epoch in range(train_epochs):
    # One pass over the training split in consecutive mini-batches.
    for step in range(total_size):
        xs = train_x[batch_size * step:batch_size * (step + 1)]
        ys = train_y[batch_size * step:batch_size * (step + 1)]
        grads = grad(xs, ys, W, B)
        # W + B is the flat list [w1, w2, b1, b2], in the same order as the
        # gradients returned by grad().
        optimizer.apply_gradients(zip(grads, W + B))

    # Evaluate on the full training and validation splits once per epoch.
    loss_train = loss(train_x, train_y, W, B)
    loss_valid = loss(valid_x, valid_y, W, B)
    acc_train = accurancy(train_x, train_y, W, B)
    acc_valid = accurancy(valid_x, valid_y, W, B)
    loss_train_list.append(loss_train)
    loss_valid_list.append(loss_valid)
    acc_train_list.append(acc_train)
    acc_valid_list.append(acc_valid)
    # The original format string was missing "=" after valid_acc.
    print("epoch = %3d,train_loss=%4f,train_acc=%4f,valid_loss=%4f,valid_acc=%4f" % (
        epoch + 1, loss_train, acc_train, loss_valid, acc_valid))

# Plot the training and validation loss per epoch.
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(loss_train_list, "blue", label="Train loss")
plt.plot(loss_valid_list, "red", label="Valid loss")
plt.legend(loc=1)  # loc=1 places the legend in the upper right
#
# # plt.show()
# def predict(x, w, b):
# x = model(x, w, b)
# result = tf.argmax(x, 1).numpy()
# return result
#
#
# pred_test = predict(test_x, W, B)
#
#
# # 定义可视化函数
#
# def plot_images(images, labels, preds, index=0, num=10): # 定义之后一次最多可以显示10张图片
# fig = plt.gcf()
# fig.set_size_inches(10, 4) # 设置幕布的长和宽
# if num > 10:
# num = 10
#
# for i in range(0, num):
# ax = plt.subplot(2, 5, i + 1) # 起到了规划图形之间的分布 同时也有i的循环来指定输出哪一幅图像
# # ax.imshow(np.reshape(images[index], (28, 28)), cmap='binary')
# tmp = images[index]
# tmp = tmp.reshape(28, 28)
# ax.imshow(tmp, cmap='binary')
# title = "label=" + str(labels[index])
# if len(preds) > 0: # 因为有时只是想输出图像 可能会在没有预测值之前
# title += ",predict=" + str(preds[index])
#
# ax.set_title(title, fontsize=10) # fontsize是字体大小
# ax.set_xticks([])
# ax.set_yticks([])
# index += 1
# plt.show()
#
#
# plot_images(test_image, test_lables, pred_test, 1000, 10)