import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
batch_size = 256
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# Matrix multiplication needs float inputs, so cast to float32 (this also
# scales the pixel values into [0, 1])
x_train = tf.cast(x_train, tf.float32) / 255
x_test = tf.cast(x_test, tf.float32) / 255
train_iter = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
test_iter = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
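# Optional (an illustrative addition, not in the original post): the pipeline
# above batches without shuffling; a common variant reshuffles the training
# set each epoch, e.g.
#   train_iter = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
#                 .shuffle(buffer_size=x_train.shape[0])
#                 .batch(batch_size))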
num_inputs = 784
num_outputs = 10
W = tf.Variable(tf.random.normal(shape=(num_inputs, num_outputs), mean=0, stddev=0.01, dtype=tf.float32))
b = tf.Variable(tf.zeros(num_outputs, dtype=tf.float32))

def softmax(logits, axis=-1):
    # The softmax operation: exponentiate, then normalize each row
    return tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis, keepdims=True)

def net(X):
    # Define the model: flatten each image to a 784-vector, apply the affine
    # map, then softmax
    logits = tf.matmul(tf.reshape(X, shape=(-1, W.shape[0])), W) + b
    return softmax(logits)

def cross_entropy(y_hat, y):
    # Cross-entropy loss: pick out the predicted probability of the true class
    # with a one-hot mask, then take -log (the 1e-8 guards against log(0))
    y = tf.cast(tf.reshape(y, shape=[-1, 1]), dtype=tf.int32)
    y = tf.one_hot(y, depth=y_hat.shape[-1])
    # tf.boolean_mask expects a bool mask, so cast the one-hot matrix to bool
    y = tf.cast(tf.reshape(y, shape=[-1, y_hat.shape[-1]]), dtype=tf.bool)
    return -tf.math.log(tf.boolean_mask(y_hat, y) + 1e-8)

def accuracy(y_hat, y):
    # Classification accuracy. In TensorFlow 2 both sides of == must have the
    # same integer dtype, so cast the labels to int64 to match tf.argmax
    return np.mean(tf.argmax(y_hat, axis=1) == tf.cast(y, dtype=tf.int64))

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for _, (X, y) in enumerate(data_iter):
        y = tf.cast(y, dtype=tf.int64)
        acc_sum += np.sum(tf.cast(tf.argmax(net(X), axis=1), dtype=tf.int64) == y)
        n += y.shape[0]
    return acc_sum / n
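# Sanity check (an illustrative addition, not in the original post): every
# softmax row should sum to 1, and cross_entropy should return one loss
# value, -log(probability of the true class), per example.
_probs = softmax(tf.random.normal((2, 5)))
assert np.allclose(tf.reduce_sum(_probs, axis=1).numpy(), 1.0, atol=1e-5)
assert cross_entropy(_probs, tf.constant([0, 2])).shape == (2,)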
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with tf.GradientTape() as tape:
                y_hat = net(X)
                l = tf.reduce_sum(loss(y_hat, y))
            grads = tape.gradient(l, params)
            if trainer is None:
                # No optimizer was passed in, so use the hand-written
                # mini-batch stochastic gradient descent
                for i, param in enumerate(params):
                    param.assign_sub(lr * grads[i] / batch_size)
            else:
                # tf.keras.optimizers.SGD applies theta(t+1) = theta(t) - learning_rate * gradient.
                # Since the loss is summed over the mini-batch, divide the
                # gradients by batch_size to get the mini-batch update
                trainer.apply_gradients(zip([grad / batch_size for grad in grads], params))
            y = tf.cast(y, dtype=tf.float32)
            train_l_sum += l.numpy()
            train_acc_sum += tf.reduce_sum(
                tf.cast(tf.argmax(y_hat, axis=1) == tf.cast(y, dtype=tf.int64),
                        dtype=tf.int64)).numpy()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
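# Note (added remark, not in the original post): the SGD optimizer created
# below is never passed to train_ch3, so the hand-written mini-batch SGD
# branch above is what actually runs; pass trainer=trainer to use it.
# An equivalent per-batch step (a sketch with a hypothetical name,
# sgd_step_mean) averages the loss inside the tape instead, which makes the
# manual division of the gradients by batch_size unnecessary:
def sgd_step_mean(X, y, params, trainer):
    with tf.GradientTape() as tape:
        l = tf.reduce_mean(cross_entropy(net(X), y))  # mean instead of sum
    trainer.apply_gradients(zip(tape.gradient(l, params), params))
    return l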
trainer = tf.keras.optimizers.SGD(lr)
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)

import matplotlib.pyplot as plt

X, y = next(iter(test_iter))  # iter(...).next() is Python 2 syntax; use next(...)

def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

def show_fashion_mnist(images, labels):
    # The _ here denotes a variable we ignore (do not use).
    # Note the difference between plt.subplot and plt.subplots
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(tf.reshape(img, shape=(28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(tf.argmax(net(X), axis=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
show_fashion_mnist(X[0:9], titles[0:9])
Output:
epoch 1, loss 0.7841, train acc 0.750, test acc 0.793
epoch 2, loss 0.5709, train acc 0.812, test acc 0.812
epoch 3, loss 0.5257, train acc 0.825, test acc 0.819
epoch 4, loss 0.5014, train acc 0.832, test acc 0.825
epoch 5, loss 0.4856, train acc 0.836, test acc 0.827
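A final caveat: the softmax above exponentiates the raw logits, so very large logit values can overflow tf.exp. A common, mathematically equivalent remedy (a sketch, not part of the original code) subtracts the per-row maximum before exponentiating:

def stable_softmax(logits, axis=-1):
    # exp(x - max) never overflows, and the shift cancels in the normalization
    shifted = logits - tf.reduce_max(logits, axis, keepdims=True)
    return tf.exp(shifted) / tf.reduce_sum(tf.exp(shifted), axis, keepdims=True)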