# Same as in section 3, except the hand-written functions are replaced with
# Keras API calls, which makes the code more concise.
import tensorflow as tf
from d2l import tensorflow as d2l
# Mini-batch size shared by the training and test iterators.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Initialize model parameters: flatten each 28x28 image to a 784-vector,
# then a single 10-way linear layer (weights ~ N(0, 0.01), the softmax
# itself is folded into the loss below).
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
weight_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01)
net.add(tf.keras.layers.Dense(10, kernel_initializer=weight_initializer))
# Loss function: cross-entropy with the softmax fused in (from_logits=True),
# so the Dense layer above can output raw logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Optimization algorithm: plain minibatch SGD.
trainer = tf.keras.optimizers.SGD(learning_rate=.1)
# 求acc的函数和训练没有改变
def accuracy(y_hat, y):
    """Return the number of correctly predicted examples in a batch."""
    # When y_hat is a 2-D matrix of per-class scores (rows = examples,
    # columns = classes), collapse it to a vector of predicted class indices.
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = tf.argmax(y_hat, axis=1)
    matches = tf.cast(y_hat, y.dtype) == y
    # Summing the 0/1 match flags yields the count of correct predictions.
    return float(tf.reduce_sum(tf.cast(matches, y.dtype)))
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of `net` over every batch in `data_iter`."""
    metric = Accumulator(2)  # (number correct, number seen)
    for features, labels in data_iter:
        metric.add(accuracy(net(features), labels), d2l.size(labels))
    return metric[0] / metric[1]
class Accumulator:
    """Maintain running sums over ``n`` independent quantities."""

    def __init__(self, n):
        # One float total per tracked quantity.
        self.data = [0.0] * n

    def add(self, *args):
        # Add each incoming value onto its corresponding running total.
        for i, (total, value) in enumerate(zip(self.data, args)):
            self.data[i] = total + float(value)

    def reset(self):
        # Zero every running total, keeping the same number of slots.
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        # Allow metric[i] access to the i-th running total.
        return self.data[idx]
# 训练
# updater是更新模型参数的常用函数
def train_epoch_ch3(net, train_iter, loss, updater):
    """Run one training epoch; return (mean train loss, train accuracy).

    `updater` is either a Keras optimizer or a custom function taking
    (batch_size, grads-applied-to-updater.params).
    """
    # Running sums: total training loss, total correct predictions, sample count.
    metric = Accumulator(3)
    for X,y in train_iter:
        # Compute gradients and update parameters. The forward pass and the
        # loss must both run inside the tape so gradients can be traced.
        with tf.GradientTape() as tape:
            y_hat = net(X)
            # Keras built-in losses take (labels, predictions), which differs
            # from the implementations in this book, which take
            # (predictions, labels) — e.g. the cross-entropy implemented earlier.
            if isinstance(loss, tf.keras.losses.Loss):
                l = loss(y, y_hat)
            else:
                l = loss(y_hat, y)
        if isinstance(updater, tf.keras.optimizers.Optimizer):
            params = net.trainable_variables
            grads = tape.gradient(l, params)
            updater.apply_gradients(zip(grads, params))
        else:
            # Custom updater: pass the batch size and gradients of its params.
            updater(X.shape[0], tape.gradient(l, updater.params))
        # A Keras loss returns the batch MEAN by default, so scale it back up
        # to a sum; a custom loss returns per-example losses, so just sum.
        l_sum = l * float(tf.size(y)) if isinstance(
            loss, tf.keras.losses.Loss) else tf.reduce_sum(l)
        metric.add(l_sum, accuracy(y_hat, y), tf.size(y))
    # Return the average training loss and the training accuracy.
    return metric[0] / metric[2], metric[1] / metric[2]
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model and report per-epoch metrics (defined in chapter 3).

    Runs `num_epochs` epochs of training via `train_epoch_ch3`, evaluating
    test accuracy after each epoch and printing a one-line summary.
    """
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        # Report epochs 1-based so the final line reads "Epoch N/N" instead
        # of the off-by-one "Epoch N-1/N" the 0-based index would print.
        print(f"Epoch {epoch + 1}/{num_epochs}:"
              f" train_loss: {train_loss}"
              f" train_acc: {train_acc}"
              f" test_acc: {test_acc}")
# Train the model for 10 epochs with the SGD optimizer configured above.
num_epochs = 10
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
def predict_ch3(net, test_iter):
    """Print true vs. predicted labels for one test batch (chapter 3)."""
    # Pull a single batch (batch_size examples) from the test iterator.
    X, y = next(iter(test_iter))
    # Human-readable ground-truth labels.
    trues = d2l.get_fashion_mnist_labels(y)
    # Human-readable predicted labels (argmax over the class scores).
    preds = d2l.get_fashion_mnist_labels(tf.argmax(net(X), axis=1))
    # Show the first 15 results side by side.
    print(trues[0:15])
    print(preds[0:15])
# Visually compare predictions with ground truth on one test batch.
predict_ch3(net, test_iter)
# Output: the examples that were misclassified before are still misclassified.