import d2lzh as d2l
from mxnet import nd
from mxnet.gluon import loss as gloss
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
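Each iterator yields minibatches of (image, label) pairs. A quick shape check (illustrative, not part of the original; the shapes assume the d2lzh Fashion-MNIST loader's 1x28x28 grayscale images):

for X, y in train_iter:
    print(X.shape, y.shape)  # (256, 1, 28, 28) (256,)
    break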
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Initialize the weights with small Gaussian noise and the biases with zeros
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))
b1 = nd.zeros(num_hiddens)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_outputs))
b2 = nd.zeros(num_outputs)
# Attach gradient buffers so autograd can record gradients for each parameter
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()
def relu(X):
    return nd.maximum(X, 0)
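relu zeroes out negative entries elementwise while leaving positive ones unchanged; a one-line check (illustrative):

print(relu(nd.array([-2.0, 0.0, 3.0])))  # -> [0. 0. 3.]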
def net(X):
    X = X.reshape((-1, num_inputs))  # flatten each image to a 784-vector
    H = relu(nd.dot(X, W1) + b1)     # hidden layer
    return nd.dot(H, W2) + b2        # output layer (raw logits)
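As a sanity check (not in the original), a dummy batch confirms that net maps (batch, 1, 28, 28) inputs to (batch, 10) logits:

X = nd.random.normal(shape=(2, 1, 28, 28))  # two fake grayscale images
print(net(X).shape)  # expected: (2, 10)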
loss = gloss.SoftmaxCrossEntropyLoss()
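SoftmaxCrossEntropyLoss fuses the softmax with the cross-entropy computation, which is numerically safer than exponentiating large logits and taking a log separately. A minimal illustration (values made up):

y_hat = nd.array([[10.0, 0.0], [0.0, 10.0]])  # logits strongly favoring the true class
y = nd.array([0, 1])
print(loss(y_hat, y))  # per-example losses, both near zero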
num_epochs, lr = 5, 0.5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
params, lr)
Output:
epoch 1, loss 0.7824, train acc 0.709, test acc 0.819
epoch 2, loss 0.4791, train acc 0.822, test acc 0.833
epoch 3, loss 0.4261, train acc 0.844, test acc 0.813
epoch 4, loss 0.4060, train acc 0.850, test acc 0.862
epoch 5, loss 0.3737, train acc 0.861, test acc 0.868
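After training, test accuracy can be re-checked directly; this sketch assumes the d2lzh helper evaluate_accuracy used elsewhere in this chapter:

print(d2l.evaluate_accuracy(test_iter, net))  # should roughly match the final test acc above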