多层感知机
多层感知机与前面介绍的多类逻辑回归非常类似，主要的区别是我们在输入层和输出层之间插入了一个或多个隐含层。
下面定义一个只有一个隐含层的模型，这个隐含层有256个输出节点。
import sys
sys.path.append('..')
import utils
# Mini-batch size handed to the data-loading helper below.
batch_size = 256
# utils is a project-local module. The call yields a (train, test) pair; the
# training loops iterate train_data as (data, label) batches and pass
# test_data to utils.evaluate_accuracy.
train_data, test_data = utils.load_data_fashion_mnist(batch_size)
In [2]:
from mxnet import ndarray as nd
# Layer sizes of the single-hidden-layer perceptron. net() flattens every
# example to num_inputs features before the first matrix product.
num_inputs = 28 * 28
num_outputs = 10
num_hidden = 256

# Standard deviation used when drawing the initial weights.
weight_scale = 0.01

# Input -> hidden layer: random weights, zero bias.
W1 = nd.random_normal(scale=weight_scale, shape=(num_inputs, num_hidden))
b1 = nd.zeros(shape=num_hidden)

# Hidden -> output layer: random weights, zero bias.
W2 = nd.random_normal(scale=weight_scale, shape=(num_hidden, num_outputs))
b2 = nd.zeros(shape=num_outputs)

# Every trainable array needs a gradient buffer before autograd can fill it.
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()
In [3]:
def relu(X):
    """Return the element-wise maximum of ``X`` and zero."""
    floor = 0
    return nd.maximum(X, floor)
def net(X):
    """Run the forward pass of the one-hidden-layer perceptron.

    ``X`` is reshaped to rows of ``num_inputs`` features, passed through the
    ReLU hidden layer (``W1``, ``b1``) and then through the linear output
    layer (``W2``, ``b2``). The raw output-layer values are returned; no
    softmax is applied here.
    """
    flat = X.reshape((-1, num_inputs))
    hidden = relu(nd.dot(flat, W1) + b1)
    return nd.dot(hidden, W2) + b2
from mxnet import gluon
# Loss object used by the training loops: it is called as
# softmax_cross_entropy(output, label) on the raw outputs of net.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
In [4]:
from mxnet import autograd as autograd
# Base step size. It is divided by the number of examples in each batch
# before being handed to utils.SGD, because the gradients come from the
# whole batch of per-example losses rather than from their mean.
learning_rate = .5

for epoch in range(5):
    # Running sums over the batches of this epoch; averaged when printed.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Only the forward pass and the loss need to be recorded.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Scale by the size of *this* batch rather than the nominal
        # batch_size, so a smaller final batch is not under-weighted.
        # data is batch-first: net() reshapes it to (-1, num_inputs).
        utils.SGD(params, learning_rate / data.shape[0])
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # Evaluate on the held-out data once per epoch.
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))
In [5]:
# The same model built with Gluon.
from mxnet import gluon

net = gluon.nn.Sequential()
with net.name_scope():
    # Hidden layer: 256 units followed by a ReLU activation.
    hidden_layer = gluon.nn.Dense(256, activation="relu")
    net.add(hidden_layer)
    # Output layer: 10 units, no activation.
    output_layer = gluon.nn.Dense(10)
    net.add(output_layer)
net.initialize()
In [6]:
import sys
sys.path.append('..')
from mxnet import ndarray as nd
from mxnet import autograd
import utils
# Mini-batch size and the (train, test) data pair from the project helper.
batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

# Loss applied to the network outputs and labels in the training loop.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# SGD optimiser over every parameter of the Gluon network.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.5},
)
for epoch in range(5):
    # Running sums over the batches of this epoch; averaged when printed.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Only the forward pass and the loss need to be recorded.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Trainer.step divides the gradients by the value it is given, so
        # pass the size of *this* batch (first axis of data) rather than
        # the nominal batch_size; a smaller final batch is then normalised
        # correctly.
        trainer.step(data.shape[0])
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # Evaluate on the held-out data once per epoch.
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))