龟龟是最可爱的小猫咪
Gluon训练一个网络的结构概览
从下面这段代码中可以看到 Gluon 定义及训练一个网络的基本过程:
from mxnet import nd
from mxnet.gluon import nn
from mxnet import gluon
from mxnet import autograd
class Net(nn.Block):
    """Small network made of two bias-free ``Dense`` layers applied in sequence.

    ``dense0`` has 4 output units and ``dense1`` has 2 output units. No
    activation is applied between them.
    """

    def __init__(self, **kwargs):
        """Create both layers; ``kwargs`` are forwarded to the parent constructor."""
        super().__init__(**kwargs)
        # Only the number of output units is given; no input size is specified.
        self.dense0 = nn.Dense(4, use_bias=False)
        self.dense1 = nn.Dense(2, use_bias=False)

    def forward(self, x):
        """Return ``dense1(dense0(x))``."""
        hidden = self.dense0(x)
        return self.dense1(hidden)
def train():
    """Run one SGD update on a fresh ``Net`` and print ``dense0``'s weight state.

    The weight shape is printed right after ``initialize()``. The layer
    name, weight shape, weight values and gradient are then printed twice:
    once after the forward pass and once after ``trainer.step``.
    """

    def report_dense0():
        # The same report is printed before and after the parameter update.
        weight = net.dense0.weight
        print('net[0] name', net.dense0.name, 'weight shape', weight.shape,
              '\nparams', weight.data(), 'grad', weight.grad())

    net = Net()
    net.initialize()
    # Printed before any data has been passed through the network.
    print('weight shape after initialize', net.dense0.weight.shape)

    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 1})

    # A single all-ones sample and an all-ones target.
    data = nd.ones(shape=(1, 1, 28, 28))
    label = nd.ones(shape=(1, 2))
    criterion = gluon.loss.L2Loss()

    # Forward pass and loss are computed under autograd recording.
    with autograd.record():
        output = net(data)
        report_dense0()
        loss_value = criterion(output, label)

    loss_value.backward()
    trainer.step(batch_size=1)
    report_dense0()
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    train()
打印结果如下(注意:刚执行完 initialize 时权重形状为 (4, 0),这是因为 Gluon 采用延迟初始化,输入维度要到第一次前向传播时才能确定,此后形状才变为 (4, 784)):
weight shape after initialize (4, 0)
net[0] name dense0 weight shape (4, 784)
params
[[ 0.0068339 0.01299825 0.0301265 ... -0.02863884 -0.065321
-0.05844658]
[ 0.06397291 0.03163359 -0.0507907 ... 0.03343763 -0.03537887
-0.03153095]
[ 0.02405292 -0.00215812 0.00864208 ... -0.03622346 -0.00675231
0.01282176]
[ 0.01297587 0.02718969 -0.02421831 ... -0.05000407 0.02510548
0.00611195]]
<NDArray 4x784 @cpu(0)> grad
[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
<NDArray 4x784 @cpu(0)>
net[0] name dense0 weight shape (4, 784)
params
[[-0.02588698 -0.01972263 -0.00259437 ... -0.06135972 -0.09804188
-0.09116746]
[ 0.03434887 0.00200954 -0.08041474 ... 0.00381359 -0.06500292
-0.06115499]
[ 0.03586442 0.00965339 0.02045358 ... -0.02441196 0.00505919
0.02463327]
[-0.03283395 -0.01862013 -0.07002813 ... -0.09581389 -0.02070434
-0.03969787]]
<NDArray 4x784 @cpu(0)> grad
[[ 0.03272087 0.03272087 0.03272087 ... 0.03272087 0.03272087
0.03272087]
[ 0.02962404 0.02962404 0.02962404 ... 0.02962404 0.02962404
0.02962404]
[-0.01181151 -0.01181151 -0.01181151 ... -0.01181151 -0.01181151
-0.01181151]
[ 0.04580982 0.04580982 0.04580982 ... 0.04580982 0.04580982
0.04580982]]
<NDArray 4x784 @cpu(0)>
Process finished with exit code 0
主要步骤可以写成:
- 定义一个神经网络
- 网络初始化
- 训练:
  - 定义一个 Trainer
  - 传入数据及标签,数据的形状为 batch_shape(第一维为 batch size)
  - 前向传播计算 loss
  - 反向传播得到梯度
  - 更新权重等参数