get_internals()
Gets a new grouped symbol sgroup. The output of sgroup is a list of outputs of all of the internal nodes.
>>> a = mx.sym.var('a')
>>> b = mx.sym.var('b')
>>> c = a + b
>>> d = c.get_internals()
>>> d
<Symbol Grouped>
>>> d.list_outputs()
['a', 'b', '_plus4_output']
def L2_penalty(w, b):
    """Return the symbolic L2 penalty ``sum(w ** 2) + sum(b ** 2)``.

    Parameters
    ----------
    w, b
        Operands accepted by ``mx.sym.square`` (presumably the weight and
        bias symbols of one layer -- confirm against the caller).

    Returns
    -------
    The symbol obtained by adding the two ``mx.sym.sum`` reductions.
    """
    return mx.sym.sum(mx.sym.square(w)) + mx.sym.sum(mx.sym.square(b))
def get_symbol():
    """Build the network symbol with an EMD loss plus an explicit L2 penalty.

    The earlier layers are elided in this snippet (the ``...`` below); names
    such as ``dropout2``, ``num_classes``, ``dtype`` and ``label`` come from
    that elided part or from the enclosing module.

    Returns
    -------
    A grouped symbol with two outputs: the gradient-blocked softmax
    prediction (named ``'pred'``) and the ``MakeLoss`` output (named
    ``'loss'``).
    """
    ...
    fc3 = mx.sym.FullyConnected(
        name='fc3', data=dropout2, num_hidden=num_classes)
    if dtype == 'float16':
        fc3 = mx.sym.Cast(data=fc3, dtype=np.float32)
    output = mx.sym.softmax(data=fc3, axis=1, name='softmax_layer')

    # Look both up once instead of on every iteration of the comprehension.
    internals = output.get_internals()
    arg_names = output.list_arguments()

    # Index 0 is skipped and the rest are consumed in consecutive pairs
    # (assumes the layout is data, then weight/bias pairs -- TODO confirm).
    mate_cnn_fc = [
        L2_penalty(internals[arg_names[i]], internals[arg_names[i + 1]])
        for i in range(1, len(arg_names), 2)
    ]
    mates_sum = mx.sym.add_n(*mate_cnn_fc)

    loss = mx.sym.mean(emd_l2(output, label, num_classes)) + WeightDecay * mates_sum
    emd2_loss = mx.sym.MakeLoss(loss, name='loss')
    pred_loss = mx.sym.Group(
        [mx.sym.BlockGrad(output, name='pred'), emd2_loss])
    return pred_loss
#网络定义
def getMTL(sym, layer_name):
    """Attach six attribute-prediction heads to an internal layer of ``sym``.

    The output ``layer_name + '_output'`` of ``sym`` feeds six fully connected
    heads (gender, age, mask, glass, sunglass, hat). Labels are read from the
    variable ``'attr_label'``, one column per head along axis 1 (columns 0-5).
    The five two-way heads use ``SoftmaxOutput`` with ``ignore_label=-1``; the
    age head uses the custom ``'l2_regression'`` op multiplied by the mask
    ``label != -1``.

    Parameters
    ----------
    sym
        Symbol whose internals contain ``layer_name + '_output'``.
    layer_name : str
        Name of the layer whose output is used as the shared feature.

    Returns
    -------
    A grouped symbol ordered as
    ``[gender, age, mask, glass, sunglass, hat]``.
    """
    feature = sym.get_internals()[layer_name + '_output']

    def _head(name, num_hidden):
        # One fully connected prediction head on the shared feature.
        return mx.symbol.FullyConnected(data=feature, num_hidden=num_hidden, name=name)

    pred_gender = _head('pred_gender', 2)
    pred_age = _head('pred_age', 1)
    pred_mask = _head('pred_mask', 2)
    pred_glass = _head('pred_glass', 2)
    pred_sunglass = _head('pred_sunglass', 2)
    pred_hat = _head('pred_hat', 2)

    labels = mx.symbol.Variable('attr_label')

    def _label_column(index, slice_name):
        # Slice one label column, returning both its flattened and 1-D forms.
        column = mx.symbol.slice_axis(
            data=labels, axis=1, begin=index, end=index + 1, name=slice_name)
        column = mx.symbol.Flatten(data=column)
        return column, mx.symbol.Reshape(data=column, shape=(-1,))

    def _softmax_loss(pred, index, slice_name, out_name):
        # Softmax loss for one head; samples labelled -1 are ignored.
        _, label_1d = _label_column(index, slice_name)
        return mx.symbol.SoftmaxOutput(
            data=pred, label=label_1d, grad_scale=1,
            use_ignore=True, ignore_label=-1, name=out_name)

    loss_gender = _softmax_loss(pred_gender, 0, 'slice01', 'gender_out')

    # The 1-D age label is never used, but it is still created so that the
    # auto-generated operator names stay the same as before.
    age_column, _age_label_1d = _label_column(1, 'slice12')
    age_regression = mx.symbol.Custom(
        data=pred_age, label=age_column, op_type='l2_regression')
    loss_age = age_regression * (age_column != -1)

    loss_mask = _softmax_loss(pred_mask, 2, 'slice23', 'mask_out')
    loss_glass = _softmax_loss(pred_glass, 3, 'slice34', 'glass_out')
    loss_sunglass = _softmax_loss(pred_sunglass, 4, 'slice45', 'sunglass_out')
    loss_hat = _softmax_loss(pred_hat, 5, 'slice56', 'hat_out')

    heads = [loss_gender, loss_age, loss_mask, loss_glass, loss_sunglass, loss_hat]
    return mx.symbol.Group(heads)
list_arguments()
Lists all the arguments in the symbol.
Example
>>> a = mx.sym.var('a')
>>> b = mx.sym.var('b')
>>> c = a + b
>>> c.list_arguments()
['a', 'b']
Returns: args – List containing the names of all the arguments required to compute the symbol.
>>> data = mx.sym.Variable('data')
>>> prev = mx.sym.Variable('prev')
>>> fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=128)
>>> fc2 = mx.sym.FullyConnected(data=prev, name='fc2', num_hidden=128)
>>> out = mx.sym.Activation(data=mx.sym.elemwise_add(fc1, fc2), act_type='relu')
>>> out.list_arguments()
['data', 'fc1_weight', 'fc1_bias', 'prev', 'fc2_weight', 'fc2_bias']
>>> out.infer_shape(data=(10,64))
(None, None, None)
>>> out.infer_shape_partial(data=(10,64))
([(10L, 64L), (128L, 64L), (128L,), (), (), ()], [(10L, 128L)], [])
>>> # infers shape if you give information about fc2
>>> out.infer_shape(data=(10,64), prev=(10,128))
([(10L, 64L), (128L, 64L), (128L,), (10L, 128L), (128L, 128L), (128L,)], [(10L, 128L)], [])
- parameters (for modules with parameters)
- `get_params()`: return a tuple `(arg_params, aux_params)`. Each of those
is a dictionary of name to ``NDArray`` mapping. Those `NDArray` always lives on
CPU. The actual parameters used for computing might live on other devices (GPUs),
this function will retrieve (a copy of) the latest parameters. Therefore, modifying
the returned dictionaries does not change the parameters used for computation.
- ``set_params(arg_params, aux_params)``: assign parameters to the devices
doing the computation.
def get_params(self):
    """Gets parameters, which are potentially copies of the actual parameters used
    to do computation on the device.

    Returns
    -------
    ``(arg_params, aux_params)``
        A pair of dictionaries each mapping parameter names to NDArray values.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.

    Examples
    --------
    >>> # An example of getting module parameters.
    >>> print(mod.get_params())
    ({'fc2_weight': <NDArray ...>, 'fc1_weight': <NDArray ...>,
    'fc3_bias': <NDArray ...>, 'fc3_weight': <NDArray ...>,
    'fc2_bias': <NDArray ...>, 'fc1_bias': <NDArray ...>}, {})
    """
    raise NotImplementedError()
def update(self):
    """Updates parameters according to the installed optimizer and the gradients computed
    in the previous forward-backward batch.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.

    Examples
    --------
    >>> # An example of updating module parameters.
    >>> mod.init_optimizer(kvstore='local', optimizer='sgd',
    ...     optimizer_params=(('learning_rate', 0.01), ))
    >>> mod.backward()
    >>> mod.update()
    >>> print(mod.get_params()[0]['fc3_weight'].asnumpy())
    [[ 5.86930104e-03 5.28078526e-03 -8.88729654e-03 -1.08308345e-03
    6.13054074e-03 4.27560415e-03 1.53817423e-03 4.62131854e-03
    4.69872449e-03 -2.42400169e-03 9.94111411e-04 1.12386420e-03
    ...]]
    """
    raise NotImplementedError()
def save_params(self, fname):
    """Saves model parameters to file.

    Every array returned by ``get_params()`` is moved to the CPU context and
    stored under a prefixed key: ``'arg:<name>'`` for argument parameters and
    ``'aux:<name>'`` for auxiliary parameters.

    Parameters
    ----------
    fname : str
        Path to output param file.

    Examples
    --------
    >>> # An example of saving module parameters.
    >>> mod.save_params('myfile')
    """
    arg_params, aux_params = self.get_params()
    save_dict = {}
    # Argument parameters first, then auxiliary ones, each with its prefix.
    for prefix, params in (('arg', arg_params), ('aux', aux_params)):
        for name, array in params.items():
            save_dict['%s:%s' % (prefix, name)] = array.as_in_context(cpu())
    ndarray.save(fname, save_dict)
def load_params(self, fname):
    """Loads model parameters from file.

    Keys in the file must have the form ``'arg:<name>'`` or ``'aux:<name>'``.
    The loaded values are split into two dictionaries by prefix and passed to
    ``set_params``.

    Parameters
    ----------
    fname : str
        Path to input param file.

    Raises
    ------
    ValueError
        If a key has no ``':'`` separator or its prefix is neither ``'arg'``
        nor ``'aux'``.

    Examples
    --------
    >>> # An example of loading module parameters.
    >>> mod.load_params('myfile')
    """
    save_dict = ndarray.load(fname)
    arg_params = {}
    aux_params = {}
    targets = {'arg': arg_params, 'aux': aux_params}
    for key, value in save_dict.items():
        # partition() never fails to unpack, so a key without ':' reaches the
        # explicit error below instead of an opaque unpacking ValueError.
        arg_type, separator, name = key.partition(':')
        if not separator or arg_type not in targets:
            raise ValueError("Invalid param file " + fname)
        targets[arg_type][name] = value
    self.set_params(arg_params, aux_params)
def forward(self, data_batch, is_train=None):
    """Forward computation. It supports data batches with different shapes, such as
    different batch sizes or different image sizes.

    If reshaping of the data batch relates to modification of the symbol or module,
    such as changing image layout ordering or switching from training to predicting,
    module rebinding is required.

    Parameters
    ----------
    data_batch : DataBatch
        Could be anything with similar API implemented.
    is_train : bool
        Default is ``None``, which means `is_train` takes the value of ``self.for_training``.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.

    Examples
    --------
    >>> import mxnet as mx
    >>> from collections import namedtuple
    >>> Batch = namedtuple('Batch', ['data'])
    >>> data = mx.sym.Variable('data')
    >>> out = data * 2
    >>> mod = mx.mod.Module(symbol=out, label_names=None)
    >>> mod.bind(data_shapes=[('data', (1, 10))])
    >>> mod.init_params()
    >>> data1 = [mx.nd.ones((1, 10))]
    >>> mod.forward(Batch(data1))
    >>> print(mod.get_outputs()[0].asnumpy())
    [[ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]
    >>> # Forward with data batch of different shape
    >>> data2 = [mx.nd.ones((3, 5))]
    >>> mod.forward(Batch(data2))
    >>> print(mod.get_outputs()[0].asnumpy())
    [[ 2. 2. 2. 2. 2.]
    [ 2. 2. 2. 2. 2.]
    [ 2. 2. 2. 2. 2.]]
    """
    raise NotImplementedError()
def backward(self, out_grads=None):
    """Backward computation.

    Parameters
    ----------
    out_grads : NDArray or list of NDArray, optional
        Gradient on the outputs to be propagated back.
        This parameter is only needed when bind is called
        on outputs that are not a loss function.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.

    Examples
    --------
    >>> # An example of backward computation.
    >>> mod.backward()
    >>> print(mod.get_input_grads()[0].asnumpy())
    [[[ 1.10182791e-05 5.12257748e-06 4.01927764e-06 8.32566820e-06
    -1.59775993e-06 7.24269375e-06 7.28067835e-06 -1.65902311e-05
    5.46342608e-06 8.44196393e-07]
    ...]]
    """
    raise NotImplementedError()
def get_outputs(self, merge_multi_context=True):
    """Gets outputs of the previous forward computation.

    If `merge_multi_context` is ``True``, the result is like ``[out1, out2]``. Otherwise,
    it has the form ``[[out1_dev1, out1_dev2], [out2_dev1, out2_dev2]]``.
    All the output elements have type `NDArray`. When `merge_multi_context` is ``False``,
    those `NDArray` instances might live on different devices.

    Parameters
    ----------
    merge_multi_context : bool
        Defaults to ``True``. In the case when data-parallelism is used, the outputs
        will be collected from multiple devices. A ``True`` value indicates that we
        should merge the collected results so that they look like from a single
        executor.

    Returns
    -------
    list of `NDArray` or list of list of `NDArray`.
        Outputs of the previous forward computation.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.

    Examples
    --------
    >>> # An example of getting forward output.
    >>> print(mod.get_outputs()[0].asnumpy())
    [[ 0.09999977 0.10000153 0.10000716 0.10000195 0.09999853 0.09999743
    0.10000272 0.10000113 0.09999088 0.09999888]]
    """
    raise NotImplementedError()
def get_input_grads(self, merge_multi_context=True):
    """Gets the gradients to the inputs, computed in the previous backward computation.

    If `merge_multi_context` is ``True``, the result is like ``[grad1, grad2]``. Otherwise, it
    is like ``[[grad1_dev1, grad1_dev2], [grad2_dev1, grad2_dev2]]``. All the output
    elements have type `NDArray`. When `merge_multi_context` is ``False``, those `NDArray`
    instances might live on different devices.

    Parameters
    ----------
    merge_multi_context : bool
        Defaults to ``True``. In the case when data-parallelism is used, the gradients
        will be collected from multiple devices. A ``True`` value indicates that we
        should merge the collected results so that they look like from a single
        executor.

    Returns
    -------
    list of NDArray or list of list of NDArray
        Input gradients.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.

    Examples
    --------
    >>> # An example of getting input gradients.
    >>> print(mod.get_input_grads()[0].asnumpy())
    [[[ 1.10182791e-05 5.12257748e-06 4.01927764e-06 8.32566820e-06
    -1.59775993e-06 7.24269375e-06 7.28067835e-06 -1.65902311e-05
    5.46342608e-06 8.44196393e-07]
    ...]]
    """
    raise NotImplementedError()
class mxnet.metric.CompositeEvalMetric(metrics=None, name='composite', output_names=None, label_names=None)[source]
Manages multiple evaluation metrics.
Parameters:
metrics (list of EvalMetric) – List of child metrics.
name (str) – Name of this metric instance for display.
output_names (list of str, or None) – Name of predictions that should be used when updating with update_dict. By default include all predictions.
label_names (list of str, or None) – Name of labels that should be used when updating with update_dict. By default include all labels.
Examples
>>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])]
>>> labels = [mx.nd.array([0, 1, 1])]
>>> eval_metrics_1 = mx.metric.Accuracy()
>>> eval_metrics_2 = mx.metric.F1()
>>> eval_metrics = mx.metric.CompositeEvalMetric()
>>> for child_metric in [eval_metrics_1, eval_metrics_2]:
...     eval_metrics.add(child_metric)
>>> eval_metrics.update(labels = labels, preds = predicts)
>>> print eval_metrics.get()
(['accuracy', 'f1'], [0.6666666666666666, 0.8])
查看mxnet中的symbol图的所有变量,以及他们的shape
>>> import mxnet as mx
>>>
>>> a = mx.sym.Variable('data')
>>> b = mx.sym.FullyConnected(data=a,name='fc1',num_hidden=100)
>>> data_shape = {'data':(256,64)}
>>> arg_shape,_,_ = b.infer_shape(**data_shape)
>>> b.list_arguments() # 列出symbol中的所有参数,这里是输入以及全连接层的权值和偏置
['data', 'fc1_weight', 'fc1_bias']
>>> arg_shape # 这里展示上面三个参数的size
[(256L, 64L), (100L, 64L), (100L,)]
从0实现正则化例子
代码:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#沐神教程实战之正则化学习
#本例子使用人工生成数据集
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
num_train = 20
num_test = 100
num_inputs = 200
# Generate the dataset.
# True parameters of the underlying linear model.
true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05
# Generate the training and test data: Y = X.w + b + noise.
X = nd.random.normal(shape=(num_train + num_test, num_inputs))
# The bias was previously defined but never added to the targets.
Y = nd.dot(X, true_w) + true_b
Y += .01 * nd.random.normal(shape=Y.shape)
# The first num_train rows are the training split, the rest the test split.
X_train, X_test = X[:num_train], X[num_train:]
Y_train, Y_test = Y[:num_train], Y[num_train:]
# 定义⼀个函数它每次返回batch_size 个随机的样本和对应的⽬标
import random
batch_size = 1  # number of samples per mini-batch yielded by data_iter
def data_iter(num_examples):
    """Yield shuffled mini-batches drawn from the first ``num_examples`` rows.

    The indices ``0 .. num_examples - 1`` are shuffled once, then consumed in
    chunks of ``batch_size``. Each chunk is used to ``take`` matching rows
    from the module-level ``X`` and ``Y``. The last chunk may be shorter.

    Yields
    ------
    A pair ``(X.take(j), Y.take(j))`` for each index chunk ``j``.
    """
    order = list(range(num_examples))
    random.shuffle(order)
    for start in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so no explicit min() is needed.
        picked = nd.array(order[start:start + batch_size])
        yield X.take(picked), Y.take(picked)
# 初始化模型参数
def get_params():
    """Create freshly initialised model parameters with gradients attached.

    Returns
    -------
    A tuple ``(w, b)``: ``w`` is a ``(num_inputs, 1)`` normal sample scaled
    by 0.1 and ``b`` is a zero array of shape ``(1,)``. ``attach_grad()`` has
    been called on both.
    """
    weight = nd.random.normal(shape=(num_inputs, 1)) * 0.1
    bias = nd.zeros((1,))
    weight.attach_grad()
    bias.attach_grad()
    return weight, bias
# L2 范数正则化
def L2_penalty(w, b):
    """Return the L2 penalty ``sum(w ** 2) + b ** 2``."""
    weight_term = (w ** 2).sum()
    bias_term = b ** 2
    return weight_term + bias_term
# 定义训练和测试
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt
def net(X, lambd, w, b):
    """Linear model ``X . w + b``.

    ``lambd`` is accepted but not used in the computation.
    """
    projection = nd.dot(X, w)
    return projection + b
def square_loss(yhat, y):
    """Element-wise squared error, with ``y`` reshaped to the shape of ``yhat``."""
    residual = yhat - y.reshape(yhat.shape)
    return residual ** 2
def SGD(params, lr):
    """Apply one in-place gradient step ``p <- p - lr * p.grad`` to each parameter."""
    for p in params:
        step = lr * p.grad
        # Slice assignment keeps the update in place on the same object.
        p[:] = p - step
def test(params, X, y):
    """Return the mean squared loss of the model on ``(X, y)`` as a scalar.

    ``params`` is unpacked into ``net`` after a fixed ``lambd`` of 0.
    """
    predictions = net(X, 0, *params)
    per_sample = square_loss(predictions, y)
    return per_sample.mean().asscalar()
def train(lambd):
    """Train the linear model with an explicit L2 penalty and plot the losses.

    Runs 10 epochs of SGD (learning rate 0.002) over the training split. The
    per-sample loss is the squared error plus ``lambd * L2_penalty(w, b)``.
    After every epoch the mean loss on the training and test splits is
    recorded, and both curves are plotted at the end.

    Parameters
    ----------
    lambd
        Weight of the L2 penalty; 0 disables regularization.

    Returns
    -------
    A 4-tuple ``('learned w[:10]:', w[:10], 'learned b:', b)``.
    """
    epochs = 10
    learning_rate = 0.002
    params = get_params()
    train_loss = []
    test_loss = []
    for _ in range(epochs):
        for data, label in data_iter(num_train):
            with autograd.record():
                output = net(data, lambd, *params)
                # The L2 term penalizes large parameter values.
                loss = square_loss(output, label) + lambd * L2_penalty(*params)
            loss.backward()
            SGD(params, learning_rate)
        train_loss.append(test(params, X_train, Y_train))
        test_loss.append(test(params, X_test, Y_test))
    plt.plot(train_loss)
    plt.plot(test_loss)
    plt.legend(['train', 'test'])
    plt.show()
    # The bias label previously read 'learend b:'.
    return 'learned w[:10]:', params[0][:10], 'learned b:', params[1]
# Run the experiment.
# First without regularization (lambd = 0).
train(0)
# Then with L2 regularization enabled (lambd = 2.5).
# train(2.5)
使用Gluon
代码:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#沐神教程实战之正则化学习
#本例子使用人工生成数据集
#使用gluon
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
num_train = 20
num_test = 100
num_inputs = 200
# True parameters of the underlying linear model.
true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05
# Generate the data: y = X.w + b + noise.
X = nd.random.normal(shape=(num_train + num_test, num_inputs))
# The bias was previously defined but never added to the targets.
y = nd.dot(X, true_w) + true_b
y += .01 * nd.random.normal(shape=y.shape)
# The first num_train rows are the training split, the rest the test split.
X_train, X_test = X[:num_train, :], X[num_train:, :]
y_train, y_test = y[:num_train], y[num_train:]
import matplotlib.pyplot as plt
import matplotlib as mpl
# Mini-batch size used by the data loader and passed to ``trainer.step``.
batch_size = 1
# Wrap the training split in a dataset and iterate over it in shuffled mini-batches.
dataset_train = gluon.data.ArrayDataset(X_train, y_train)
data_iter_train = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)
# Loss object applied to (prediction, label) pairs during training and evaluation.
square_loss = gluon.loss.L2Loss()
def test(net, X, y):
    """Return the mean of ``square_loss(net(X), y)`` as a scalar."""
    predictions = net(X)
    per_sample = square_loss(predictions, y)
    return per_sample.mean().asscalar()
def train(weight_decay):
    """Train a one-unit dense Gluon model with weight decay and plot the losses.

    Runs 10 epochs of SGD (learning rate 0.005) over ``data_iter_train``.
    Regularization comes from the optimizer's ``'wd'`` option instead of an
    explicit penalty term in the loss. After every epoch the mean loss on the
    training and test splits is recorded, and both curves are plotted at the
    end.

    Parameters
    ----------
    weight_decay
        Value passed as the optimizer's ``'wd'`` option.

    Returns
    -------
    A 4-tuple ``('learned w[:10]:', weights[:, :10], 'learned b:', bias)``.
    """
    learning_rate = 0.005
    epochs = 10

    model = gluon.nn.Sequential()
    with model.name_scope():
        model.add(gluon.nn.Dense(1))
    model.initialize()

    # 'wd' is the weight-decay option of the optimizer.
    optimizer_options = {'learning_rate': learning_rate, 'wd': weight_decay}
    trainer = gluon.Trainer(model.collect_params(), 'sgd', optimizer_options)

    train_history = []
    test_history = []
    for _ in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                loss = square_loss(model(data), label)
            loss.backward()
            trainer.step(batch_size)
        train_history.append(test(model, X_train, y_train))
        test_history.append(test(model, X_test, y_test))

    for curve in (train_history, test_history):
        plt.plot(curve)
    plt.legend(['train', 'test'])
    plt.show()

    dense = model[0]
    learned_w = dense.weight.data()[:, :10]
    learned_b = dense.bias.data()
    return ('learned w[:10]:', learned_w, 'learned b:', learned_b)
# Run training with weight_decay = 6.
train(6)