Symbol
https://www.cnblogs.com/ronny/p/8533071.html
1、生成占位符做输入
import mxnet as mx
# Create two placeholder symbols that act as the inputs of the graph.
a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
c = a + b  # most NDArray operators can also be applied to symbols
print(a, b, c)
# Prints: <Symbol a> <Symbol b> <Symbol _plus1>
2、基本神经网络
# Build the network layer by layer; every layer takes the previous symbol as its data input.
inputs = mx.sym.Variable('data')  # input placeholder
fc1 = mx.sym.FullyConnected(data=inputs, name='fc1', num_hidden=128)  # fully connected layer
relu1 = mx.sym.Activation(data=fc1, name='relu1', act_type="relu")  # activation layer
fc2 = mx.sym.FullyConnected(data=relu1, name='fc2', num_hidden=10)  # fully connected layer
net = mx.sym.SoftmaxOutput(data=fc2, name='out')  # loss layer
# Draw the network as a graph. In the shape hint, 100 is the sample dimension
# and 200 is the feature dimension.
network_graph = mx.viz.plot_network(net, shape={'data': (100, 200)})
network_graph.view()
print(net.list_arguments())
# Prints: ['data', 'fc1_weight', 'fc1_bias', 'fc2_weight', 'fc2_bias', 'out_label']
# Any layer input that is not specified explicitly gets an automatically
# generated variable, such as "fc1_weight" and "fc1_bias" above.
3、一旦开始构建更大的网络,就可能需要使用公共前缀命名一些符号来统筹网络的结构。你可以像下面的示例一样使用前缀名称管理
# Stack n_layer fully connected layers, giving each one its own name prefix.
data = mx.sym.Variable("data")
net = data
n_layer = 2
for i in range(n_layer):
    # Symbols created inside this context get the "layer<N>_" prefix on their names.
    with mx.name.Prefix("layer%d_" % (i + 1)):
        net = mx.sym.FullyConnected(data=net, name="fc", num_hidden=100)
print(net.list_arguments())
# Prints: ['data', 'layer1_fc_weight', 'layer1_fc_bias', 'layer2_fc_weight', 'layer2_fc_bias']
4、访问形状和类型接口
# Names of the symbol's inputs and outputs.
arg_name = c.list_arguments()
out_name = c.list_outputs()
# Infer the output shape from the shapes of the input arguments.
arg_shape, out_shape, _ = c.infer_shape(a=(2, 3), b=(2, 3))
# Infer the output type from the types of the input arguments.
arg_type, out_type, _ = c.infer_type(a='float32', b='float32')
# Pair each name with its inferred shape. This is a bare expression, so it is
# only echoed in an interactive session.
{
    'input': {name: shape for name, shape in zip(arg_name, arg_shape)},
    'output': {name: shape for name, shape in zip(out_name, out_shape)},
}
# Pair each name with its inferred type (also a bare expression).
{
    'input': {name: dtype for name, dtype in zip(arg_name, arg_type)},
    'output': {name: dtype for name, dtype in zip(out_name, out_type)},
}
5、绑定数据与评估结果
1、用CPU来计算
# Bind concrete input arrays to the symbol on the CPU.
cpu_inputs = {'a': mx.nd.ones([2, 3]), 'b': mx.nd.ones([2, 3])}
ex = c.bind(ctx=mx.cpu(), args=cpu_inputs)
# Run the forward pass.
ex.forward()
first_output = ex.outputs[0].asnumpy()
print('number of outputs = %d\nthe first output = \n%s' % (len(ex.outputs), first_output))
2、用GPU来计算
gpu_device = mx.gpu()  # Change this to mx.cpu() in absence of GPUs.
# Create the input arrays on the chosen device, then bind them to the symbol.
a_on_device = mx.nd.ones([3, 4], gpu_device) * 2
b_on_device = mx.nd.ones([3, 4], gpu_device) * 3
ex_gpu = c.bind(ctx=gpu_device, args={'a': a_on_device, 'b': b_on_device})
# Run the forward pass.
ex_gpu.forward()
# Bare expression: only echoed in an interactive session.
ex_gpu.outputs[0].asnumpy()
3、bind和forward的结合体
# eval() combines bind and forward in a single call.
eval_inputs = {'a': mx.nd.ones([2, 3]), 'b': mx.nd.ones([2, 3])}
ex = c.eval(ctx=mx.cpu(), **eval_inputs)
print('number of outputs = %d\nthe first output = \n%s' % (len(ex), ex[0].asnumpy()))
4、grad_req='null':不计算梯度;grad_req='write':计算梯度并进行反向传播
5、在调用Bind时,需要提前手动为gradient分配一个空间args_grad并且传入,同时grad_req 设置为 write
import numpy as np  # np.random is used below; the visible part of this file never imports numpy

# NOTE(review): `d` is not defined anywhere in the visible part of this file.
# It is presumably a Symbol whose inputs are named 'A' and 'B' -- define it
# before running this snippet, otherwise `d.bind` raises NameError.
input_arguments = {}
input_arguments['A'] = mx.nd.ones((10, ), ctx=mx.cpu())
input_arguments['B'] = mx.nd.ones((10, ), ctx=mx.cpu())
# allocate space for gradients
grad_arguments = {}
grad_arguments['A'] = mx.nd.ones((10, ), ctx=mx.cpu())
grad_arguments['B'] = mx.nd.ones((10, ), ctx=mx.cpu())
executor = d.bind(
    ctx=mx.cpu(),
    # this can be a list or a dictionary mapping names of inputs to NDArray
    args=input_arguments,
    # this can be a list or a dictionary mapping names of inputs to NDArray
    args_grad=grad_arguments,
    # instead of null, tell the executor to write gradients. This replaces the
    # contents of grad_arguments with the gradients computed.
    grad_req='write',
)
# Overwrite the bound inputs in place with random values.
executor.arg_dict['A'][:] = np.random.rand(10,)
executor.arg_dict['B'][:] = np.random.rand(10,)
executor.forward()
# in this particular example, the output symbol is not a scalar or loss symbol.
# Thus taking its gradient is not possible.
# What is commonly done instead is to feed in the gradient from a future computation.
# this is essentially how backpropagation works.
out_grad = mx.nd.ones((10,), ctx=mx.cpu())
# Run the backward pass.
executor.backward([out_grad])  # because the graph only has one output, only one output grad is needed.
# Bare expression: only echoed in an interactive session.
executor.grad_arrays
# [NDArray, NDArray]
6、与bind相对的是simple_bind,它有一个好处:不需要手动为梯度分配存储空间
import numpy as np  # np.random is used below; the visible part of this file never imports numpy

# NOTE(review): `d` is not defined anywhere in the visible part of this file.
# It is presumably a Symbol whose inputs are named 'A' and 'B' -- define it
# before running this snippet, otherwise `d.simple_bind` raises NameError.
input_shapes = {'A': (10,), 'B': (10,)}
executor = d.simple_bind(
    ctx=mx.cpu(),
    grad_req='write',  # instead of null, tell the executor to write gradients
    **input_shapes
)
# Fill the bound inputs in place with random values.
executor.arg_dict['A'][:] = np.random.rand(10,)
executor.arg_dict['B'][:] = np.random.rand(10,)
# Run the forward pass.
executor.forward()
# Run the backward pass, feeding in the gradient of the output.
out_grad = mx.nd.ones((10,), ctx=mx.cpu())
executor.backward([out_grad])
6、保存和载入
# Show the JSON form of the symbol, save it to disk, then load it back.
print(c.tojson())
symbol_path = 'symbol-c.json'
c.save(symbol_path)
c2 = mx.sym.load(symbol_path)
# Compare the JSON of the original and the reloaded symbol. This is a bare
# expression, so it is only echoed in an interactive session.
c.tojson() == c2.tojson()
7、类型转换
MXNet默认使用32位浮点(float32)类型。有时为了在精度与性能之间取得更好的权衡,我们会希望使用低精度的数据类型。例如,英伟达Tesla Pascal GPU(如P100)提升了16位浮点的计算性能,而GTX Pascal GPU(如GTX 1080)在8位整型上的运算速度更快。
我们可以使用mx.sym.cast 操作符转换数据类型
a = mx.sym.Variable('data')
# Cast to float16, then infer the types given a float32 input.
b = mx.sym.cast(data=a, dtype='float16')
arg, out, _ = b.infer_type(data='float32')
print(dict(input=arg, output=out))
# Cast to uint8, then infer the types given an int32 input.
c = mx.sym.cast(data=a, dtype='uint8')
arg, out, _ = c.infer_type(data='int32')
print(dict(input=arg, output=out))
8、变量共享
a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
# Rebind `b` to an expression that uses only `a`.
b = a + a * a
data = mx.nd.ones((2, 3)) * 2
# The same array is passed under both names.
shared_args = {'a': data, 'b': data}
ex = b.bind(ctx=mx.cpu(), args=shared_args)
ex.forward()
# Bare expression: only echoed in an interactive session.
ex.outputs[0].asnumpy()