# Some Basic Notes on Understanding MXNet (1)


(1) MXNet stores data in the NDArray format. When you need to read the data in a human-inspectable form, call:

numpy_d = d.asnumpy()

which converts it to a NumPy array.
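For example, a minimal round trip between NDArray and NumPy (the array values here are only for illustration):

import mxnet as mx
import numpy as np

d = mx.nd.ones((2, 3), ctx=mx.cpu())        # an NDArray on the CPU
numpy_d = d.asnumpy()                       # copy the contents out as a numpy.ndarray
back = mx.nd.array(numpy_d, ctx=mx.cpu())   # and convert a numpy array back into an NDArray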

(2) Notes on list_arguments (which lists the input variables of the current symbol d) and list_outputs (which lists the output variables of d)

import mxnet as mx
a = mx.sym.Variable("A") # represent a placeholder. These can be inputs, weights, or anything else.
b = mx.sym.Variable("B")
c = (a + b) / 10
d = c + 1
Calling list_arguments returns exactly the set of input symbols needed to compute d:
d.list_arguments()
# ['A', 'B']

d.list_outputs()
# ['_plusscalar0_output'] This is the default name from adding a scalar.

# define input shapes
inp_shapes = {'A':(10,), 'B':(10,)}
arg_shapes, out_shapes, aux_shapes = d.infer_shape(**inp_shapes)

arg_shapes # the shapes of all the inputs to the graph. Order matches d.list_arguments()
# [(10, ), (10, )]

out_shapes # the shapes of all outputs. Order matches d.list_outputs()
# [(10, )]

aux_shapes # the shapes of auxiliary variables. These are variables that are not trainable such as batch normalization population statistics. For now, they are safe to ignore.
# []
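For a concrete (hedged) illustration of where auxiliary variables come from: a symbol containing BatchNorm carries moving mean/variance statistics. The auto-generated names in the comments below are an assumption and may differ slightly across MXNet versions:

x = mx.sym.Variable("x")
net = mx.sym.FullyConnected(data=x, num_hidden=4, name="fc")
net = mx.sym.BatchNorm(data=net, name="bn")

net.list_auxiliary_states()
# e.g. ['bn_moving_mean', 'bn_moving_var'] -- not trainable, updated during the forward pass

_, _, aux_shapes = net.infer_shape(x=(5, 10))
# aux_shapes is now non-empty, unlike for the simple graph d above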

input_arguments = {}
input_arguments['A'] = mx.nd.ones((10, ), ctx=mx.cpu())
input_arguments['B'] = mx.nd.ones((10, ), ctx=mx.cpu())
executor = d.bind(ctx=mx.cpu(),
                  args=input_arguments, # this can be a list or a dictionary mapping names of inputs to NDArray
                  grad_req='null') # don't request gradients
args: specifies the input arguments and the NDArrays that hold their values; here it is passed as a dictionary mapping names to NDArrays.
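As the comment above says, args can also be a plain list instead of a dictionary; in that case the NDArrays must follow the order returned by d.list_arguments(). A minimal sketch:

# order must match d.list_arguments(), i.e. ['A', 'B']
executor_from_list = d.bind(ctx=mx.cpu(),
                            args=[mx.nd.ones((10,)), mx.nd.ones((10,))],
                            grad_req='null')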

After bind, a single call to forward() runs the whole computation. The executor can also be used to overwrite the input values:

import numpy as np
# The executor
executor.arg_dict
# {'A': NDArray, 'B': NDArray}

executor.arg_dict['A'][:] = np.random.rand(10,) # Note the [:]. This sets the contents of the existing array instead of rebinding the variable to a new array.
executor.arg_dict['B'][:] = np.random.rand(10,)
executor.forward()
executor.outputs
# [NDArray]
output_value = executor.outputs[0].asnumpy()

executor.arg_dict['A'] is an NDArray. Assigning with executor.arg_dict['A'][:] = ... overwrites the contents of that NDArray with the numpy values, so it remains an NDArray. Without [:], the assignment merely rebinds the Python name to a numpy array; the executor still computes with the original buffer created by mx.nd.ones((10,)).
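A short sketch of the difference (the variable name view is just for illustration):

arr = mx.nd.ones((10,), ctx=mx.cpu())
view = arr                       # another reference to the same NDArray

view[:] = np.random.rand(10,)    # writes into the existing buffer; arr now holds the new values
view = np.random.rand(10,)       # only rebinds the Python name; arr is left untouched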

# allocate space for inputs
input_arguments = {}
input_arguments['A'] = mx.nd.ones((10, ), ctx=mx.cpu())
input_arguments['B'] = mx.nd.ones((10, ), ctx=mx.cpu())
# allocate space for gradients
grad_arguments = {}
grad_arguments['A'] = mx.nd.ones((10, ), ctx=mx.cpu())
grad_arguments['B'] = mx.nd.ones((10, ), ctx=mx.cpu())

executor = d.bind(ctx=mx.cpu(),
                  args=input_arguments, # this can be a list or a dictionary mapping names of inputs to NDArray
                  args_grad=grad_arguments, # this can be a list or a dictionary mapping names of inputs to NDArray
                  grad_req='write') # instead of null, tell the executor to write gradients. This replaces the contents of grad_arguments with the gradients computed.

executor.arg_dict['A'][:] = np.random.rand(10,)
executor.arg_dict['B'][:] = np.random.rand(10,)

executor.forward()
# in this particular example, the output symbol is not a scalar or loss symbol.
# Thus taking its gradient is not possible.
# What is commonly done instead is to feed in the gradient from a future computation.
# this is essentially how backpropagation works.
out_grad = mx.nd.ones((10,), ctx=mx.cpu())
executor.backward([out_grad]) # because the graph only has one output, only one output grad is needed.

executor.grad_arrays
# [NDArray, NDArray]
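Because d = (A + B) / 10 + 1, the gradient of d with respect to each input is out_grad / 10, so with out_grad set to all ones each gradient buffer should now hold 0.1 everywhere. A quick sanity check:

executor.grad_arrays[0].asnumpy()
# ten values of 0.1: d(d)/d(A) = out_grad / 10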

The same graph can be bound more concisely with simple_bind, which infers the remaining shapes and allocates both the argument and gradient arrays for you:

input_shapes = {'A': (10,), 'B': (10, )}
executor = d.simple_bind(ctx=mx.cpu(),
                         grad_req='write', # instead of null, tell the executor to write gradients
                         **input_shapes)
executor.arg_dict['A'][:] = np.random.rand(10,)
executor.arg_dict['B'][:] = np.random.rand(10,)

executor.forward()
out_grad = mx.nd.ones((10,), ctx=mx.cpu())
executor.backward([out_grad])
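With simple_bind the gradient buffers were allocated automatically; assuming the executor exposes grad_dict (the name-keyed counterpart of grad_arrays), they can be read back by name:

executor.grad_dict
# {'A': NDArray, 'B': NDArray}
executor.grad_dict['A'].asnumpy()   # the same 0.1 values as before, looked up by name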

import mxnet as mx
import numpy as np
# First, the symbol needs to be defined
data = mx.sym.Variable("data") # input features, mxnet commonly calls this 'data'
label = mx.sym.Variable("softmax_label")

# One can either manually specify all the inputs to ops (data, weight and bias)
w1 = mx.sym.Variable("weight1")
b1 = mx.sym.Variable("bias1")
l1 = mx.sym.FullyConnected(data=data, num_hidden=128, name="layer1", weight=w1, bias=b1)
a1 = mx.sym.Activation(data=l1, act_type="relu", name="act1")

# Or let MXNet automatically create the needed arguments to ops
l2 = mx.sym.FullyConnected(data=a1, num_hidden=10, name="layer2")

# Create some loss symbol
cost_classification = mx.sym.SoftmaxOutput(data=l2, label=label)

# Bind an executor of a given batch size to do forward pass and get gradients
batch_size = 128
input_shapes = {"data": (batch_size, 28*28), "softmax_label": (batch_size, )}
executor = cost_classification.simple_bind(ctx=mx.gpu(0),
                                           **input_shapes)
This executor is used for training.

# The above executor computes gradients. When evaluating test data we don't need this.
# We want this executor to share weights with the above one, so we will use bind
# (instead of simple_bind) and use the other executor's arguments.
executor_test = cost_classification.bind(ctx=mx.gpu(0),
                                         args=executor.arg_arrays)
executor_test is used at test time. The executor exposes arg_dict, which maps each argument name (bias1, data, layer2_bias, layer2_weight, ...) to its NDArray, and arg_arrays, which holds the same arrays as a list in the order of list_arguments() (rule of thumb: attributes whose name contains "arrays" hold the actual numeric values).
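A small sketch pairing each argument name with the shape of its array; the shapes shown in the comment are what the layer definitions above imply, listed here only as an illustration:

for name, arr in zip(cost_classification.list_arguments(), executor.arg_arrays):
    print("%s: %s" % (name, arr.shape))
# e.g. data: (128, 784), weight1: (128, 784), bias1: (128,),
#      layer2_weight: (10, 128), layer2_bias: (10,), softmax_label: (128,)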

# initialize the weights
for r in executor.arg_arrays:
    r[:] = np.random.randn(*r.shape)*0.02

# Using skdata to get mnist data. This is for portability. Can sub in any data loading you like.
from skdata.mnist.views import OfficialVectorClassification

data = OfficialVectorClassification()
trIdx = data.sel_idxs[:]
teIdx = data.val_idxs[:]
for epoch in range(10):
    print "Starting epoch", epoch
    np.random.shuffle(trIdx)
    # every 128 samples (batch_size) form one batch
    for x in range(0, len(trIdx), batch_size):
        # extract a batch from mnist
        batchX = data.all_vectors[trIdx[x:x+batch_size]]
        batchY = data.all_labels[trIdx[x:x+batch_size]]

        # our executor was bound to 128 size. Throw out non matching batches.
        if batchX.shape[0] != batch_size:
            continue
        # Store batch in executor 'data'
        # Indexing executor.arg_dict by name gives the array of that argument, so setting 'data'
        # feeds in the input batch. The [:] is required so the existing array is overwritten.
        executor.arg_dict['data'][:] = batchX / 255.
        # Store labels in 'softmax_label'
        executor.arg_dict['softmax_label'][:] = batchY
        executor.forward()
        executor.backward()


        # After one forward and one backward pass, the weights need to be updated once.
        # pname is the argument name, W the weight array, G the corresponding gradient.
        # do weight updates in imperative
        for pname, W, G in zip(cost_classification.list_arguments(), executor.arg_arrays, executor.grad_arrays):
            # Don't update inputs
            # MXNet makes no distinction between weights and data.
            if pname in ['data', 'softmax_label']:
                continue
            # whatever fancy update to modify the parameters
            W[:] = W - G * .001

    # Evaluation at each epoch
    num_correct = 0
    num_total = 0
    for x in range(0, len(teIdx), batch_size):
        batchX = data.all_vectors[teIdx[x:x+batch_size]]
        batchY = data.all_labels[teIdx[x:x+batch_size]]
        if batchX.shape[0] != batch_size:
            continue
        # use the test executor as we don't care about gradients
        executor_test.arg_dict['data'][:] = batchX / 255.
        executor_test.forward()
        num_correct += sum(batchY == np.argmax(executor_test.outputs[0].asnumpy(), axis=1))
        num_total += len(batchY)
    print "Accuracy thus far", num_correct / float(num_total)
