概要
上一篇博客介绍了Logistic Regression,其中最主要的symbol是官方已经实现好的例子,不知道细节是怎么样的,而且结果看着十分奇怪,所以打算自己实现一个,参考官方的例子,How to Create New Operators (Layers) 。官方的教程太少且太简单,不去看底层C++很难弄懂,但去看C++源码更难受,好在踩了很多坑后,终于是勉强实现了,其中还有些细节,以后慢慢琢磨吧。
正文
数据部分是机器学习实战上提供的数据
import mxnet as mx
import numpy as np


def _load_horse_colic(path, n_features=21):
    """Load one horse-colic data file.

    Each line holds ``n_features`` feature columns followed by one label
    column, whitespace separated.

    Parameters
    ----------
    path : str
        Path to the data file.
    n_features : int, optional
        Number of feature columns per line (default 21, the horse-colic
        layout from "Machine Learning in Action").

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Feature matrix of shape (n_samples, n_features) and label vector
        of shape (n_samples,).
    """
    features, labels = [], []
    # `with` guarantees the file handle is closed even if a line is malformed.
    with open(path) as fr:
        for line in fr:
            fields = line.strip().split()
            features.append([float(v) for v in fields[:n_features]])
            labels.append(float(fields[n_features]))
    return np.array(features), np.array(labels)


# Training data
train_data, train_label = _load_horse_colic('horseColicTraining.txt')
# Evaluation data
eval_data, eval_label = _load_horse_colic('horseColicTest.txt')
class Logistic(mx.operator.CustomOp):
    """Custom logistic-regression operator that keeps its own weight vector.

    NOTE(review): the weights are updated inside ``backward`` itself (plain
    SGD on a single sample), so the framework optimizer never touches them.
    """

    def __init__(self, pos_grad_scale, weight):
        # Both kwargs arrive as strings from the Prop class, hence the casts.
        self.pos_grad_scale = float(pos_grad_scale)
        self.weight = mx.nd.ones(int(weight), ctx=mx.gpu(0))
        # Must be explicitly placed on the GPU here; it defaults to CPU
        # even when the other arrays live on the GPU.
        self.h = mx.nd.ones(1, mx.gpu(0))

    # in_data and out_data are both lists of NDArrays.
    def forward(self, is_train, req, in_data, out_data, aux):
        # print(self.weight)
        # Sigmoid of the dot product of the (single) sample with the weights.
        # assumes batch_size == 1: only in_data[0][0] is ever read — TODO confirm
        inX = mx.nd.sum(in_data[0][0] * self.weight)
        h = mx.nd.divide(1, (1 + mx.nd.exp(- inX)))
        # Keep the raw sigmoid value for the backward pass.
        self.assign(self.h, req[0], h)
        # Threshold at 0.5 to emit a hard 0/1 class prediction.
        if(h.asnumpy()[0] > 0.5): h = 1
        else: h=0
        self.assign(out_data[0], req[0], h)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # Stochastic gradient descent: update the stored weights directly
        # from the error between the label and the saved sigmoid output.
        error = in_data[1] - self.h
        weights = self.weight + self.pos_grad_scale*error*in_data[0][0]
        self.assign(self.weight, req[0], weights)
@mx.operator.register("logistic")
class LogisticProp(mx.operator.CustomOpProp):
    """Registers the 'logistic' custom op and declares its shapes/types."""

    def __init__(self, pos_grad_scale, weight):
        # NOTE(review): custom-op kwargs are delivered as strings; the cast
        # to numeric types happens inside Logistic.__init__.
        self.pos_grad_scale = pos_grad_scale
        self.weight = weight
        # need_top_grad=False: this is a loss-style op, no gradient from above.
        super(LogisticProp, self).__init__(need_top_grad=False)

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    # Pay special attention here: data_shape corresponds to in_data[0],
    # label_shape corresponds to in_data[1],
    # output_shape corresponds to out_data[0].
    def infer_shape(self, in_shape):
        data_shape = in_shape[0]
        label_shape = (in_shape[0][0],)
        output_shape = (in_shape[0][0],)
        return [data_shape, label_shape], [output_shape]

    def infer_type(self, in_type):
        return in_type, [in_type[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        return Logistic(self.pos_grad_scale, self.weight)
# import logging
# logging.basicConfig(level=logging.INFO)

# Build the symbol graph around the registered 'logistic' custom op.
X = mx.sym.Variable('data')
pos = 0.001
batch_size = 1
train_iter = mx.io.NDArrayIter(train_data,train_label, batch_size, shuffle=True,label_name='lr_label')
eval_iter = mx.io.NDArrayIter(eval_data, eval_label, batch_size, shuffle=False)
weight = train_iter.getdata()[0].shape[1]  # dimensionality of the weight vector
# Originally intended to pass an mx.nd.array as `weight`, but Custom kwargs
# get coerced to str automatically!
lro = mx.symbol.Custom(data=X, pos_grad_scale = pos, weight = weight, name='lr', op_type='logistic')
model = mx.module.Module(symbol = lro, context=mx.gpu(0), data_names=['data'], label_names = ['lr_label'])
# inputs_need_grad=True is essential here — without it backward is never run.
model.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label, inputs_need_grad=True)
# Passing eval_data=eval_iter here would act as a validation set; without
# special handling it effectively becomes part of the training data.
model.fit(train_data=train_iter, optimizer_params={'learning_rate':0.01, 'momentum': 0.9},
num_epoch=100,batch_end_callback = mx.callback.Speedometer(batch_size, 3))
metric = mx.metric.Accuracy()
model.score(train_iter, metric)
print(metric.get())
# network structure
# 下方的实现等价于 model.fit
# # init parameters
# model.init_params(initializer=mx.init.Xavier(magnitude=2.))
# # init optimizer
# model.init_optimizer(optimizer='sgd', optimizer_params=(('learning_rate', 0.1), ))
# # use accuracy as the metric
# metric = mx.metric.create('acc')
# for _ in range(100):
# # 此处十分关键, 因为 for 循环结束只有 train_iter 就为空了,所以需要重新注入
# train_iter = mx.io.NDArrayIter(train_data,train_label, batch_size, shuffle=True,label_name='lr_label')
# for batch in train_iter:
# model.forward(batch) # compute predictions
# # print(model.get_outputs()[0].asnumpy())
# model.update_metric(metric, batch.label) # accumulate prediction accuracy
# model.backward() # compute gradients
# model.update() # update parameters using SGD
# # training accuracy
# print(metric.get())
('accuracy', 0.69230769230769229)
# model.score(eval_iter, metric)
# Score on the held-out evaluation iterator.
# NOTE(review): the same `metric` object is reused after scoring the training
# set above — presumably Module.score resets it; verify against the API.
model.score(eval_iter, metric)
print(metric.get())
('accuracy', 0.64179104477611937)
增大迭代次数会得到更高的准确率,不过 shuffle=True 使得结果不太稳定
总结
- forward 和 backward 方法的参数不允许更改
- 实现上可以转化成 numpy 类型进行计算,容易理解和调试,不过没法在 gpu 上获得加速
- infer_shape 方法十分重要,这是随心所欲定制 operator 的关键
- mx.symbol.Custom 貌似不能随意传递参数,会被当做 str 类型传入,这个十分奇怪,没有弄懂
自己摸索许久也没有完全搞懂,终究是官方文档太少,说多了都是泪T_T