1. Getting started
This post simply ports content from deepLearning.net, implemented in Python.
Note!! This is not a tutorial! Think of it as study notes, more or less.
I'm not a very diligent person (an undergrad going all out to study abroad ( ´Д`)ノ). If my understanding of the code or the theory is off anywhere, please point it out, and let's improve together ヽ[・ω・`●].
2. Logistic Regression
Logistic regression is a standard method for binary classification; here we use its multi-class generalization, softmax regression.
Given an input x and parameters W, b, the input is projected onto a set of hyperplanes, and its distance to each one reflects the probability that x belongs to the corresponding class y_i:

P(Y = i \mid x, W, b) = \mathrm{softmax}_i(Wx + b) = \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}

So if we want to output the single most likely class, we take:

y_{pred} = \operatorname{argmax}_i P(Y = i \mid x, W, b)
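To make the two formulas concrete, here is a tiny numpy sketch; the input, weights, and biases are made-up numbers, purely for illustration:

import numpy

x = numpy.array([1.0, 2.0])           # one input, n_in = 2
W = numpy.array([[0.1, -0.2, 0.3],
                 [0.4, 0.0, -0.1]])   # n_in x n_out, n_out = 3 classes
b = numpy.array([0.0, 0.1, -0.2])

scores = numpy.dot(x, W) + b                      # one score per class
p = numpy.exp(scores) / numpy.exp(scores).sum()   # softmax: P(Y=i|x,W,b)
y_pred = p.argmax()                               # the most likely class
print p, y_pred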
3. The code
a. Initialization
def __init__(self, input, n_in, n_out):
    '''
    The data fed in consists of a training set (in minibatches), a
    validation set, and a test set. n_in and n_out are the dimensions of
    x and y; together they determine the total number of parameters
    (n_in*n_out weights plus n_out biases).
    '''
    # initialize the weight matrix W with zeros
    self.W = theano.shared(
        value=numpy.zeros(
            (n_in, n_out),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
        # I don't fully understand borrow=True; it seems to be about how
        # memory is used (avoiding a copy of the numpy array). See the
        # small demo right after this code. Corrections welcome~~
    )
    # initialize the bias vector b, of dimension n_out
    self.b = theano.shared(
        value=numpy.zeros(
            (n_out,),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )
    # These are the two formulas given above. Note that this is where
    # P(y|x,W,b) gets its concrete form: a softmax output layer. We could
    # swap in all sorts of other output functions, a sigmoid and so on...
    # you could even write your own.
    self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    self.params = [self.W, self.b]
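About the borrow=True question in the comment above: as far as I can tell, it controls whether Theano may reuse the numpy array directly instead of deep-copying it. A quick demo of the difference (the aliasing only happens on the CPU; with a GPU backend the data is copied into device memory regardless):

import numpy
import theano

arr = numpy.zeros(3, dtype=theano.config.floatX)
s_borrow = theano.shared(arr, borrow=True)    # may alias arr, no copy on CPU
s_copy = theano.shared(arr, borrow=False)     # always deep-copies arr

arr += 1  # mutate the original buffer in place
print s_borrow.get_value()  # [ 1.  1.  1.] on CPU: the alias saw the change
print s_copy.get_value()    # [ 0.  0.  0.] the copy did not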
b. Negative log likelihood
By minimizing this objective (that is, maximizing the log likelihood), we get to train the model's many, many parameters.
def negative_log_likelihood(self, y):
    return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
Even a one-liner this short is not that easy to read!!!
What follows is my own understanding, so careful not to get dragged into a ditch with me.
Reading from the inside out: self.p_y_given_x is the P(y|x) defined above, so T.log(self.p_y_given_x) simply takes the log of P. The result is a matrix in which each row holds the softmax log-probabilities computed for one training example. Since we use MSGD, the number of rows equals the number of examples per minibatch, which we pick ourselves.
An example! With a minibatch size of 2 and 4 classes, the matrix is 2x4 (see the numpy sketch below).
Back to what's inside the square brackets: T.arange(y.shape[0]) is just a plain enumeration vector, (0, 1, ..., n-1), and its purpose here is to pick out each row of that handsome matrix above. Then y holds the target values that supervised learning is all about, with one entry per row of the matrix. Finally!! Wrapping the whole thing in T.mean(...) gives the mean log-probability of the correct labels over this minibatch.
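To see the indexing trick in plain numpy (probabilities and labels made up for illustration):

import numpy

# softmax outputs for a minibatch of 2 examples and 4 classes
p_y_given_x = numpy.array([[0.1, 0.6, 0.2, 0.1],
                           [0.3, 0.1, 0.1, 0.5]])
y = numpy.array([1, 3])  # the correct class of each example

log_p = numpy.log(p_y_given_x)
# fancy indexing: picks out log_p[0, y[0]] and log_p[1, y[1]]
picked = log_p[numpy.arange(y.shape[0]), y]
print picked          # [log(0.6), log(0.5)]
print -picked.mean()  # the negative log likelihood of this minibatch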
c. A small aside
After this, the code defines the error-checking method errors(self, y), along with the data-loading helpers load_data(dataset) and shared_dataset(data_xy, borrow=True).
The test data is LeCun et al.'s MNIST, which contains a huge pile of handwritten digits waiting for you to recognize; it can be downloaded from deeplearning.net.
Another characteristic of Theano is its use of something called a shared variable. Theano trains on the GPU for efficiency, and shared variables speed up moving the data into GPU memory; put another way, they let the GPU and the CPU share a variable.
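For reference, the core of those helpers boils down to something like this (paraphrased from the tutorial, so treat it as a sketch rather than the exact code):

def shared_dataset(data_xy, borrow=True):
    # put the data into shared variables, so that minibatches can be
    # sliced out on the GPU without a host-to-device copy per batch
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    # the labels are stored as floats on the device, but indexing needs
    # integers, hence the cast
    return shared_x, T.cast(shared_y, 'int32')

def errors(self, y):
    # fraction of minibatch examples whose prediction misses the label
    return T.mean(T.neq(self.y_pred, y))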
d. The main body
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    # load the dataset
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # number of minibatches per set
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # data, presented as rasterized images
    y = T.ivector('y')   # labels, presented as 1D vector of [int] labels

    # logistic regression: 28*28 pixels in ~~~ ten possible digits out ~~~
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compile functions that report the error on a given minibatch; the
    # `givens` dict substitutes the corresponding dataset slice for x and y
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # ah, the familiar derivatives! time to train
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify the update rules and hand them to train_model
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'

    # early-stopping parameters: `patience` ends training early when it has
    # run for a while without any meaningful improvement in performance
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                           # go through this many minibatches before
                           # checking the network on the validation set;
                           # in this case we check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            # periodically check performance on the validation set
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
            if patience <= iter:
                done_looping = True
                break
    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            ' with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
# and finally, the humble main entry point
if __name__ == '__main__':
sgd_optimization_mnist()
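The givens mechanism above deserves a second look: theano.function never receives the data as an argument; each symbolic variable is instead replaced by a slice of a shared variable, selected by the minibatch index. A toy sketch with made-up names:

import numpy
import theano
import theano.tensor as T

data = theano.shared(numpy.arange(10, dtype=theano.config.floatX))
i = T.lscalar('i')  # minibatch index
v = T.vector('v')   # symbolic stand-in for one minibatch of size 2

f = theano.function(inputs=[i], outputs=v.sum(),
                    givens={v: data[i * 2:(i + 1) * 2]})
print f(0)  # sums data[0:2], giving 1.0
print f(3)  # sums data[6:8], giving 13.0

Because data lives in a shared variable, the slicing happens where the data already is, which is exactly why the datasets were loaded via shared_dataset earlier.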
e. What's next
I didn't write much about the rest, since the original code's comments are already quite detailed; the idea is simple, it's just the implementation that gets a bit involved. I'll try to post the experimental results tomorrow!
EOF
by more.
2015.5.3
Please credit the source when reposting.