先是全连接层的代码:
class FullyConnectedLayer(object):
    """Fully connected layer with a regular and a dropout forward path.

    The layer owns a weight matrix ``w`` of shape ``(n_in, n_out)`` and a
    bias vector ``b`` of shape ``(n_out,)``, both stored as Theano shared
    variables and exposed together through ``params``.
    """

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        """Store the layer configuration and randomly initialize ``w``/``b``.

        Args:
            n_in: Number of input units per example.
            n_out: Number of output units per example.
            activation_fn: Callable applied to the affine output.
            p_dropout: Value handed to ``dropout_layer`` and used to scale
                the weighted input on the non-dropout path.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Wire the layer to its inputs and build both output expressions.

        Args:
            inpt: Input for the non-dropout path; reshaped to
                ``(mini_batch_size, n_in)``.
            inpt_dropout: Input for the dropout path; reshaped the same way
                and then passed through ``dropout_layer``.
            mini_batch_size: Number of examples per mini-batch.

        Sets ``inpt``, ``output``, ``y_out``, ``inpt_dropout`` and
        ``output_dropout`` on the instance.
        """
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        # Non-dropout path: the weighted input is scaled by (1 - p_dropout)
        # before the bias is added.
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        # Index of the largest output along axis 1 (one value per row).
        self.y_out = T.argmax(self.output, axis=1)
        # Dropout path: dropout_layer is applied to the reshaped input and
        # no (1 - p_dropout) scaling is used.
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        """Return the mean of the element-wise equality of ``y`` and ``y_out``."""
        return T.mean(T.eq(y, self.y_out))
__init__函数主要就是初始化weights和biases,看懂这一点就问题不大了。set_inpt方法用来给全连接层设置输入,并计算相应的输出(注意input是Python的内置函数,所以这里用inpt)。我们用两种不同的方式设置输入:self.inpt和self.inpt_dropout。使用dropout时,我们按self.p_dropout的比例移除一部分神经元,所以self.inpt_dropout和self.output_dropout在训练的时候使用;self.inpt和self.output则有其他用途,比如计算验证集的正确率等。卷积层和Softmax层代码跟全连接层差不多,就是多了cost和accuracy两个方法,不记笔记了,代码在最后。
然后是Network这个类:
class Network(object):
    """A sequence of layers connected into one symbolic computation."""

    def __init__(self, layers, mini_batch_size):
        """Connect ``layers`` in order and expose the final outputs.

        Args:
            layers: Indexable sequence of layer objects; each must provide
                ``params``, ``set_inpt``, ``output`` and ``output_dropout``.
            mini_batch_size: Mini-batch size forwarded to every layer's
                ``set_inpt`` call.
        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        # Flat list of every layer's parameters, in layer order.
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        # The first layer receives self.x on both the regular and the
        # dropout input.
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        # Each later layer is fed the previous layer's output and
        # output_dropout.
        for j in range(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout,
                self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout
self.x和self.y定义了Theano的符号变量(symbolic variable),分别代表网络的输入和期望输出。
init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
这里传入了两个self.x,因为我们会以两种方式使用网络(有dropout和无dropout)。for循环把符号变量self.x逐层向前传播,从而定义出self.output和self.output_dropout。
之后是SGD方法,有点长,分段来记:
def SGD(self, training_data, epochs, mini_batch_size, eta,
validation_data, test_data, lmbda=0.0):
training_x, training_y = training_data
validation_x, validation_y = validation_data
test_x, test_y = test_data
# compute number of minibatches for training, validation and testing
num_training_batches = size(training_data)/mini_batch_size
num_validation_batches = size(validation_data)/mini_batch_size
num_test_batches = size(test_data) / mini_batch_size
这一段很容易懂啦,下一段比较重要:
# define the (regularized) cost function, symbolic gradients, and updates
l2_norm_squared = sum([(layer.w*layer.w).sum() for layer in self.layers])
cost = self.layers[-1].cost(self)+0.5*lmbda*l2_norm_squared/num_training_batches
grads = T.grad(cost, self.params)
updates = [(param, param-eta*grad) for param, grad in zip(self.params, grads)]
注意我们用了L2正则化,这一段计算了cost和梯度,用updates给两个参数更新&#x