DropOut
dropout过程:
(1) 开始删除随机选择的一半神经元;
(2) 利用SGD(随机梯度下降法)抽取的一个小批量样本,在删减后的神经网络上进行正向和反向传播,更新weights和biases;
(3) 恢复之前删除过的神经元,重新随机选择一半神经元删除,然后正向、反向传播更新weights和biases,重复此过程;
(4) 由于之前学习出来的神经网络中的神经元都是在只有一半神经元的基础上学习的,当所有神经元被恢复后,把隐藏层的所有权重减半.
code3:
#!/usr/bin/python
# coding:utf-8
import cPickle
import gzip
import numpy as np
# 通过利用GPU获得与手工用C实现差不多的性能
import theano
# tensor是在numpy 基础上更加适用于神经网络的张量操作的库, tensor所有的操作都是基于符号的
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
# from theano.tensor.signal import downsample
import theano.tensor.signal.pool as downsample
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh
# Module-level switch: True requests the GPU device, False leaves Theano's
# configuration untouched.
GPU = True
if GPU:
    print("Trying to run under a GPU. If this is not desired, then modify "+"network3.py\nto set the GPU flag to False.")
    try:
        theano.config.device = 'gpu'
    except Exception:
        # Best effort: if Theano rejects the assignment we keep whatever
        # device is already configured.  Only ordinary errors are ignored so
        # that KeyboardInterrupt / SystemExit still propagate.
        pass
    # The shared variables created in this file are built with
    # theano.config.floatX, so this sets their dtype to float32.
    theano.config.floatX = 'float32'
else:
    print("Running with a CPU. If this is not desired, then the modify "+"network3.py to set\nthe GPU flag to True.")
# Load the MNIST data
def load_data_shared(filename="../data/mnist.pkl.gz"):
    """Load a gzipped pickle of the dataset and wrap it in Theano shared variables.

    Args:
        filename: path to a gzip-compressed pickle that unpickles to a
            ``(training_data, validation_data, test_data)`` triple, where
            each element is indexable as ``(inputs, labels)``.

    Returns:
        A list ``[training, validation, test]``; each entry is a pair
        ``(shared_x, shared_y_as_int32)``.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # dataset files that come from a trusted source.
    # The ``with`` block guarantees the file is closed even if unpickling fails.
    with gzip.open(filename, 'rb') as f:
        training_data, validation_data, test_data = cPickle.load(f)

    def shared(data):
        """Place one (inputs, labels) pair into shared variables.

        Shared variables allow Theano to copy the data to the GPU.
        """
        shared_x = theano.shared(np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        # Labels are stored as floatX and exposed to callers as int32.
        return shared_x, T.cast(shared_y, "int32")

    return [shared(training_data), shared(validation_data), shared(test_data)]
# Activation functions for neurons
def linear(z):
    """Identity activation: hand the weighted input back unchanged."""
    return z
# Rectified linear unit
def ReLU(z):
    """Rectified linear activation: the element-wise maximum of 0.0 and z."""
    rectified = T.maximum(0.0, z)
    return rectified
class Network(object):
    """A stack of layer objects wired together symbolically and trained with mini-batch SGD."""

    def __init__(self, layers, mini_batch_size):
        """Connect the layers into one symbolic graph.

        Args:
            layers: list of layer objects; each must expose ``params`` and
                ``set_inpt(inpt, inpt_dropout, mini_batch_size)`` and, after
                ``set_inpt``, the attributes ``output`` and ``output_dropout``.
            mini_batch_size: number of examples per mini-batch; passed to
                every layer and used to slice the data sets in ``SGD``.
        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        # The first layer receives the raw input on both the plain and the
        # dropout path.
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        # Every later layer is fed by the two outputs of its predecessor.
        for prev_layer, layer in zip(self.layers[:-1], self.layers[1:]):
            layer.set_inpt(prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    # Train with mini-batch stochastic gradient descent
    def SGD(self, training_data, epochs, mini_batch_size, eta, validation_data, test_data, lmbda=0.0):
        """Train the network and report validation/test accuracy after each epoch.

        Args:
            training_data, validation_data, test_data: ``(x, y)`` pairs of
                Theano variables, as produced by ``load_data_shared``.
            epochs: number of passes over the training data.
            mini_batch_size: used to compute the number of batches per data
                set.  NOTE: the batch slices themselves use
                ``self.mini_batch_size``, so the two values are expected to
                be equal.
            eta: learning rate.
            lmbda: L2 regularisation strength.

        Side effects:
            Updates the layers' shared parameters, prints progress, and sets
            ``self.test_mb_predictions``.
        """
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        # Number of mini-batches for training, validation and testing.
        # Floor division keeps these integers so they can be used with xrange.
        num_training_batches = size(training_data) // mini_batch_size
        num_validation_batches = size(validation_data) // mini_batch_size
        num_test_batches = size(test_data) // mini_batch_size
        # Regularised cost, symbolic gradients and the parameter updates.
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad) for param, grad in zip(self.params, grads)]
        # Mini-batch index fed to the compiled functions.
        i = T.lscalar()

        def mini_batch(data):
            """Symbolic slice selecting the i-th mini-batch of ``data``."""
            return data[i*self.mini_batch_size: (i+1)*self.mini_batch_size]

        # ``updates`` gives the new value of each shared parameter;
        # ``givens`` substitutes the mini-batch slices for self.x / self.y
        # in the computation graph.
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={self.x: mini_batch(training_x), self.y: mini_batch(training_y)})
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={self.x: mini_batch(validation_x), self.y: mini_batch(validation_y)})
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={self.x: mini_batch(test_x), self.y: mini_batch(test_y)})
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={self.x: mini_batch(test_x)})
        # Training loop.
        best_validation_accuracy = 0.0
        # Stay None until a validation pass actually records a best result,
        # so the summary below never touches an unbound name (e.g. epochs == 0).
        best_iteration = None
        test_accuracy = None
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                train_mb(minibatch_index)
                # True on the last mini-batch of each epoch.
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean([validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean([test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(test_accuracy))
        print("Finished training network.")
        if best_iteration is None:
            # No validation result was recorded, so there is nothing to summarise.
            return
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(best_validation_accuracy, best_iteration))
        if test_accuracy is not None:
            print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
# Layer types
# Combined convolutional and max-pooling layer
class ConvPoolLayer(object):
    """Convolution followed by max-pooling and an activation function."""

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2), activation_fn=sigmoid):
        """Store the layer geometry and draw the initial weights and biases.

        Args:
            filter_shape: tuple of length 4 -- number of filters, number of
                input feature maps, filter height, filter width.
            image_shape: tuple of length 4 -- mini-batch size, number of
                input feature maps, image height, image width.
            poolsize: tuple of length 2 giving the pooling sizes.
            activation_fn: activation applied to the pooled output.
        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # Initialise weights and biases (weights are drawn before biases).
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        weight_scale = np.sqrt(1.0/n_out)
        initial_w = np.random.normal(loc=0, scale=weight_scale, size=filter_shape)
        self.w = theano.shared(
            np.asarray(initial_w, dtype=theano.config.floatX), borrow=True)
        initial_b = np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],))
        self.b = theano.shared(
            np.asarray(initial_b, dtype=theano.config.floatX), borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build the symbolic output of the layer from ``inpt``.

        ``inpt_dropout`` and ``mini_batch_size`` are accepted so the
        signature matches the other layers, but this layer does not use them.
        """
        self.inpt = inpt.reshape(self.image_shape)
        convolved = conv.conv2d(
            input=self.inpt,
            filters=self.w,
            filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled = downsample.pool_2d(
            input=convolved, ds=self.poolsize, mode='max', ignore_border=True)
        # Bias is reshaped with dimshuffle so it can be added to the
        # four-dimensional pooled output.
        bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = self.activation_fn(pooled + bias)
        # No dropout in the convolutional layer: both paths share one output.
        self.output_dropout = self.output
# Fully connected layer
class FullyConnectedLayer(object):
    """Dense layer with an optional dropout path used during training."""

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        """Record the layer sizes and draw the initial weights and biases.

        Args:
            n_in: number of inputs per example.
            n_out: number of output neurons.
            activation_fn: activation applied to the weighted input.
            p_dropout: dropout probability handed to ``dropout_layer``.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # Initialise weights and biases (weights are drawn before biases).
        initial_w = np.random.normal(loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out))
        self.w = theano.shared(
            np.asarray(initial_w, dtype=theano.config.floatX), name='w', borrow=True)
        initial_b = np.random.normal(loc=0.0, scale=1.0, size=(n_out,))
        self.b = theano.shared(
            np.asarray(initial_b, dtype=theano.config.floatX), name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build both symbolic outputs: the plain one and the dropout one."""
        flat_shape = (mini_batch_size, self.n_in)
        keep_fraction = 1-self.p_dropout
        # Plain path: the weighted input is scaled by (1 - p_dropout).
        self.inpt = inpt.reshape(flat_shape)
        self.output = self.activation_fn(keep_fraction*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        # Dropout path: the input is masked by dropout_layer, weights unscaled.
        self.inpt_dropout = dropout_layer(inpt_dropout.reshape(flat_shape), self.p_dropout)
        self.output_dropout = self.activation_fn(T.dot(self.inpt_dropout, self.w) + self.b)

    # Accuracy for a mini-batch
    def accuracy(self, y):
        """Return the mean of the element-wise equality between ``y`` and ``self.y_out``."""
        matches = T.eq(y, self.y_out)
        return T.mean(matches)
# Softmax output layer
class SoftmaxLayer(object):
    """Softmax layer providing the log-likelihood cost and accuracy."""

    def __init__(self, n_in, n_out, p_dropout=0.0):
        """Record the layer sizes and create zero-initialised weights and biases.

        Args:
            n_in: number of inputs per example.
            n_out: number of output classes.
            p_dropout: dropout probability handed to ``dropout_layer``.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Initialise weights and biases to zero.
        zero_w = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        zero_b = np.zeros((n_out,), dtype=theano.config.floatX)
        self.w = theano.shared(zero_w, name='w', borrow=True)
        self.b = theano.shared(zero_b, name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build both symbolic outputs: the plain one and the dropout one."""
        flat_shape = (mini_batch_size, self.n_in)
        keep_fraction = 1-self.p_dropout
        # Plain path: the weighted input is scaled by (1 - p_dropout).
        self.inpt = inpt.reshape(flat_shape)
        self.output = softmax(keep_fraction*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        # Dropout path: the input is masked by dropout_layer, weights unscaled.
        self.inpt_dropout = dropout_layer(inpt_dropout.reshape(flat_shape), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    # Log-likelihood cost
    def cost(self, net):
        """Return the negative mean log of the dropout output at the labels ``net.y``."""
        log_probs = T.log(self.output_dropout)
        rows = T.arange(net.y.shape[0])
        return -T.mean(log_probs[rows, net.y])

    # Accuracy for a mini-batch
    def accuracy(self, y):
        """Return the mean of the element-wise equality between ``y`` and ``self.y_out``."""
        matches = T.eq(y, self.y_out)
        return T.mean(matches)
# Dropout
def dropout_layer(layer, p_dropout):
    """Multiply ``layer`` element-wise by a random 0/1 mask.

    Each mask element is a single binomial trial that succeeds with
    probability ``1 - p_dropout``.

    Args:
        layer: symbolic tensor to mask.
        p_dropout: dropout probability.

    Returns:
        ``layer`` multiplied by the mask cast to ``theano.config.floatX``.
    """
    # NOTE: the seed comes from a RandomState that is freshly seeded with 0,
    # so the same seed value is produced on every call.
    seed = np.random.RandomState(0).randint(999999)
    srng = shared_randomstreams.RandomStreams(seed)
    keep_probability = 1-p_dropout
    mask = srng.binomial(n=1, p=keep_probability, size=layer.shape)
    # Cast the mask to floatX before multiplying it into the layer.
    float_mask = T.cast(mask, theano.config.floatX)
    return layer*float_mask
# Size of a data set
def size(data):
    """Return the length of the first axis of the value held by ``data[0]``.

    ``data[0]`` must provide ``get_value(borrow=True)`` returning an object
    with a ``shape`` attribute.
    """
    first = data[0]
    values = first.get_value(borrow=True)
    return values.shape[0]
if __name__ == '__main__':
    # Load the three data splits from the default path.
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    # One fully connected hidden layer (784 -> 100) feeding a softmax
    # output layer (100 -> 10).
    hidden_layer = FullyConnectedLayer(n_in=784, n_out=100)
    output_layer = SoftmaxLayer(n_in=100, n_out=10)
    net = Network([hidden_layer, output_layer], mini_batch_size)
    # Positional arguments after the training data: epochs=10, then the
    # mini-batch size, then eta=0.1.
    net.SGD(training_data, 10, mini_batch_size, 0.1, validation_data, test_data)
输出(注意:以下日志来自 epochs=60 的一次运行,而上面代码中传给 SGD 的 epochs 为 10):
Trying to run under a GPU. If this is not desired, then modify network3.py
to set the GPU flag to False.
Training mini-batch number 0
Training mini-batch number 1000
Training mini-batch number 2000
Training mini-batch number 3000
Training mini-batch number 4000
Epoch 0: validation accuracy 92.78%
This is the best validation accuracy to date.
The corresponding test accuracy is 92.11%
Training mini-batch number 5000
Training mini-batch number 6000
Training mini-batch number 7000
Training mini-batch number 8000
Training mini-batch number 9000
Epoch 1: validation accuracy 94.91%
This is the best validation accuracy to date.
The corresponding test accuracy is 94.36%
Training mini-batch number 10000
Training mini-batch number 11000
Training mini-batch number 12000
Training mini-batch number 13000
Training mini-batch number 14000
Epoch 2: validation accuracy 95.86%
This is the best validation accuracy to date.
The corresponding test accuracy is 95.31%
Training mini-batch number 15000
Training mini-batch number 16000
Training mini-batch number 17000
Training mini-batch number 18000
Training mini-batch number 19000
......
Epoch 56: validation accuracy 97.77%
This is the best validation accuracy to date.
The corresponding test accuracy is 97.65%
Training mini-batch number 285000
Training mini-batch number 286000
Training mini-batch number 287000
Training mini-batch number 288000
Training mini-batch number 289000
Epoch 57: validation accuracy 97.77%
This is the best validation accuracy to date.
The corresponding test accuracy is 97.65%
Training mini-batch number 290000
Training mini-batch number 291000
Training mini-batch number 292000
Training mini-batch number 293000
Training mini-batch number 294000
Epoch 58: validation accuracy 97.76%
Training mini-batch number 295000
Training mini-batch number 296000
Training mini-batch number 297000
Training mini-batch number 298000
Training mini-batch number 299000
Epoch 59: validation accuracy 97.77%
This is the best validation accuracy to date.
The corresponding test accuracy is 97.65%
Finished training network.
Best validation accuracy of 97.77% obtained at iteration 299999
Corresponding test accuracy of 97.65%