Theano LeNet Code Walkthrough

With the Theano environment set up, this is a first read-through of a Theano deep learning program, to see how a network is built and trained with Theano.

Task: handwritten digit recognition

Dataset: [MNIST]: mnist.pkl.gz

Platform: Theano

Network: LeNet

Code: code (the annotated version is included below)

1. Network structure

2. Overall code architecture (hand-drawn figure)


3. Code analysis

#【Overall LeNet framework】: LeNetConvPoolLayer.py (read it together with mlp.py)

```python
# -*- coding: utf-8 -*-
# from the web (adapted from the Theano deep learning tutorial)
import os
import sys
import timeit
import numpy
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
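# Note: the old tutorial-era Theano API is used here; in later Theano releases
# downsample.max_pool_2d was superseded by theano.tensor.signal.pool.pool_2d,
# and conv.conv2d by theano.tensor.nnet.conv2d (version-dependent; check your
# installation)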
#from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer,LR as LogisticRegression,load_data

# Start reading from the main entry point: evaluate_lenet5()
# The class below implements one convolution + subsampling (pooling) stage
class LeNetConvPoolLayer(object):
    """卷积层+采样层"""
#输入的参数
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        rng,input。
	filter_shape: 长度为4的元组或list
        filter_shape: (过滤器数目, 输入特征图数目, 过滤器高度, 过滤器宽度)

        image_shape: 长度为4的元组或list
        image_shape: (样本块大小, 输入特征图数目, 图像高度, 图像宽度)

        poolsize: 长度为2的元组或list
        poolsize: 下采样的shape大小(#rows, #cols)
        """

        # Assert that image_shape[1] equals filter_shape[1]; if not, an error is raised.
        # From the definitions above, both elements give the number of input feature maps.
        assert image_shape[1] == filter_shape[1] 
        self.input = input

        # prod() returns the product of the elements. If filter_shape = (2, 4, 3, 3),
        # then filter_shape[1:] = (4, 3, 3) and
        # prod(filter_shape[1:]) = 4*3*3 = 36
        # (this follows the tutorial)
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
                   
        # initialize the weights W from a random uniform distribution
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
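        # Worked example (using layer0's actual shapes from evaluate_lenet5 below):
        #   filter_shape = (20, 1, 5, 5) -> fan_in = 1*5*5 = 25
        #   fan_out = 20 * 5*5 / (2*2) = 125
        #   W_bound = sqrt(6 / (25 + 125)) = sqrt(0.04) = 0.2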
        # theano.shared creates a shared variable: usable anywhere in the program,
        # always holding the same value: theano.shared(value, type)
        # numpy.asarray converts a list to an array
        # rng.uniform(low, high, size) draws samples uniformly from [low, high):
        #   >>> s = np.random.uniform(-1, 0, 1000)
        #   >>> np.all(s >= -1)
        #   True
        #   >>> np.all(s < 0)
        #   True
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # one bias per output feature map, initialized to 0; the biases are shared variables too
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve the input feature maps with the filters
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # downsample each feature map with max-pooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # First broadcast the bias from a 1-D vector to a 4-D tensor of shape
        # (1, n_filters, 1, 1), then add it to the pooled output, and pass the
        # sum through the tanh nonlinearity to get this layer's output
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
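        # dimshuffle example: if b has shape (nkerns,), b.dimshuffle('x', 0, 'x', 'x')
        # yields a (1, nkerns, 1, 1) tensor, which then broadcasts across the
        # (batch_size, nkerns, rows, cols) pooled output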

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input
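
# A minimal standalone use of the class above (a sketch; the shapes here are
# illustrative, assuming the imports at the top of this file):
#   rng = numpy.random.RandomState(1234)
#   x4 = T.tensor4('x4')
#   layer = LeNetConvPoolLayer(rng, input=x4,
#                              image_shape=(1, 1, 28, 28),
#                              filter_shape=(2, 1, 5, 5))
#   f = theano.function([x4], layer.output)  # maps (1,1,28,28) -> (1,2,12,12)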

# Default parameters: learning_rate is the learning rate; n_epochs is how many
# full passes to make over the dataset mnist.pkl.gz
# number of convolution kernels: nkerns=[20, 50] (nkerns[0] kernels in layer0,
# nkerns[1] in layer1)
# batch_size images are fed in per update; n_train_batches below works out to
# 50000 / batch_size (50 batches of 1000 training images with the defaults)
def evaluate_lenet5(learning_rate=0.1, n_epochs=1000,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=1000):
    """ Demonstrates lenet on MNIST dataset
    实验数据集是MNIST数据集。  
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer
    One epoch is a full pass over the data, i.e. (dataset size / batch_size) updates.
    
    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)
    Path to the dataset.
    
    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    Number of kernels: the first conv/pool layer has 20, the second has 50.
    Each kernel produces one feature map after convolution.
    (I think of the kernel count as the layer's number of neurons, each with
    its own set of weights.)
    """
    rng = numpy.random.RandomState(23455)  # seeded random number generator

    datasets = load_data(dataset)  # returns a list of three (data, label) pairs: train, validation, test

    train_set_x, train_set_y = datasets[0]  # training set
    valid_set_x, valid_set_y = datasets[1]  # validation set
    test_set_x, test_set_y = datasets[2]    # test set

    # compute the number of minibatches for training, validation and testing
    # first get the total number of examples in each set
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    # then how many minibatches of size batch_size each set contains
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
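    # With MNIST's 50000/10000/10000 train/valid/test split and batch_size=1000,
    # this gives n_train_batches = 50, n_valid_batches = 10, n_test_batches = 10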

    # allocate symbolic variables for the data
    # index is itself a symbolic variable; minibatch_index is later substituted
    # for it to select which batch of batch_size images to use
    index = T.lscalar()
    # x and y are symbolic placeholders, like variable declarations; the
    # givens={x: ..., y: ...} clauses in the functions below bind them to data
    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Build the layer0 input: reshape the (batch_size, 28*28) data block into a
    # 4-D tensor (batch_size, 1, 28, 28).
    # (batch_size, 1, 28*28) means batch_size rows, one sample per row, with
    # 28*28 columns holding that sample's pixels: one big matrix with one
    # 28*28 image per row, batch_size rows (see the hand-drawn figure)
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer (see how convolution and
    # pooling change the image size):
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        # batch_size images go in at once, each convolved with every filter
        # (the 1 here means one 28*28 input feature map per image)
        filter_shape=(nkerns[0], 1, 5, 5),  # the filter bank: nkerns[0] = 20 filters of size 5*5
        # each image is convolved with the 20 filters; the input block is
        # batch_size rows by 1*(28*28) columns
        poolsize=(2, 2)  # the pooling window size
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        # layer1's input is batch_size*nkerns[0] feature maps, each 12*12,
        # i.e. an input block of batch_size rows by nkerns[0]*(12*12) columns,
        # or (1000, 20 * 12 * 12) = (1000, 2880) with the defaults
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        # 50 (nkerns[1]) x 20 (nkerns[0]) filters, each of size 5*5, as above
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (1000, 50 * 4 * 4) = (1000, 800) with the defaults here.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer (HiddenLayer from mlp.py); it is fully
    # connected to its input
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,  # input width: 50*4*4 = 800
        n_out=500,  # 500 output units
        activation=T.tanh  # nonlinear activation function
    )

    # LR from mlp.py, i.e. logistic regression; in this layer W and b are both
    # initialized to zero (only in this layer), and the regression function is set up
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)  # the cost expression
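    # The cost is the standard negative log-likelihood (the formula is not
    # spelled out in this file):
    #   NLL(theta) = -(1/N) * sum_i log P(Y = y_i | x_i, theta)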
    # test-set function
    test_model = theano.function(
        [index],  # batch index
        layer3.errors(y),  # when called, computes the error on the test set
        givens={  # bind the corresponding data and labels
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # validation-set function
    validate_model = theano.function(
        [index],
        layer3.errors(y),  # likewise, the error on the validation set
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)  # derivatives of the cost w.r.t. the trainable parameters

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    # updates is what modifies the internal parameters: it refreshes the weights and biases
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
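    # each entry is a (shared_variable, new_value_expression) pair; every call
    # to train_model applies them all at once, i.e. the plain SGD rule
    #   param <- param - learning_rate * d(cost)/d(param)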
    # training function
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
                           # (i.e. extend the training budget when doing well)

    # best_validation_loss is only treated as significantly improved when the new
    # validation error is below 0.995 of the old best, i.e. at least 0.5% lower
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation set;
                                  # in this case we check every epoch
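    # Worked numbers with the defaults above: n_train_batches = 50, so
    # validation_frequency = min(50, 10000/2) = 50, i.e. validate once per epoch;
    # without further improvement, training stops once iter reaches patience
    # (10000 minibatches = 200 epochs)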

    best_validation_loss = numpy.inf
    best_iter = 0  # iteration at which the best score was obtained
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    # n_epochs: how many passes to make over the whole 'mnist.pkl.gz'
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index  # minibatches processed so far

            if iter % 100 == 0:  # print every 100 iterations
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)  # the actual training call (drives all layers from the top)
            #print 'minibatch_index=',minibatch_index
            # validate every validation_frequency minibatches, where
            # validation_frequency = min(n_train_batches, patience / 2)
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on the validation set via layer3.errors
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)  # mean over the validation batches
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        # raise patience (patience was also used above to set
                        # validation_frequency = min(n_train_batches, patience / 2))
                        patience = max(patience, iter * patience_increase)

                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # evaluate it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

if __name__ == '__main__':
    evaluate_lenet5()


def experiment(state, channel):
    evaluate_lenet5(state.learning_rate, dataset=state.dataset)

 

```

#【Implementation details: read together with mlp.py】
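As a quick sanity check of the shape arithmetic used in evaluate_lenet5 above, here is a small standalone sketch (plain Python, no Theano required; the function name conv_pool_shape is mine, not part of the tutorial code):

```python
# Shape trace for the defaults above (batch_size=1000, nkerns=[20, 50]).
# A "valid" convolution gives out = in - filter + 1; 2x2 max-pooling halves it.
def conv_pool_shape(in_size, filter_size=5, pool=2):
    return (in_size - filter_size + 1) // pool

s0 = conv_pool_shape(28)   # layer0: (28-5+1)/2 = 12
s1 = conv_pool_shape(s0)   # layer1: (12-5+1)/2 = 4
print(s0, s1)              # -> 12 4
print(50 * s1 * s1)        # layer2 input width: nkerns[1]*4*4 = 800
```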

