DBN Code Analysis

Reposted from: http://blog.csdn.net/Rainbow0210/article/details/53010694?locationNum=1&fps=1

Implementation of a DBN (DeepLearnToolbox):

Here the DBN is used as an unsupervised pre-training framework; its "learning results" (the trained weights) are handed to an ANN, which is then fine-tuned to perform the classification.

The training set consists of 60000 handwritten digit images of size 28*28 and the test set of 10000 such images, so a single image has 28*28 = 784 features.
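
As a quick sanity check of the data layout (a hypothetical snippet, not part of the original example, assuming the mnist_uint8.mat file that ships with DeepLearnToolbox is on the path), the loaded matrices should have the following shapes:

%hypothetical shape check for the DeepLearnToolbox MNIST data
load mnist_uint8;
assert(isequal(size(train_x), [60000 784]));  %60000 images, 784 pixels each
assert(isequal(size(train_y), [60000 10]));   %one-hot labels for 10 digit classes
assert(isequal(size(test_x),  [10000 784]));
assert(isequal(size(test_y),  [10000 10]));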

% function test_example_DBN
load mnist_uint8;

train_x = double(train_x) / 255;
test_x  = double(test_x)  / 255;
train_y = double(train_y);
test_y  = double(test_y);

%%  ex2 train a 100-100 hidden unit DBN and use its weights to initialize a NN
rand('state',0)
%train dbn
%initialize the DBN:
%two layers besides the input layer, 100 units each, i.e. two stacked RBMs
dbn.sizes = [100 100];
%number of training epochs
opts.numepochs =   2;
%number of samples per mini-batch
opts.batchsize = 100;
%momentum: fraction of the previous update carried into the current one (0 disables it)
opts.momentum  =   0;
%learning rate
opts.alpha     =   1;
%set up the DBN
dbn = dbnsetup(dbn, train_x, opts);
%train the DBN
dbn = dbntrain(dbn, train_x, opts);
%at this point the DBN has been trained

%unfold dbn to nn
%convert the trained DBN weights into an NN with 10 output units, one per digit
%(see the sketch of dbnunfoldtonn after this script)
nn = dbnunfoldtonn(dbn, 10);

%use the sigmoid as the NN's activation function
nn.activation_function = 'sigm';

%train nn
%fine-tune the NN with backpropagation
opts.numepochs =  3;
opts.batchsize = 100;
nn = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);

assert(er < 0.10, 'Too big error');
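
For reference, dbnunfoldtonn itself is short. The sketch below paraphrases the DeepLearnToolbox version (check the toolbox source for the exact code): it builds an NN whose layer sizes are the DBN's sizes plus an output layer, then copies each RBM's hidden bias c and weights W into the corresponding NN layer.

function nn = dbnunfoldtonn(dbn, outputsize)
    %layer sizes for the NN, e.g. [784 100 100 10]
    %(the local variable shadows the builtin size, as in the toolbox)
    if(exist('outputsize','var'))
        size = [dbn.sizes outputsize];
    else
        size = [dbn.sizes];
    end
    nn = nnsetup(size);
    %DeepLearnToolbox stores the bias as the first column of nn.W{i},
    %so each NN layer is initialized with [c W] from the matching RBM
    for i = 1 : numel(dbn.rbm)
        nn.W{i} = [dbn.rbm{i}.c dbn.rbm{i}.W];
    end
end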
 
 
function dbn = dbnsetup(dbn, x, opts)
    %n is the feature dimension of a single sample: 784
    n = size(x, 2);
    %dbn.sizes now holds the full layer sizes of the RBMs: [784 100 100]
    dbn.sizes = [n, dbn.sizes];

    %numel(dbn.sizes) is the number of elements in dbn.sizes; for [784 100 100] it is 3
    %initialize each RBM
    for u = 1 : numel(dbn.sizes) - 1
        %learning rate of this RBM
        dbn.rbm{u}.alpha    = opts.alpha;
        %momentum coefficient
        dbn.rbm{u}.momentum = opts.momentum;
        %the first RBM is 784-100, the second is 100-100
        %connection weights, all initialized to zero
        %(zero init is fine here: the Bernoulli sampling in CD breaks the symmetry between hidden units)
        dbn.rbm{u}.W  = zeros(dbn.sizes(u + 1), dbn.sizes(u));
        %update buffer for the weights (same pattern for vb and vc below)
        dbn.rbm{u}.vW = zeros(dbn.sizes(u + 1), dbn.sizes(u));
        %784 for the first RBM, 100 for the second
        %visible-layer biases, initialized to zero
        dbn.rbm{u}.b  = zeros(dbn.sizes(u), 1);
        dbn.rbm{u}.vb = zeros(dbn.sizes(u), 1);
        %100 for the first RBM, 100 for the second
        %hidden-layer biases, initialized to zero
        dbn.rbm{u}.c  = zeros(dbn.sizes(u + 1), 1);
        dbn.rbm{u}.vc = zeros(dbn.sizes(u + 1), 1);
    end
end
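
After dbnsetup runs with dbn.sizes = [100 100] on the 784-dimensional MNIST input, the parameter shapes can be verified like this (a hypothetical check, not part of the original code):

%expected shapes: dbn.sizes is now [784 100 100]
assert(isequal(size(dbn.rbm{1}.W), [100 784]));  %first RBM: 784 visible -> 100 hidden
assert(isequal(size(dbn.rbm{1}.b), [784 1]));    %visible biases
assert(isequal(size(dbn.rbm{1}.c), [100 1]));    %hidden biases
assert(isequal(size(dbn.rbm{2}.W), [100 100]));  %second RBM: 100 visible -> 100 hidden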

 
 
function dbn = dbntrain(dbn, x, opts)
    %x = train_x: 60000 samples, 784 features each, i.e. 60000*784
    %n is the number of RBMs in the DBN, here n = 2
    n = numel(dbn.rbm);
    %fully train the first RBM on the raw input
    dbn.rbm{1} = rbmtrain(dbn.rbm{1}, x, opts);
    %then train each later RBM on the output of the one below it
    for i = 2 : n
        %propagate the data up through the previous RBM
        x = rbmup(dbn.rbm{i - 1}, x);
        %train this RBM
        dbn.rbm{i} = rbmtrain(dbn.rbm{i}, x, opts);
    end

end
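
To make the greedy layer-wise scheme concrete, here is the shape trace for this example (x2 is just an illustrative name, not in the original code):

%dbn.rbm{1} is trained on x, a 60000*784 matrix of pixel values
x2 = rbmup(dbn.rbm{1}, x);   %x2 is 60000*100: hidden activation probabilities of RBM 1
%dbn.rbm{2} is then trained on x2, treating those activations as its visible data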

 
 
function x = rbmup(rbm, x)
    %sigm is the logistic sigmoid function (see the helper sketch after this function)
    %compute the hidden-layer activation probabilities from the layer below
    x = sigm(repmat(rbm.c', size(x, 1), 1) + x * rbm.W');
end
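
rbmup relies on the toolbox helper sigm, and rbmtrain below also uses sigmrnd. Both live in the toolbox's util directory and look roughly like this (a paraphrase; consult the toolbox for the exact files):

function X = sigm(P)
    %logistic sigmoid, applied element-wise
    X = 1 ./ (1 + exp(-P));
end

function X = sigmrnd(P)
    %sigmoid followed by Bernoulli sampling: returns binary 0/1 states
    X = double(1 ./ (1 + exp(-P)) > rand(size(P)));
end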

 
 
function rbm = rbmtrain(rbm, x, opts)
    %the elements of x must be floats in [0,1]
    assert(isfloat(x), 'x must be a float');
    assert(all(x(:)>=0) && all(x(:)<=1), 'all data in x must be in [0:1]');

    %m is the number of samples, here m = 60000
    m = size(x, 1);
    %number of mini-batches, each of opts.batchsize samples
    numbatches = m / opts.batchsize;

    %opts.batchsize must divide m exactly
    assert(rem(numbatches, 1) == 0, 'numbatches not integer');

    %opts.numepochs: number of passes over the training set
    for i = 1 : opts.numepochs

        %kk is a random permutation of 1..m, used to shuffle the samples
        kk = randperm(m);

        %accumulated reconstruction error for this epoch
        err = 0;

        %train on each mini-batch
        for l = 1 : numbatches
            %take the next opts.batchsize samples;
            %after the loop each sample has been used exactly once in this epoch
            batch = x(kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize), :);

            %v1 is the visible state, here a 100*784 matrix
            v1 = batch;
            %sample the binary hidden state h1 from v1 (one Gibbs step)
            h1 = sigmrnd(repmat(rbm.c', opts.batchsize, 1) + v1 * rbm.W');
            %sample the reconstruction v2 from h1 (one Gibbs step)
            v2 = sigmrnd(repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W);
            %compute the hidden probabilities h2 from v2 (note: sigm, no sampling on the last step)
            h2 = sigm(repmat(rbm.c', opts.batchsize, 1) + v2 * rbm.W');

            %with v1, h1, v2, h2 in hand, most of the contrastive divergence (CD-1)
            %algorithm is done; only the parameter updates remain
            %(see the update equations after this function)

            %positive and negative statistics for the weight update
            c1 = h1' * v1;
            c2 = h2' * v2;

            rbm.vW = rbm.momentum * rbm.vW + rbm.alpha * (c1 - c2)     / opts.batchsize;
            rbm.vb = rbm.momentum * rbm.vb + rbm.alpha * sum(v1 - v2)' / opts.batchsize;
            rbm.vc = rbm.momentum * rbm.vc + rbm.alpha * sum(h1 - h2)' / opts.batchsize;

            %apply the updates
            rbm.W = rbm.W + rbm.vW;
            rbm.b = rbm.b + rbm.vb;
            rbm.c = rbm.c + rbm.vc;

            %accumulate the squared reconstruction error
            err = err + sum(sum((v1 - v2) .^ 2)) / opts.batchsize;
        end
        %report progress
        disp(['epoch ' num2str(i) '/' num2str(opts.numepochs)  '. Average reconstruction error is: ' num2str(err / numbatches)]);

    end
end
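
The updates computed in the inner loop are the standard CD-1 rule. With learning rate \alpha, batch size B, and v1, h1, v2, h2 as in the code, the increments are

\Delta W = \frac{\alpha}{B}\left(h_1^\top v_1 - h_2^\top v_2\right), \qquad
\Delta b = \frac{\alpha}{B}\sum_{\mathrm{batch}}(v_1 - v_2), \qquad
\Delta c = \frac{\alpha}{B}\sum_{\mathrm{batch}}(h_1 - h_2),

and with momentum m each velocity is updated as vW \leftarrow m \cdot vW + \Delta W (likewise for vb and vc) before being added to the parameters.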

Appended below is the notice from Salakhutdinov and Hinton's original MNIST code page:

Code provided by Ruslan Salakhutdinov and Geoff Hinton. Permission is granted for anyone to copy, use, modify, or distribute this program and accompanying programs and documents for any purpose, provided this copyright notice is retained and prominently displayed, along with a note saying that the original programs are available from our web page. The programs and documents are distributed without any warranty, express or implied. As the programs were written for research purposes only, they have not been tested to the degree that would be advisable in any important application. All use of these programs is entirely at the user's own risk.

How to make it work:
1. Create a separate directory and download all these files into the same directory.
2. Download from http://yann.lecun.com/exdb/mnist the following 4 files:
   o train-images-idx3-ubyte.gz
   o train-labels-idx1-ubyte.gz
   o t10k-images-idx3-ubyte.gz
   o t10k-labels-idx1-ubyte.gz
3. Unzip these 4 files by executing:
   o gunzip train-images-idx3-ubyte.gz
   o gunzip train-labels-idx1-ubyte.gz
   o gunzip t10k-images-idx3-ubyte.gz
   o gunzip t10k-labels-idx1-ubyte.gz
   If unzipping with WinZip, make sure the file names have not been changed by WinZip.
4. Download Conjugate Gradient code minimize.m
5. Download Autoencoder_Code.tar which contains 13 files OR download each of the following 13 files separately for training an autoencoder and a classification model:
   o mnistdeepauto.m    Main file for training deep autoencoder
   o mnistclassify.m    Main file for training classification model
   o converter.m        Converts raw MNIST digits into matlab format
   o rbm.m              Training RBM with binary hidden and binary visible units
   o rbmhidlinear.m     Training RBM with Gaussian hidden and binary visible units
   o backprop.m         Backpropagation for fine-tuning an autoencoder
   o backpropclassify.m Backpropagation for classification using "encoder" network
   o CG_MNIST.m         Conjugate Gradient optimization for fine-tuning an autoencoder
   o CG_CLASSIFY_INIT.m Co