Understanding the DBN C++ Code

In the previous post we studied the RBM code; a DBN is simply a stack of RBMs. Its training has two phases: first each RBM is pretrained one at a time, layer by layer; the learned weights and biases then initialize a corresponding BP (feed-forward) neural network, which is fine-tuned on labeled data. Let's walk through the complete DBN code, starting with the class definitions.

The DBN class:

class DBN {

public:
  int N;
  int n_ins;
  int *hidden_layer_sizes;
  int n_outs;
  int n_layers;
  HiddenLayer **sigmoid_layers;
  RBM **rbm_layers;
  LogisticRegression *log_layer;
  DBN(int, int, int*, int, int);
  ~DBN();
  void pretrain(int*, double, int, int);
  void finetune(int*, int*, double, int);
  void predict(int*, double*);
};

The HiddenLayer class (as the name suggests, one hidden layer of the DBN):

class HiddenLayer {

public:
  int N;
  int n_in;
  int n_out;
  double **W;
  double *b;
  HiddenLayer(int, int, int, double**, double*);
  ~HiddenLayer();
  double output(int*, double*, double);
  void sample_h_given_v(int*, int*);
};


The LogisticRegression class (the output layer of the corresponding network):

class LogisticRegression {

public:
  int N;  // number of training samples
  int n_in;
  int n_out;
  double **W;
  double *b;
  LogisticRegression(int, int, int);
  ~LogisticRegression();
  void train(int*, int*, double);
  void softmax(double*);
  void predict(int*, double*);
};

Finally, the RBM class (already covered in the previous post):

class RBM {

public:
  int N;
  int n_visible;
  int n_hidden;
  double **W;
  double *hbias;
  double *vbias;
  RBM(int, int, int, double**, double*, double*);
  ~RBM();
  void contrastive_divergence(int*, double, int);
  void sample_h_given_v(int*, double*, int*);
  void sample_v_given_h(int*, double*, int*);
  double propup(int*, double*, double);
  double propdown(int*, int, double);
  void gibbs_hvh(int*, double*, int*, double*, int*);
  void reconstruct(int*, double*);
};

Next comes the implementation: training and prediction. Note how the classes fit together: each HiddenLayer and its RBM share the same weight matrix W and hidden bias b (see the DBN constructor below), so pretraining the RBMs directly initializes the feed-forward network, and the LogisticRegression layer sits on top as the classifier.

#include <iostream>
#include <stdlib.h>   // rand, srand, RAND_MAX (missing from the original)
#include <math.h>
#include "HiddenLayer.h"
#include "RBM.h"
#include "LogisticRegression.h"
#include "DBN.h"
using namespace std;


double uniform(double min, double max) {             // uniform random number in [min, max)
  return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}

int binomial(int n, double p) {          // sample from Binomial(n, p); with n = 1 this is a Bernoulli draw, used to binarize activations
  if(p < 0 || p > 1) return 0;
  
  int c = 0;
  double r;
  
  for(int i=0; i<n; i++) {
    r = rand() / (RAND_MAX + 1.0);
    if (r < p) c++;
  }

  return c;
}
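
As a sanity check, binomial(1, p) should return 1 with probability p. A quick standalone harness (my own, not part of the original sources) that estimates the empirical mean:

#include <stdio.h>
#include <stdlib.h>

// Same sampler as above, duplicated so this snippet compiles on its own.
static int binomial(int n, double p) {
  if(p < 0 || p > 1) return 0;
  int c = 0;
  for(int i=0; i<n; i++) {
    double r = rand() / (RAND_MAX + 1.0);
    if(r < p) c++;
  }
  return c;
}

int main() {
  srand(0);
  const int trials = 100000;
  int ones = 0;
  for(int i=0; i<trials; i++) ones += binomial(1, 0.3);
  printf("empirical mean: %f (should be close to 0.3)\n", (double)ones / trials);
  return 0;
}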

double sigmoid(double x) {
  return 1.0 / (1.0 + exp(-x));
}


// DBN
DBN::DBN(int size, int n_i, int *hls, int n_o, int n_l) {
  int input_size;  // number of input units of each RBM

  N = size;                    // number of training samples
  n_ins = n_i;                 // number of input units of the whole network
  hidden_layer_sizes = hls;    // number of units in each hidden layer
  n_outs = n_o;                // number of output units
  n_layers = n_l;              // number of hidden layers

  sigmoid_layers = new HiddenLayer*[n_layers]; // one HiddenLayer pointer per layer
  rbm_layers = new RBM*[n_layers];             // one RBM pointer per layer

  // construct multi-layer
  for(int i=0; i<n_layers; i++) {
    if(i == 0) {
      input_size = n_ins; // the first RBM reads the raw input units
    } else {
      input_size = hidden_layer_sizes[i-1]; // deeper RBMs read the previous hidden layer's units
    }

    // construct sigmoid_layer
    sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL); // build each hidden layer with freshly initialized W and b

    // construct rbm_layer
    rbm_layers[i] = new RBM(N, input_size, hidden_layer_sizes[i], sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL); // build each RBM; input_size is its number of visible units, and W and the hidden bias are shared with the hidden layer above
  }

  // layer for output using LogisticRegression
  log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs); // a logistic-regression layer on top serves as the output layer
}

DBN::~DBN() {
  delete log_layer;

  for(int i=0; i<n_layers; i++) {
    delete sigmoid_layers[i];
    delete rbm_layers[i];
  }
  delete[] sigmoid_layers;
  delete[] rbm_layers;
}


void DBN::pretrain(int *input, double lr, int k, int epochs) { // input: flattened training data; lr: learning rate; k: CD-k steps; epochs: training epochs
  int *layer_input = NULL; // must be initialized to NULL; the original code leaves it uninitialized (the same issue recurs below)
  int prev_layer_input_size;
  int *prev_layer_input;

  int *train_X = new int[n_ins];

  for(int i=0; i<n_layers; i++) {  // train layer by layer

    for(int epoch=0; epoch<epochs; epoch++) {  // training epochs

      for(int n=0; n<N; n++) { // input x1...xN
        // initial input
        for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];

        // layer input
        for(int l=0; l<=i; l++) {

          if(l == 0) {  // layer 0 reads the raw sample: layer_input[j] = train_X[j]
            delete[] layer_input;  // free the previous sample's buffer (delete[] on NULL is a no-op); the original code leaked this
            layer_input = new int[n_ins];
            for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
          } else {
            if(l == 1) prev_layer_input_size = n_ins;  // the first hidden layer has n_ins inputs
            else prev_layer_input_size = hidden_layer_sizes[l-2];  // deeper layers take the previous hidden layer's size; hidden_layer_sizes is 0-indexed, hence l-2

            prev_layer_input = new int[prev_layer_input_size];  // buffer for this layer's input
            for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
            delete[] layer_input;

            layer_input = new int[hidden_layer_sizes[l-1]];

            sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input); // propagate through the layers already trained; HiddenLayer::sample_h_given_v produces a binary layer_input
            delete[] prev_layer_input;
          }
        }
        }

        rbm_layers[i]->contrastive_divergence(layer_input, lr, k); // train the i-th RBM with CD-k
      }

    }
  }

  delete[] train_X;
  delete[] layer_input;
}
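
To summarize the loop above in notation (my own summary, not from the original post): when pretraining the i-th RBM, each training vector x is pushed through the already-trained layers by sampling, and CD-k runs on the result:

$$h^{(0)} = x, \qquad h^{(l)} \sim \mathrm{Bernoulli}\!\left(\sigma\!\left(W^{(l)} h^{(l-1)} + b^{(l)}\right)\right), \quad l = 1, \dots, i,$$

so rbm_layers[i] is trained on the binary vector $h^{(i)}$, with the first RBM trained on the raw input $x$ itself.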

void DBN::finetune(int *input, int *label, double lr, int epochs) { // supervised fine-tuning with labeled data; note that in this implementation only the top logistic-regression layer is trained, while the pretrained hidden layers stay fixed
  int *layer_input = NULL;  // see the note in pretrain: initialize to NULL
  // int prev_layer_input_size;
  int *prev_layer_input;

  int *train_X = new int[n_ins];  
  int *train_Y = new int[n_outs];

  for(int epoch=0; epoch<epochs; epoch++) {
    for(int n=0; n<N; n++) { // input x1...xN
      // initial input
      for(int m=0; m<n_ins; m++)  train_X[m] = input[n * n_ins + m];
      for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];

      // layer input
      for(int i=0; i<n_layers; i++) {
        if(i == 0) {
          delete[] layer_input;  // free the previous sample's top-layer buffer (no-op on the first pass); another leak in the original
          prev_layer_input = new int[n_ins];
          for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
        } else {
          prev_layer_input = new int[hidden_layer_sizes[i-1]];
          for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
          delete[] layer_input;
        }


        layer_input = new int[hidden_layer_sizes[i]];
        sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
        delete[] prev_layer_input;
      }

      log_layer->train(layer_input, train_Y, lr);
    }
    // lr *= 0.95;
  }

  delete[] layer_input;
  delete[] train_X;
  delete[] train_Y;
}

void DBN::predict(int *x, double *y) {
  double *layer_input=NULL;
  // int prev_layer_input_size;
  double *prev_layer_input;

  double linear_output;

  prev_layer_input = new double[n_ins];
  for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

  // layer activation
  for(int i=0; i<n_layers; i++) {
    layer_input = new double[sigmoid_layers[i]->n_out];

    for(int k=0; k<sigmoid_layers[i]->n_out; k++) {
      linear_output = 0.0;

      for(int j=0; j<sigmoid_layers[i]->n_in; j++) {
        linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
      }
      linear_output += sigmoid_layers[i]->b[k];
      layer_input[k] = sigmoid(linear_output);
    }
    delete[] prev_layer_input;

    if(i < n_layers-1) {
      prev_layer_input = new double[sigmoid_layers[i]->n_out];
      for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
      delete[] layer_input;
    }
  }
  
  for(int i=0; i<log_layer->n_out; i++) {
    y[i] = 0;
    for(int j=0; j<log_layer->n_in; j++) {
      y[i] += log_layer->W[i][j] * layer_input[j];
    }
    y[i] += log_layer->b[i];
  }
  
  log_layer->softmax(y);


  delete[] layer_input;
}
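
One detail worth noticing: predict works with double buffers and propagates the sigmoid mean activations directly, without binarizing. Training uses stochastic binary samples (sample_h_given_v), but prediction is deterministic, which is why layer_input is double here rather than int.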


// HiddenLayer
HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) { // build one hidden layer: W[n_out][n_in], b[n_out]
  N = size;    // number of training samples
  n_in = in;   // number of input units
  n_out = out; // number of units in this hidden layer

  if(w == NULL) {     
    W = new double*[n_out];
    for(int i=0; i<n_out; i++) W[i] = new double[n_in];
    double a = 1.0 / n_in;

    for(int i=0; i<n_out; i++) {
      for(int j=0; j<n_in; j++) {
        W[i][j] = uniform(-a, a);
      }
    }
  } else {
    W = w;
  }

  if(bp == NULL) {
    b = new double[n_out];
    for(int i=0; i<n_out; i++) b[i] = 0; // the original code left b uninitialized; zero it explicitly
  } else {
    b = bp;
  }
}

HiddenLayer::~HiddenLayer() {
  for(int i=0; i<n_out; i++) delete[] W[i]; // delete[], not delete: each row is an array
  delete[] W;
  delete[] b;
}

double HiddenLayer::output(int *input, double *w, double b) { // mean activation of one hidden unit: sigmoid(w . input + b)
  double linear_output = 0.0;
  for(int j=0; j<n_in; j++) {
    linear_output += w[j] * input[j];
  }
  linear_output += b;
  return sigmoid(linear_output);
}

void HiddenLayer::sample_h_given_v(int *input, int *sample) {   // binarize the hidden activations: sample[i] ~ Bernoulli(output_i)
  for(int i=0; i<n_out; i++) {
    sample[i] = binomial(1, output(input, W[i], b[i]));
  }
}


// RBM 
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
  N = size;
  n_visible = n_v;
  n_hidden = n_h;

  if(w == NULL) {
    W = new double*[n_hidden];
    for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
    double a = 1.0 / n_visible;

    for(int i=0; i<n_hidden; i++) {
      for(int j=0; j<n_visible; j++) {
        W[i][j] = uniform(-a, a);
      }
    }
  } else {
    W = w;
  }

  if(hb == NULL) {
    hbias = new double[n_hidden];
    for(int i=0; i<n_hidden; i++) hbias[i] = 0;
  } else {
    hbias = hb;
  }

  if(vb == NULL) {
    vbias = new double[n_visible];
    for(int i=0; i<n_visible; i++) vbias[i] = 0;
  } else {
    vbias = vb;
  }
}

RBM::~RBM() {
  // W and hbias are shared with the corresponding HiddenLayer, which frees them
  // in its own destructor; freeing them here too would be a double free.
  delete[] vbias;
}


void RBM::contrastive_divergence(int *input, double lr, int k) {
  double *ph_mean = new double[n_hidden];
  int *ph_sample = new int[n_hidden];
  double *nv_means = new double[n_visible];
  int *nv_samples = new int[n_visible];
  double *nh_means = new double[n_hidden];
  int *nh_samples = new int[n_hidden];

  /* CD-k */
  sample_h_given_v(input, ph_mean, ph_sample);

  for(int step=0; step<k; step++) {
    if(step == 0) {
      gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);
    } else {
      gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
    }
  }

  for(int i=0; i<n_hidden; i++) {
    for(int j=0; j<n_visible; j++) {
      // W[i][j] += lr * (ph_sample[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
      W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
    }
    hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
  }

  for(int i=0; i<n_visible; i++) {
    vbias[i] += lr * (input[i] - nv_samples[i]) / N;
  }

  delete[] ph_mean;
  delete[] ph_sample;
  delete[] nv_means;
  delete[] nv_samples;
  delete[] nh_means;
  delete[] nh_samples;
}
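
For reference, the updates above are the usual CD-k approximation to the log-likelihood gradient, averaged over the N training samples (hence the division by N):

$$\Delta w_{ij} = \frac{\eta}{N}\Big(\langle h_i v_j\rangle_{\text{data}} - \langle h_i v_j\rangle_{\text{recon}}\Big), \qquad \Delta b^h_i = \frac{\eta}{N}\big(h^{(0)}_i - \hat h^{(k)}_i\big), \qquad \Delta b^v_j = \frac{\eta}{N}\big(v^{(0)}_j - v^{(k)}_j\big)$$

In the code, the data term for W uses the hidden means ph_mean, the reconstruction term pairs the hidden means nh_means with the sampled visibles nv_samples, and the visible update uses the original input minus nv_samples (the commented-out line shows the all-sample variant).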

void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample) {
  for(int i=0; i<n_hidden; i++) {
    mean[i] = propup(v0_sample, W[i], hbias[i]);
    sample[i] = binomial(1, mean[i]);
  }
}

void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample) {
  for(int i=0; i<n_visible; i++) {
    mean[i] = propdown(h0_sample, i, vbias[i]);
    sample[i] = binomial(1, mean[i]);
  }
}

double RBM::propup(int *v, double *w, double b) {
  double pre_sigmoid_activation = 0.0;
  for(int j=0; j<n_visible; j++) {
    pre_sigmoid_activation += w[j] * v[j];
  }
  pre_sigmoid_activation += b;
  return sigmoid(pre_sigmoid_activation);
}

double RBM::propdown(int *h, int i, double b) {
  double pre_sigmoid_activation = 0.0;
  for(int j=0; j<n_hidden; j++) {
    pre_sigmoid_activation += W[j][i] * h[j];
  }
  pre_sigmoid_activation += b;
  return sigmoid(pre_sigmoid_activation);
}

void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, \
                    double *nh_means, int *nh_samples) {
  sample_v_given_h(h0_sample, nv_means, nv_samples);
  sample_h_given_v(nv_samples, nh_means, nh_samples);
}

void RBM::reconstruct(int *v, double *reconstructed_v) {
  double *h = new double[n_hidden];
  double pre_sigmoid_activation;

  for(int i=0; i<n_hidden; i++) {
    h[i] = propup(v, W[i], hbias[i]);
  }

  for(int i=0; i<n_visible; i++) {
    pre_sigmoid_activation = 0.0;
    for(int j=0; j<n_hidden; j++) {
      pre_sigmoid_activation += W[j][i] * h[j];
    }
    pre_sigmoid_activation += vbias[i];

    reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
  }

  delete[] h;
}
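
reconstruct is useful for monitoring pretraining progress. A hypothetical helper (not in the original code, and assuming the RBM class and the includes above are in scope) that scores one sample by reconstruction cross-entropy:

// Hypothetical monitoring helper; lower values mean better reconstruction.
double reconstruction_cross_entropy(RBM &rbm, int *v) {
  double *rv = new double[rbm.n_visible];
  rbm.reconstruct(v, rv);

  double ce = 0.0;
  for(int j=0; j<rbm.n_visible; j++) {
    double p = rv[j];
    if(p < 1e-10) p = 1e-10;            // clamp to avoid log(0)
    if(p > 1.0 - 1e-10) p = 1.0 - 1e-10;
    ce -= v[j] * log(p) + (1 - v[j]) * log(1.0 - p);
  }

  delete[] rv;
  return ce;
}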


// LogisticRegression
LogisticRegression::LogisticRegression(int size, int in, int out) {  // build the logistic-regression output layer; W and b start at zero
  N = size;
  n_in = in;
  n_out = out;

  W = new double*[n_out];
  for(int i=0; i<n_out; i++) W[i] = new double[n_in];
  b = new double[n_out];

  for(int i=0; i<n_out; i++) {
    for(int j=0; j<n_in; j++) {
      W[i][j] = 0;
    }
    b[i] = 0;
  }
}

LogisticRegression::~LogisticRegression() {
  for(int i=0; i<n_out; i++) delete[] W[i];
  delete[] W;
  delete[] b;
}


void LogisticRegression::train(int *x, int *y, double lr) {  // one gradient step, trained much like a BP network's output layer
  double *p_y_given_x = new double[n_out];
  double *dy = new double[n_out];

  for(int i=0; i<n_out; i++) {     // forward pass: compute the softmax output
    p_y_given_x[i] = 0;
    for(int j=0; j<n_in; j++) {
      p_y_given_x[i] += W[i][j] * x[j];
    }
    p_y_given_x[i] += b[i];
  }
  softmax(p_y_given_x);

  for(int i=0; i<n_out; i++) {  // backward pass: update the weights and biases
    dy[i] = y[i] - p_y_given_x[i];

    for(int j=0; j<n_in; j++) {
      W[i][j] += lr * dy[i] * x[j] / N;
    }

    b[i] += lr * dy[i] / N;
  }
  
  delete[] p_y_given_x;
  delete[] dy;
}
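
The update looks like a simple delta rule because that is exactly what the gradient works out to for a softmax output with cross-entropy loss (a standard derivation, not spelled out in the original post):

$$L = -\sum_i y_i \log p_i, \quad p = \mathrm{softmax}(Wx + b) \;\Longrightarrow\; \frac{\partial L}{\partial z_i} = p_i - y_i,$$

so descending this gradient adds $\eta\,(y_i - p_i)\,x_j$ to $w_{ij}$, which is what the code does, scaled by $1/N$.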

void LogisticRegression::softmax(double *x) {
  double max = x[0];   // subtract the max before exponentiating, for numerical stability (the original initialized max to 0)
  double sum = 0.0;

  for(int i=1; i<n_out; i++) if(max < x[i]) max = x[i];
  for(int i=0; i<n_out; i++) {
    x[i] = exp(x[i] - max);
    sum += x[i];
  } 

  for(int i=0; i<n_out; i++) x[i] /= sum;
}

void LogisticRegression::predict(int *x, double *y) { // a single forward pass through this layer
  for(int i=0; i<n_out; i++) {
    y[i] = 0;
    for(int j=0; j<n_in; j++) {
      y[i] += W[i][j] * x[j];
    }
    y[i] += b[i];
  }

  softmax(y);
}





void test_dbn() {
  srand(0);

  double pretrain_lr = 0.1;
  int pretraining_epochs = 1000;
  int k = 1;
  double finetune_lr = 0.1;
  int finetune_epochs = 500;

  int train_N = 6;
  int test_N = 3;
  int n_ins = 6;
  int n_outs = 2;
  int hidden_layer_sizes[] = {3, 3};
  int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);

  // training data
  int train_X[6][6] = {
    {1, 1, 1, 0, 0, 0},
    {1, 0, 1, 0, 0, 0},
    {1, 1, 1, 0, 0, 0},
    {0, 0, 1, 1, 1, 0},
    {0, 0, 1, 1, 0, 0},
    {0, 0, 1, 1, 1, 0}
  };

  int train_Y[6][2] = {
    {1, 0},
    {1, 0},
    {1, 0},
    {0, 1},
    {0, 1},
    {0, 1}
  };


  
  // construct DBN
  DBN dbn(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);

  // pretrain (*train_X passes the 2-D array as a flat int*; the rows are contiguous in memory)
  dbn.pretrain(*train_X, pretrain_lr, k, pretraining_epochs);

  // finetune
  dbn.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);
  

  // test data
  int test_X[3][6] = {
    {1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 0},
    {1, 1, 1, 1, 1, 0}
  };

  double test_Y[3][2];


  // test
  for(int i=0; i<test_N; i++) {
    dbn.predict(test_X[i], test_Y[i]);
    for(int j=0; j<n_outs; j++) {
      cout << test_Y[i][j] << " ";
    }
    cout << endl;
  }

}

int main() {
  test_dbn();
  return 0;
}
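
Assuming the class definitions are split into the headers included at the top (HiddenLayer.h, RBM.h, LogisticRegression.h, DBN.h) and the implementation lives in one source file (say dbn.cpp; the file name is my assumption), the program should build with a plain g++ dbn.cpp -o dbn and print two probabilities per test vector when run.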

Running the program prints the predicted class probabilities for each test vector. Judging from the training data, the result looks fairly accurate.



