Spark MLlib Deep Learning: Neural Net 1.1
The Spark MLlib Deep Learning toolbox is an implementation in Spark MLlib of the algorithms from the existing UFLDL deep-learning tutorial. The series is organized as follows:
Chapter 1: Neural Net (NN)
1. Source code
2. Source code analysis
3. Examples
Chapter 2: Deep Belief Nets (DBNs)
Chapter 3: Convolutional Neural Network (CNN)
Chapter 4: Stacked Auto-Encoders (SAE)
Chapter 5: CAE
Chapter 1: Neural Net
1 Source code
The source code of the Spark MLlib Deep Learning toolbox is currently hosted on GitHub at:
https://github.com/sunbow1/SparkMLlibDeepLearn
1.1 NeuralNet code
- package NN
- import org.apache.spark._
- import org.apache.spark.SparkContext._
- import org.apache.spark.rdd.RDD
- import org.apache.spark.Logging
- import org.apache.spark.mllib.regression.LabeledPoint
- import org.apache.spark.mllib.linalg._
- import org.apache.spark.mllib.linalg.distributed.RowMatrix
- import breeze.linalg.{
- Matrix => BM,
- CSCMatrix => BSM,
- DenseMatrix => BDM,
- Vector => BV,
- DenseVector => BDV,
- SparseVector => BSV,
- axpy => brzAxpy,
- svd => brzSvd
- }
- import breeze.numerics.{
- exp => Bexp,
- tanh => Btanh
- }
- import scala.collection.mutable.ArrayBuffer
- import java.util.Random
- import scala.math._
- /**
- * label: target matrix
- * nna: output values of the nodes in each layer of the network, a(0), a(1), a(2)
- * error: error matrix between the output layer and the target values
- */
- case class NNLabel(label: BDM[Double], nna: ArrayBuffer[BDM[Double]], error: BDM[Double]) extends Serializable
- /**
- * Configuration parameters.
- */
- case class NNConfig(
- size: Array[Int],
- layer: Int,
- activation_function: String,
- learningRate: Double,
- momentum: Double,
- scaling_learningRate: Double,
- weightPenaltyL2: Double,
- nonSparsityPenalty: Double,
- sparsityTarget: Double,
- inputZeroMaskedFraction: Double,
- dropoutFraction: Double,
- testing: Double,
- output_function: String) extends Serializable
- /**
- * NN(neural network)
- */
- class NeuralNet(
- private var size: Array[Int],
- private var layer: Int,
- private var activation_function: String,
- private var learningRate: Double,
- private var momentum: Double,
- private var scaling_learningRate: Double,
- private var weightPenaltyL2: Double,
- private var nonSparsityPenalty: Double,
- private var sparsityTarget: Double,
- private var inputZeroMaskedFraction: Double,
- private var dropoutFraction: Double,
- private var testing: Double,
- private var output_function: String) extends Serializable with Logging {
- // var size=Array(5, 7, 1)
- // var layer=3
- // var activation_function="tanh_opt"
- // var learningRate=2.0
- // var momentum=0.5
- // var scaling_learningRate=1.0
- // var weightPenaltyL2=0.0
- // var nonSparsityPenalty=0.0
- // var sparsityTarget=0.05
- // var inputZeroMaskedFraction=0.0
- // var dropoutFraction=0.0
- // var testing=0.0
- // var output_function="sigm"
- /**
- * size = architecture;
- * n = numel(nn.size);
- * activation_function = sigm; activation function of the hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
- * learningRate = 2; learning rate. Note: typically needs to be lower when using 'sigm' activation and non-normalized inputs.
- * momentum = 0.5; momentum
- * scaling_learningRate = 1; scaling factor for the learning rate (applied each epoch)
- * weightPenaltyL2 = 0; L2 regularization
- * nonSparsityPenalty = 0; non-sparsity penalty (penalty on deviation from the sparsity target)
- * sparsityTarget = 0.05; sparsity target
- * inputZeroMaskedFraction = 0; fraction of inputs zeroed as noise; used for denoising autoencoders
- * dropoutFraction = 0; dropout level: for each mini-batch, randomly drop this fraction of hidden-layer nodes (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf)
- * testing = 0; internal variable; nntest sets this to one.
- * output_function = 'sigm'; output unit: 'sigm' (=logistic), 'softmax' or 'linear'
- */
- def this() = this(NeuralNet.Architecture, 3, NeuralNet.Activation_Function, 2.0, 0.5, 1.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, NeuralNet.Output)
- /** Set the network architecture. Default: [10, 5, 1]. */
- def setSize(size: Array[Int]): this.type = {
- this.size = size
- this
- }
- /** Set the number of layers. Default: 3. */
- def setLayer(layer: Int): this.type = {
- this.layer = layer
- this
- }
- /** Set the hidden-layer activation function. Default: sigm. */
- def setActivation_function(activation_function: String): this.type = {
- this.activation_function = activation_function
- this
- }
- /** Set the learning rate. Default: 2. */
- def setLearningRate(learningRate: Double): this.type = {
- this.learningRate = learningRate
- this
- }
- /** Set the momentum. Default: 0.5. */
- def setMomentum(momentum: Double): this.type = {
- this.momentum = momentum
- this
- }
- /** Set scaling_learningRate (per-epoch learning-rate scaling). Default: 1. */
- def setScaling_learningRate(scaling_learningRate: Double): this.type = {
- this.scaling_learningRate = scaling_learningRate
- this
- }
- /** Set the L2 regularization factor. Default: 0. */
- def setWeightPenaltyL2(weightPenaltyL2: Double): this.type = {
- this.weightPenaltyL2 = weightPenaltyL2
- this
- }
- /** Set the non-sparsity penalty factor. Default: 0. */
- def setNonSparsityPenalty(nonSparsityPenalty: Double): this.type = {
- this.nonSparsityPenalty = nonSparsityPenalty
- this
- }
- /** Set the sparsity target. Default: 0.05. */
- def setSparsityTarget(sparsityTarget: Double): this.type = {
- this.sparsityTarget = sparsityTarget
- this
- }
- /** Set the input-noise (zero-masking) fraction. Default: 0. */
- def setInputZeroMaskedFraction(inputZeroMaskedFraction: Double): this.type = {
- this.inputZeroMaskedFraction = inputZeroMaskedFraction
- this
- }
- /** Set the dropout fraction. Default: 0. */
- def setDropoutFraction(dropoutFraction: Double): this.type = {
- this.dropoutFraction = dropoutFraction
- this
- }
- /** Set the internal testing flag. Default: 0. */
- def setTesting(testing: Double): this.type = {
- this.testing = testing
- this
- }
- /** Set the output function. Default: linear. */
- def setOutput_function(output_function: String): this.type = {
- this.output_function = output_function
- this
- }
- /**
- * Train the neural network.
- */
- def NNtrain(train_d: RDD[(BDM[Double], BDM[Double])], opts: Array[Double]): NeuralNetModel = {
- val sc = train_d.sparkContext
- var initStartTime = System.currentTimeMillis()
- var initEndTime = System.currentTimeMillis()
- // Assemble the network configuration (broadcast to the executors below)
- var nnconfig = NNConfig(size, layer, activation_function, learningRate, momentum, scaling_learningRate,
- weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, testing,
- output_function)
- // Initialize the weights
- var nn_W = NeuralNet.InitialWeight(size)
- var nn_vW = NeuralNet.InitialWeightV(size)
- // val tmpw = nn_W(1)
- // for (i <- 0 to tmpw.rows -1) {
- // for (j <- 0 to tmpw.cols - 1) {
- // print(tmpw(i, j) + "\t")
- // }
- // println()
- // }
- // Initialize each layer's average activation nn.p
- // average activations (for use with sparsity)
- var nn_p = NeuralNet.InitialActiveP(size)
- // Split the samples into training data and cross-validation data
- val validation = opts(2)
- val splitW1 = Array(1.0 - validation, validation)
- val train_split1 = train_d.randomSplit(splitW1, System.nanoTime())
- val train_t = train_split1(0)
- val train_v = train_split1(1)
- // m: number of training samples
- val m = train_t.count
- // batchsize is the mini-batch size used for batch gradient descent
- // Compute the number of batches
- val batchsize = opts(0).toInt
- val numepochs = opts(1).toInt
- val numbatches = (m / batchsize).toInt
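- // Note: m / batchsize uses integer division; the randomSplit below spreads all training samples across the numbatches groups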
- var L = Array.fill(numepochs * numbatches.toInt)(0.0)
- var n = 0
- var loss_train_e = Array.fill(numepochs)(0.0)
- var loss_val_e = Array.fill(numepochs)(0.0)
- // numepochs is the number of training epochs
- for (i <- 1 to numepochs) {
- initStartTime = System.currentTimeMillis()
- val splitW2 = Array.fill(numbatches)(1.0 / numbatches)
- // Randomly split the samples into groups according to the split weights
- val bc_config = sc.broadcast(nnconfig)
- for (l <- 1 to numbatches) {
- // Broadcast the current weights
- val bc_nn_W = sc.broadcast(nn_W)
- val bc_nn_vW = sc.broadcast(nn_vW)
- // println(i + "\t" + l)
- // val tmpw0 = bc_nn_W.value(0)
- // for (i <- 0 to tmpw0.rows - 1) {
- // for (j <- 0 to tmpw0.cols - 1) {
- // print(tmpw0(i, j) + "\t")
- // }
- // println()
- // }
- // val tmpw1 = bc_nn_W.value(1)
- // for (i <- 0 to tmpw1.rows - 1) {
- // for (j <- 0 to tmpw1.cols - 1) {
- // print(tmpw1(i, j) + "\t")
- // }
- // println()
- // }
- // Select this batch's samples
- val train_split2 = train_t.randomSplit(splitW2, System.nanoTime())
- val batch_xy1 = train_split2(l - 1)
- // val train_split3 = train_t.filter { f => (f._1 >= batchsize * (l - 1) + 1) && (f._1 <= batchsize * (l)) }
- // val batch_xy1 = train_split3.map(f => (f._2, f._3))
- // Add noise to input (for use in denoising autoencoder)
- // Adding noise is the part needed by denoising autoencoders; see the paper
- // "Extracting and Composing Robust Features with Denoising Autoencoders".
- // Concretely, some entries of the training samples are set to 0;
- // inputZeroMaskedFraction is the fraction of entries that are zeroed.
- //val randNoise = NeuralNet.RandMatrix(batch_x.numRows.toInt, batch_x.numCols.toInt, inputZeroMaskedFraction)
- val batch_xy2 = if (bc_config.value.inputZeroMaskedFraction != 0) {
- NeuralNet.AddNoise(batch_xy1, bc_config.value.inputZeroMaskedFraction)
- } else batch_xy1
- // val tmpxy = batch_xy2.map(f => (f._1.toArray,f._2.toArray)).toArray.map {f => ((new ArrayBuffer() ++ f._1) ++ f._2).toArray}
- // for (i <- 0 to tmpxy.length - 1) {
- // for (j <- 0 to tmpxy(i).length - 1) {
- // print(tmpxy(i)(j) + "\t")
- // }
- // println()
- // }
- // NNff performs the feed-forward pass
- // nn = nnff(nn, batch_x, batch_y);
- val train_nnff = NeuralNet.NNff(batch_xy2, bc_config, bc_nn_W)
- // val tmpa0 = train_nnff.map(f => f._1.nna(0)).take(20)
- // println("tmpa0")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpa0(i).cols - 1) {
- // print(tmpa0(i)(0, j) + "\t")
- // }
- // println()
- // }
- // val tmpa1 = train_nnff.map(f => f._1.nna(1)).take(20)
- // println("tmpa1")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpa1(i).cols - 1) {
- // print(tmpa1(i)(0, j) + "\t")
- // }
- // println()
- // }
- // val tmpa2 = train_nnff.map(f => f._1.nna(2)).take(20)
- // println("tmpa2")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpa2(i).cols - 1) {
- // print(tmpa2(i)(0, j) + "\t")
- // }
- // println()
- // }
- // Sparsity: compute each layer's average activation
- nn_p = NeuralNet.ActiveP(train_nnff, bc_config, nn_p)
- val bc_nn_p = sc.broadcast(nn_p)
- // NNbp performs back-propagation
- // nn = nnbp(nn);
- val train_nnbp = NeuralNet.NNbp(train_nnff, bc_config, bc_nn_W, bc_nn_p)
- // val tmpd0 = rdd5.map(f => f._2(2)).take(20)
- // println("tmpd0")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpd0(i).cols - 1) {
- // print(tmpd0(i)(0, j) + "\t")
- // }
- // println()
- // }
- // val tmpd1 = rdd5.map(f => f._2(1)).take(20)
- // println("tmpd1")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpd1(i).cols - 1) {
- // print(tmpd1(i)(0, j) + "\t")
- // }
- // println()
- // }
- // val tmpdw0 = rdd5.map(f => f._3(0)).take(20)
- // println("tmpdw0")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpdw0(i).cols - 1) {
- // print(tmpdw0(i)(0, j) + "\t")
- // }
- // println()
- // }
- // val tmpdw1 = rdd5.map(f => f._3(1)).take(20)
- // println("tmpdw1")
- // for (i <- 0 to 10) {
- // for (j <- 0 to tmpdw1(i).cols - 1) {
- // print(tmpdw1(i)(0, j) + "\t")
- // }
- // println()
- // }
- // nn = NNapplygrads(nn) returns a neural network structure with updated
- // weights and biases
- // Update the weights: w = w - α*[dw + λw]
- val train_nnapplygrads = NeuralNet.NNapplygrads(train_nnbp, bc_config, bc_nn_W, bc_nn_vW)
- nn_W = train_nnapplygrads(0)
- nn_vW = train_nnapplygrads(1)
- // val tmpw2 = train_nnapplygrads(0)(0)
- // for (i <- 0 to tmpw2.rows - 1) {
- // for (j <- 0 to tmpw2.cols - 1) {
- // print(tmpw2(i, j) + "\t")
- // }
- // println()
- // }
- // val tmpw3 = train_nnapplygrads(0)(1)
- // for (i <- 0 to tmpw3.rows - 1) {
- // for (j <- 0 to tmpw3.cols - 1) {
- // print(tmpw3(i, j) + "\t")
- // }
- // println()
- // }
- // error and loss
- // Compute the output error
- val loss1 = train_nnff.map(f => f._1.error)
- val (loss2, counte) = loss1.treeAggregate((0.0, 0L))(
- seqOp = (c, v) => {
- // c: (e, count), v: (m)
- val e1 = c._1
- val e2 = (v :* v).sum
- val esum = e1 + e2
- (esum, c._2 + 1)
- },
- combOp = (c1, c2) => {
- // c: (e, count)
- val e1 = c1._1
- val e2 = c2._1
- val esum = e1 + e2
- (esum, c1._2 + c2._2)
- })
- val Loss = loss2 / counte.toDouble
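- // L(n) records this batch's loss: 0.5 * (sum of squared errors, averaged over the samples)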
- L(n) = Loss * 0.5
- n = n + 1
- }
- // Compute this epoch's training error and cross-validation error
- // Full-batch train mse
- val evalconfig = NNConfig(size, layer, activation_function, learningRate, momentum, scaling_learningRate,
- weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, 1.0,
- output_function)
- loss_train_e(i - 1) = NeuralNet.NNeval(train_t, sc.broadcast(evalconfig), sc.broadcast(nn_W))
- if (validation > 0) loss_val_e(i - 1) = NeuralNet.NNeval(train_v, sc.broadcast(evalconfig), sc.broadcast(nn_W))
- // Update the learning rate
- // nn.learningRate = nn.learningRate * nn.scaling_learningRate;
- nnconfig = NNConfig(size, layer, activation_function, nnconfig.learningRate * nnconfig.scaling_learningRate, momentum, scaling_learningRate,
- weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, testing,
- output_function)
- initEndTime = System.currentTimeMillis()
- // Print progress
- printf("epoch %d: took %d seconds; full-batch train mse = %f, val mse = %f.\n", i, scala.math.ceil((initEndTime - initStartTime).toDouble / 1000).toLong, loss_train_e(i - 1), loss_val_e(i - 1))
- }
- val configok = NNConfig(size, layer, activation_function, learningRate, momentum, scaling_learningRate,
- weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, 1.0,
- output_function)
- new NeuralNetModel(configok, nn_W)
- }
- }
- /**
- * NN(neural network)
- */
- object NeuralNet extends Serializable {
- // Default parameter values
- val Activation_Function = "sigm"
- val Output = "linear"
- val Architecture = Array(10, 5, 1)
- /**
- * Add random noise:
- * entries where a random value >= Fraction are kept; the rest are set to 0.
- */
- def AddNoise(rdd: RDD[(BDM[Double], BDM[Double])], Fraction: Double): RDD[(BDM[Double], BDM[Double])] = {
- val addNoise = rdd.map { f =>
- val features = f._2
- val a = BDM.rand[Double](features.rows, features.cols)
- val a1 = a :>= Fraction
- val d1 = a1.data.map { f => if (f == true) 1.0 else 0.0 }
- val a2 = new BDM(features.rows, features.cols, d1)
- val features2 = features :* a2
- (f._1, features2)
- }
- addNoise
- }
- /**
- * Initialize the weights
- * to small random values close to zero.
- */
- def InitialWeight2(size: Array[Int]): Array[BDM[Double]] = {
- // Initialize the weight parameters (variant with hard-coded values, presumably for debugging against the MATLAB toolbox)
- // weights and weight momentum
- // nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1)));
- // nn.vW{i - 1} = zeros(size(nn.W{i - 1}));
- val n = size.length
- val nn_W = ArrayBuffer[BDM[Double]]()
- val d1 = BDM((2.54631575950577, -2.72375471180638, -1.83131523622017, -0.832303531504013, -1.28869970471936, -0.460188104184124), (-1.52091024201213, 1.81815348316090, -0.533406209340414, 1.77153723107141, -1.70376378930231, 1.95852409868481), (0.604392922735100, -0.312805008341265, 2.46338861792203, -2.77264318419692, -2.74202474572555, 0.142284005609256), (-0.0792951314491902, 0.652983968878905, 2.35836765255640, -2.04274164893227, 1.39603060318734, -1.68208055847319), (2.21352121948139, 1.65144527075334, -0.507588360889342, -1.68141383648426, -0.310581480324221, 0.973756570035639), (1.48264358368951, 2.38613449604874, 2.22681802175890, -1.70428719030501, 2.44271213316363, 1.91268676272635), (-0.246256073282793, 1.34750367072394, -2.50094445126864, 0.587138926992906, -0.192365052800164, -2.71732925728203))
- nn_W += d1
- val d2 = BDM((1.25592501437006, -0.834980000207940, 2.29875024099543, 0.0194882319892158, 1.45126037957791, -0.492648144141757, -1.35365058999520, -2.15014190874756))
- nn_W += d2
- nn_W.toArray
- }
- def InitialWeight(size: Array[Int]): Array[BDM[Double]] = {
- // Initialize the weight parameters
- // weights and weight momentum
- // nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1)));
- // nn.vW{i - 1} = zeros(size(nn.W{i - 1}));
- val n = size.length
- val nn_W = ArrayBuffer[BDM[Double]]()
- for (i <- 1 to n - 1) {
- val d1 = BDM.rand(size(i), size(i - 1) + 1)
- d1 :-= 0.5
- val f1 = 2 * 4 * sqrt(6.0 / (size(i) + size(i - 1)))
- val d2 = d1 :* f1
- //val d3 = new DenseMatrix(d2.rows, d2.cols, d2.data, d2.isTranspose)
- //val d4 = Matrices.dense(d2.rows, d2.cols, d2.data)
- nn_W += d2
- }
- nn_W.toArray
- }
- /**
- * Initialize the weight momentum vW
- * to zero.
- */
- def InitialWeightV(size: Array[Int]): Array[BDM[Double]] = {
- // Initialize the weight-momentum parameters
- // weights and weight momentum
- // nn.vW{i - 1} = zeros(size(nn.W{i - 1}));
- val n = size.length
- val nn_vW = ArrayBuffer[BDM[Double]]()
- for (i <- 1 to n - 1) {
- val d1 = BDM.zeros[Double](size(i), size(i - 1) + 1)
- nn_vW += d1
- }
- nn_vW.toArray
- }
- /**
- * Initialize each layer's average activation
- * to zero.
- */
- def InitialActiveP(size: Array[Int]): Array[BDM[Double]] = {
- // Initialize each layer's average activation
- // average activations (for use with sparsity)
- // nn.p{i} = zeros(1, nn.size(i));
- val n = size.length
- val nn_p = ArrayBuffer[BDM[Double]]()
- nn_p += BDM.zeros[Double](1, 1)
- for (i <- 1 to n - 1) {
- val d1 = BDM.zeros[Double](1, size(i))
- nn_p += d1
- }
- nn_p.toArray
- }
- /**
- * Randomly disable some hidden-layer nodes (dropout):
- * entries where a random value > Fraction are kept; the rest are set to 0.
- */
- def DropoutWeight(matrix: BDM[Double], Fraction: Double): Array[BDM[Double]] = {
- val aa = BDM.rand[Double](matrix.rows, matrix.cols)
- val aa1 = aa :> Fraction
- val d1 = aa1.data.map { f => if (f == true) 1.0 else 0.0 }
- val aa2 = new BDM(matrix.rows: Int, matrix.cols: Int, d1: Array[Double])
- val matrix2 = matrix :* aa2
- Array(aa2, matrix2)
- }
- /**
- * Sigmoid activation function:
- * X = 1./(1+exp(-P));
- */
- def sigm(matrix: BDM[Double]): BDM[Double] = {
- val s1 = 1.0 / (Bexp(matrix * (-1.0)) + 1.0)
- s1
- }
- /**
- * Optimal tanh activation function:
- * f=1.7159*tanh(2/3.*A);
- */
- def tanh_opt(matrix: BDM[Double]): BDM[Double] = {
- val s1 = Btanh(matrix * (2.0 / 3.0)) * 1.7159
- s1
- }
- /**
- * NNff performs the feed-forward pass,
- * computing the output value of every node in the network.
- */
- def NNff(
- batch_xy2: RDD[(BDM[Double], BDM[Double])],
- bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
- bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): RDD[(NNLabel, Array[BDM[Double]])] = {
- // Layer 1: a(1) = [1 x]
- // Add the bias term b
- val train_data1 = batch_xy2.map { f =>
- val label = f._1
- val features = f._2
- val nna = ArrayBuffer[BDM[Double]]()
- val Bm1 = new BDM(features.rows, 1, Array.fill(features.rows * 1)(1.0))
- val features2 = BDM.horzcat(Bm1, features)
- val error = BDM.zeros[Double](label.rows, label.cols)
- nna += features2
- NNLabel(label, nna, error)
- }
- // println("bc_size " + bc_config.value.size(0) + bc_config.value.size(1) + bc_config.value.size(2))
- // println("bc_layer " + bc_config.value.layer)
- // println("bc_activation_function " + bc_config.value.activation_function)
- // println("bc_output_function " + bc_config.value.output_function)
- //
- // println("tmpw0 ")
- // val tmpw0 = bc_nn_W.value(0)
- // for (i <- 0 to tmpw0.rows - 1) {
- // for (j <- 0 to tmpw0.cols - 1) {
- // print(tmpw0(i, j) + "\t")
- // }
- // println()
- // }
- // feedforward pass
- // Layers 2 to n-1: a(i) = f(a(i-1) * w(i-1)')
- //val tmp1 = train_data1.map(f => f.nna(0).data).take(1)(0)
- //val tmp2 = new BDM(1, tmp1.length, tmp1)
- //val nn_a = ArrayBuffer[BDM[Double]]()
- //nn_a += tmp2
- val train_data2 = train_data1.map { f =>
- val nn_a = f.nna
- val dropOutMask = ArrayBuffer[BDM[Double]]()
- dropOutMask += new BDM[Double](1, 1, Array(0.0))
- for (j <- 1 to bc_config.value.layer - 2) {
- // Compute this layer's output
- // Calculate the unit's outputs (including the bias term)
- // nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}')
- // nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
- val A1 = nn_a(j - 1)
- val W1 = bc_nn_W.value(j - 1)
- val aw1 = A1 * W1.t
- val nnai1 = bc_config.value.activation_function match {
- case "sigm" =>
- val aw2 = NeuralNet.sigm(aw1)
- aw2
- case "tanh_opt" =>
- val aw2 = NeuralNet.tanh_opt(aw1)
- //val aw2 = Btanh(aw1 * (2.0 / 3.0)) * 1.7159
- aw2
- }
- // Dropout
- // Dropout randomly disables some hidden-layer nodes during training; the disabled nodes
- // can temporarily be treated as not being part of the network structure, but their weights
- // are kept (just not updated for now), since they may be active again for the next sample.
- // See http://www.cnblogs.com/tornadomeet/p/3258122.html
- val dropoutai = if (bc_config.value.dropoutFraction > 0) {
- if (bc_config.value.testing == 1) {
- val nnai2 = nnai1 * (1.0 - bc_config.value.dropoutFraction)
- Array(new BDM[Double](1, 1, Array(0.0)), nnai2)
- } else {
- NeuralNet.DropoutWeight(nnai1, bc_config.value.dropoutFraction)
- }
- } else {
- val nnai2 = nnai1
- Array(new BDM[Double](1, 1, Array(0.0)), nnai2)
- }
- val nnai2 = dropoutai(1)
- dropOutMask += dropoutai(0)
- // Add the bias term b
- // nn.a{i} = [ones(m,1) nn.a{i}];
- val Bm1 = BDM.ones[Double](nnai2.rows, 1)
- val nnai3 = BDM.horzcat(Bm1, nnai2)
- nn_a += nnai3
- }
- (NNLabel(f.label, nn_a, f.error), dropOutMask.toArray)
- }
- // Output-layer computation
- val train_data3 = train_data2.map { f =>
- val nn_a = f._1.nna
- // nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}');
- // nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
- val An1 = nn_a(bc_config.value.layer - 2)
- val Wn1 = bc_nn_W.value(bc_config.value.layer - 2)
- val awn1 = An1 * Wn1.t
- val nnan1 = bc_config.value.output_function match {
- case "sigm" =>
- val awn2 = NeuralNet.sigm(awn1)
- //val awn2 = 1.0 / (Bexp(awn1 * (-1.0)) + 1.0)
- awn2
- case "linear" =>
- val awn2 = awn1
- awn2
- }
- nn_a += nnan1
- (NNLabel(f._1.label, nn_a, f._1.error), f._2)
- }
- // error and loss
- // Compute the output error
- // nn.e = y - nn.a{n};
- // val nn_e = batch_y - nnan
- val train_data4 = train_data3.map { f =>
- val batch_y = f._1.label
- val nnan = f._1.nna(bc_config.value.layer - 1)
- val error = (batch_y - nnan)
- (NNLabel(f._1.label, f._1.nna, error), f._2)
- }
- train_data4
- }
- /**
- * Sparsity: compute the network's average activations,
- * i.e. the running mean activation of each node.
- */
- def ActiveP(
- train_nnff: RDD[(NNLabel, Array[BDM[Double]])],
- bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
- nn_p_old: Array[BDM[Double]]): Array[BDM[Double]] = {
- val nn_p = ArrayBuffer[BDM[Double]]()
- nn_p += BDM.zeros[Double](1, 1)
- // calculate running exponential activations for use with sparsity
- // nonSparsityPenalty is the penalty coefficient for parameters that have not reached sparsityTarget
- for (i <- 1 to bc_config.value.layer - 1) {
- val pi1 = train_nnff.map(f => f._1.nna(i))
- val initpi = BDM.zeros[Double](1, bc_config.value.size(i))
- val (piSum, miniBatchSize) = pi1.treeAggregate((initpi, 0L))(
- seqOp = (c, v) => {
- // c: (nnasum, count), v: (nna)
- val nna1 = c._1
- val nna2 = v
- val nnasum = nna1 + nna2
- (nnasum, c._2 + 1)
- },
- combOp = (c1, c2) => {
- // c: (nnasum, count)
- val nna1 = c1._1
- val nna2 = c2._1
- val nnasum = nna1 + nna2
- (nnasum, c1._2 + c2._2)
- })
- val piAvg = piSum / miniBatchSize.toDouble
- val oldpi = nn_p_old(i)
- // Running exponential average: p = 0.99 * p_old + 0.01 * batch mean
- val newpi = (piAvg * 0.01) + (oldpi * 0.99)
- nn_p += newpi
- }
- nn_p.toArray
- }
- /**
- * NNbp performs back-propagation,
- * computing the average partial derivatives of the weights.
- */
- def NNbp(
- train_nnff: RDD[(NNLabel, Array[BDM[Double]])],
- bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
- bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]],
- bc_nn_p: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): Array[BDM[Double]] = {
- // Layer n derivative: d(n) = -(y - a(n)) .* f'(z); for the sigmoid, f'(z) = f(z) * [1 - f(z)]
- // sigm: d{n} = - nn.e .* (nn.a{n} .* (1 - nn.a{n}));
- // {'softmax','linear'}: d{n} = - nn.e;
- val train_data5 = train_nnff.map { f =>
- val nn_a = f._1.nna
- val error = f._1.error
- val dn = ArrayBuffer[BDM[Double]]()
- val nndn = bc_config.value.output_function match {
- case "sigm" =>
- val fz = nn_a(bc_config.value.layer - 1)
- (error * (-1.0)) :* (fz :* (1.0 - fz))
- case "linear" =>
- error * (-1.0)
- }
- dn += nndn
- (f._1, f._2, dn)
- }
- // Derivatives of layers n-1 down to 2: d(i) = (d(i+1) * w(i)) .* f'(z)
- val train_data6 = train_data5.map { f =>
- // If f(z) is the sigmoid f(z) = 1/[1+e^(-z)], then f'(z) = f(z)*[1-f(z)]
- // If f(z) is tanh_opt f(z) = 1.7159*tanh(2/3*z), then f'(z) = 1.7159 * 2/3 * (1 - 1/1.7159^2 * f(z).^2)
- //val di = ArrayBuffer( BDM((1.765226346140333)))
- // val nn_a = ArrayBuffer[BDM[Double]]()
- // val a1=BDM((1.0,0.312605257000000,0.848582961000000,0.999014768000000,0.278330771000000,0.462701179000000))
- // val a2= BDM((1.0,0.838091550300577,0.996782915917104,0.118033012437165))
- // val a3= BDM((2.18788852054974))
- // nn_a += a1
- // nn_a += a2
- // nn_a += a3
- val nn_a = f._1.nna
- val di = f._3
- val dropout = f._2
- // Walk the layers from n-1 down to 2 ("by -1" is required; without it the range is empty whenever layer > 3)
- for (i <- bc_config.value.layer - 2 to 1 by -1) {
- // expression for f'(z)
- val nnd_act = bc_config.value.activation_function match {
- case "sigm" =>
- val d_act = nn_a(i) :* (1.0 - nn_a(i))
- d_act
- case "tanh_opt" =>
- val fz2 = (1.0 - ((nn_a(i) :* nn_a(i)) * (1.0 / (1.7159 * 1.7159))))
- val d_act = fz2 * (1.7159 * (2.0 / 3.0))
- d_act
- }
- // Sparsity penalty error: -(t/p) + (1-t)/(1-p)
- // sparsityError = [zeros(size(nn.a{i},1),1) nn.nonSparsityPenalty * (-nn.sparsityTarget ./ pi + (1 - nn.sparsityTarget) ./ (1 - pi))];
- val sparsityError = if (bc_config.value.nonSparsityPenalty > 0) {
- val nn_pi1 = bc_nn_p.value(i)
- val nn_pi2 = (bc_config.value.sparsityTarget / nn_pi1) * (-1.0) + (1.0 - bc_config.value.sparsityTarget) / (1.0 - nn_pi1)
- val Bm1 = new BDM(nn_pi2.rows, 1, Array.fill(nn_pi2.rows * 1)(1.0))
- val sparsity = BDM.horzcat(Bm1, nn_pi2 * bc_config.value.nonSparsityPenalty)
- sparsity
- } else {
- val nn_pi1 = bc_nn_p.value(i)
- val sparsity = BDM.zeros[Double](nn_pi1.rows, nn_pi1.cols + 1)
- sparsity
- }
- // Derivative: d(i) = (d(i+1) * w(i) + sparsityError) .* f'(z)
- // d{i} = (d{i + 1} * nn.W{i} + sparsityError) .* d_act;
- val W1 = bc_nn_W.value(i)
- val nndi1 = if (i + 1 == bc_config.value.layer - 1) {
- // in this case there is no bias term to be removed from d{n}
- val di1 = di(i - 1)
- val di2 = (di1 * W1 + sparsityError) :* nnd_act
- di2
- } else {
- // in this case the bias term has to be removed from d{i}
- val di1 = di(i - 1)(::, 1 to -1)
- val di2 = (di1 * W1 + sparsityError) :* nnd_act
- di2
- }
- // dropoutFraction
- val nndi2 = if (bc_config.value.dropoutFraction > 0) {
- val dropouti1 = dropout(i)
- val Bm1 = new BDM(nndi1.rows: Int, 1: Int, Array.fill(nndi1.rows * 1)(1.0))
- val dropouti2 = BDM.horzcat(Bm1, dropouti1)
- nndi1 :* dropouti2
- } else nndi1
- di += nndi2
- }
- di += BDM.zeros(1, 1)
- // Compute the final partial derivatives: dw(i) = (1/m) * Σ d(i+1)' * a(i)
- // nn.dW{i} = (d{i + 1}' * nn.a{i}) / size(d{i + 1}, 1);
- val dw = ArrayBuffer[BDM[Double]]()
- for (i <- 0 to bc_config.value.layer - 2) {
- val nndW = if (i + 1 == bc_config.value.layer - 1) {
- (di(bc_config.value.layer - 2 - i).t) * nn_a(i)
- } else {
- (di(bc_config.value.layer - 2 - i)(::, 1 to -1)).t * nn_a(i)
- }
- dw += nndW
- }
- (f._1, di, dw)
- }
- val train_data7 = train_data6.map(f => f._3)
- // Sample a subset (fraction miniBatchFraction) of the total data
- // compute and sum up the subgradients on this subset (this is one map-reduce)
- // Each layer's gradient matrix has shape size(i+1) x (size(i) + 1), bias column included
- val initgrad = ArrayBuffer[BDM[Double]]()
- for (i <- 0 to bc_config.value.layer - 2) {
- initgrad += BDM.zeros[Double](bc_config.value.size(i + 1), bc_config.value.size(i) + 1)
- }
- val (gradientSum, miniBatchSize) = train_data7.treeAggregate((initgrad, 0L))(
- seqOp = (c, v) => {
- // c: (grad, count), v: (grad)
- val grad1 = c._1
- val grad2 = v
- val sumgrad = ArrayBuffer[BDM[Double]]()
- for (i <- 0 to bc_config.value.layer - 2) {
- val Bm1 = grad1(i)
- val Bm2 = grad2(i)
- val Bmsum = Bm1 + Bm2
- sumgrad += Bmsum
- }
- (sumgrad, c._2 + 1)
- },
- combOp = (c1, c2) => {
- // c: (grad, count)
- val grad1 = c1._1
- val grad2 = c2._1
- val sumgrad = ArrayBuffer[BDM[Double]]()
- for (i <- 0 to bc_config.value.layer - 2) {
- val Bm1 = grad1(i)
- val Bm2 = grad2(i)
- val Bmsum = Bm1 + Bm2
- sumgrad += Bmsum
- }
- (sumgrad, c1._2 + c2._2)
- })
- // Average the gradients
- val gradientAvg = ArrayBuffer[BDM[Double]]()
- for (i <- 0 to bc_config.value.layer - 2) {
- val Bm1 = gradientSum(i)
- val Bmavg = Bm1 :/ miniBatchSize.toDouble
- gradientAvg += Bmavg
- }
- gradientAvg.toArray
- }
- /**
- * NNapplygrads performs the weight update
- * (weights and weight momentum).
- */
- def NNapplygrads(
- train_nnbp: Array[BDM[Double]],
- bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
- bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]],
- bc_nn_vW: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): Array[Array[BDM[Double]]] = {
- // nn = nnapplygrads(nn) returns a neural network structure with updated
- // weights and biases
- // Update the weights: w = w - α*[dw + λw]
- val W_a = ArrayBuffer[BDM[Double]]()
- val vW_a = ArrayBuffer[BDM[Double]]()
- for (i <- 0 to bc_config.value.layer - 2) {
- val nndwi = if (bc_config.value.weightPenaltyL2 > 0) {
- val dwi = train_nnbp(i)
- val zeros = BDM.zeros[Double](dwi.rows, 1)
- val l2 = BDM.horzcat(zeros, dwi(::, 1 to -1))
- val dwi2 = dwi + (l2 * bc_config.value.weightPenaltyL2)
- dwi2
- } else {
- val dwi = train_nnbp(i)
- dwi
- }
- val nndwi2 = nndwi :* bc_config.value.learningRate
- val nndwi3 = if (bc_config.value.momentum > 0) {
- val vwi = bc_nn_vW.value(i)
- val dw3 = nndwi2 + (vwi * bc_config.value.momentum)
- dw3
- } else {
- nndwi2
- }
- // nn.W{i} = nn.W{i} - dW;
- W_a += (bc_nn_W.value(i) - nndwi3)
- // nn.vW{i} = nn.momentum*nn.vW{i} + dW;
- val nnvwi1 = if (bc_config.value.momentum > 0) {
- val vwi = bc_nn_vW.value(i)
- val vw3 = nndwi2 + (vwi * bc_config.value.momentum)
- vw3
- } else {
- bc_nn_vW.value(i)
- }
- vW_a += nnvwi1
- }
- Array(W_a.toArray, vW_a.toArray)
- }
- /**
- * NNeval runs a feed-forward pass and computes the output error:
- * it computes every node's output value and the average error.
- */
- def NNeval(
- batch_xy: RDD[(BDM[Double], BDM[Double])],
- bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
- bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): Double = {
- // NNff performs the feed-forward pass
- // nn = nnff(nn, batch_x, batch_y);
- val train_nnff = NeuralNet.NNff(batch_xy, bc_config, bc_nn_W)
- // error and loss
- // Compute the output error
- val loss1 = train_nnff.map(f => f._1.error)
- val (loss2, counte) = loss1.treeAggregate((0.0, 0L))(
- seqOp = (c, v) => {
- // c: (e, count), v: (m)
- val e1 = c._1
- val e2 = (v :* v).sum
- val esum = e1 + e2
- (esum, c._2 + 1)
- },
- combOp = (c1, c2) => {
- // c: (e, count)
- val e1 = c1._1
- val e2 = c2._1
- val esum = e1 + e2
- (esum, c1._2 + c2._2)
- })
- val Loss = loss2 / counte.toDouble
- Loss * 0.5
- }
- }
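Before moving on to NeuralNetModel, here is a minimal training sketch against the API above. It is illustrative only and not part of the toolbox source: it assumes an existing SparkContext `sc` with the NN package on the classpath, and the toy dataset, architecture, and parameter values are all made up for the example.
- // Hypothetical driver code: assumes an existing SparkContext `sc`.
- import breeze.linalg.{DenseMatrix => BDM}
- import org.apache.spark.rdd.RDD
- import NN.NeuralNet
- // Each element is (label matrix, feature matrix): here 1 x 1 labels and 1 x 5 features.
- val train_d: RDD[(BDM[Double], BDM[Double])] = sc.parallelize(1 to 1000).map { _ =>
- val x = BDM.rand[Double](1, 5)
- val y = new BDM(1, 1, Array(0.5 * x(0, 0) + 0.3)) // toy linear target
- (y, x)
- }
- // opts = Array(batchsize, numepochs, validation fraction)
- val opts = Array(100.0, 20.0, 0.2)
- val model = new NeuralNet()
- .setSize(Array(5, 7, 1))
- .setLayer(3)
- .setActivation_function("tanh_opt")
- .setLearningRate(1.0)
- .setOutput_function("linear")
- .NNtrain(train_d, opts)
Note that opts carries, in order, the mini-batch size, the number of epochs, and the fraction of samples held out for cross-validation, exactly as NNtrain unpacks them.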
1.2 NeuralNetModel code
- package NN
- import breeze.linalg.{
- Matrix => BM,
- CSCMatrix => BSM,
- DenseMatrix => BDM,
- Vector => BV,
- DenseVector => BDV,
- SparseVector => BSV
- }
- import org.apache.spark.rdd.RDD
- /**
- * label: target matrix
- * features: feature matrix
- * predict_label: prediction matrix
- * error: error matrix
- */
- case class PredictNNLabel(label: BDM[Double], features: BDM[Double], predict_label: BDM[Double], error: BDM[Double]) extends Serializable
- /**
- * NN(neural network)
- */
- class NeuralNetModel(
- val config: NNConfig,
- val weights: Array[BDM[Double]]) extends Serializable {
- /**
- * Return the prediction results
- * in the format (label, feature, predict_label, error).
- */
- def predict(dataMatrix: RDD[(BDM[Double], BDM[Double])]): RDD[PredictNNLabel] = {
- val sc = dataMatrix.sparkContext
- val bc_nn_W = sc.broadcast(weights)
- val bc_config = sc.broadcast(config)
- // NNff performs the feed-forward pass
- // nn = nnff(nn, batch_x, batch_y);
- val train_nnff = NeuralNet.NNff(dataMatrix, bc_config, bc_nn_W)
- val predict = train_nnff.map { f =>
- val label = f._1.label
- val error = f._1.error
- val nnan = f._1.nna(bc_config.value.layer - 1)
- val nna1 = f._1.nna(0)(::, 1 to -1)
- PredictNNLabel(label, nna1, nnan, error)
- }
- predict
- }
- /**
- * Compute the output error
- * (average error).
- */
- def Loss(predict: RDD[PredictNNLabel]): Double = {
- // error and loss
- // Compute the output error
- val loss1 = predict.map(f => f.error)
- val (loss2, counte) = loss1.treeAggregate((0.0, 0L))(
- seqOp = (c, v) => {
- // c: (e, count), v: (m)
- val e1 = c._1
- val e2 = (v :* v).sum
- val esum = e1 + e2
- (esum, c._2 + 1)
- },
- combOp = (c1, c2) => {
- // c: (e, count)
- val e1 = c1._1
- val e2 = c2._1
- val esum = e1 + e2
- (esum, c1._2 + c2._2)
- })
- val Loss = loss2 / counte.toDouble
- Loss * 0.5
- }
- }
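To close the loop, here is a matching prediction sketch, again illustrative rather than part of the toolbox source, reusing the hypothetical `model` and `train_d` from the training sketch in section 1.1. `predict` returns one PredictNNLabel per sample, and `Loss` reduces the per-sample errors to the same 0.5 * mean-squared-error measure used during training.
- // Score a dataset and compute the model's reported loss.
- val predictions = model.predict(train_d).cache()
- predictions.take(3).foreach { p =>
- println(s"label=${p.label} predict=${p.predict_label} error=${p.error}")
- }
- val loss = model.Loss(predictions)
- println(s"loss (0.5 * mean squared error) = $loss")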