1.LocalALS:ALS是交替最小二乘法,通常用于推荐系统算法
package llf
import org.apache.commons.math3.linear.RealMatrix
import org.apache.commons.math3.linear._
/**
* Created by sendoh on 2015/4/26.
*/
/**
 * Naive single-machine Alternating Least Squares (ALS) matrix factorization,
 * as used in recommender systems: factor an M x U ratings matrix R into movie
 * factors ms (M vectors of length F) and user factors us (U vectors of length
 * F) by alternately solving regularized least-squares problems for one side
 * while holding the other fixed.
 */
object LocalALS {
var M = 0 // number of movies (rows of R); set from args in main
var U = 0 // number of users (columns of R); set from args in main
var F = 0 // number of latent factors per vector; set from args in main
var ITERATIONS = 0 // number of alternating sweeps; set from args in main
val LAMBDA = 0.01 // regularization weight
/** Builds a synthetic "ground truth" ratings matrix R = Mh * Uh^T from random factors. */
def generateR(): RealMatrix = {
val mh = randomMatrix(M, F)
val uh = randomMatrix(U, F)
mh.multiply(uh.transpose())
}
/** Root-mean-squared error between targetR and the reconstruction ms * us^T. */
def rmse(targetR: RealMatrix, ms: Array[RealVector], us: Array[RealVector]): Double = {
// Rebuild the predicted ratings matrix entry by entry.
val r = new Array2DRowRealMatrix(M, U)
for (i <- 0 until M; j <- 0 until U) {
r.setEntry(i, j, ms(i).dotProduct(us(j)))
}
val diffs = r.subtract(targetR)
var sumSqs = 0.0
for (i <- 0 until M; j <- 0 until U) {
val diff = diffs.getEntry(i, j)
sumSqs += diff * diff
}
math.sqrt(sumSqs / (M.toDouble * U.toDouble))
}
/**
 * Recomputes movie i's factor vector with all user vectors held fixed:
 * solves the normal equations (X^T X + LAMBDA * U * I) v = X^T y via
 * Cholesky, where X stacks the user vectors and y is row i of R.
 * NOTE(review): parameter `m` is unused; kept for signature parity with updateUser.
 */
def updateMovie(i: Int, m: RealVector, us: Array[RealVector], R: RealMatrix) : RealVector = {
var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
var Xty: RealVector = new ArrayRealVector(F)
// Accumulate the normal equations over all users.
for (j <- 0 until U){
val u = us(j)
XtX = XtX.add(u.outerProduct(u))
Xty = Xty.add(u.mapMultiply(R.getEntry(i, j)))
}
// Add regularization to the diagonal (also keeps XtX positive definite for Cholesky).
for (d <- 0 until F){
XtX.addToEntry(d, d, LAMBDA * U)
}
new CholeskyDecomposition(XtX).getSolver.solve(Xty)
}
/**
 * Recomputes user j's factor vector with all movie vectors held fixed;
 * mirror image of updateMovie using column j of R.
 * NOTE(review): parameter `u` is unused; kept for signature parity with updateMovie.
 */
def updateUser(j: Int, u:RealVector, ms: Array[RealVector], R: RealMatrix) : RealVector = {
var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
var Xty: RealVector = new ArrayRealVector(F)
for (i <- 0 until M){
val m = ms(i)
XtX = XtX.add(m.outerProduct(m))
Xty = Xty.add(m.mapMultiply(R.getEntry(i, j)))
}
for (d <- 0 until F){
XtX.addToEntry(d, d, LAMBDA * M)
}
new CholeskyDecomposition(XtX).getSolver.solve(Xty)
}
/** Prints a warning that this is a demo, not a production implementation. */
def showWarning() { // warning goes to stderr so it does not mix with results
System.err.println(
"""WARN: This is a naive implementation of ALS and is given as an example!
|Please use the ALS method found in org.apache.spark.mllib.recommendation
|for more conventional use.
""".stripMargin)
}
/** Entry point: parse <M> <U> <F> <iters>, run ALS, and report RMSE per sweep. */
def main(args: Array[String]): Unit ={
args match{
case Array(m, u, f, iters) => {
M = m.toInt
U = u.toInt
F = f.toInt
ITERATIONS = iters.toInt
}
case _ => {
System.err.println("Usage: LocalALS <M> <U> <F> <iters>")
System.exit(1)
}
}
showWarning()
println(s"Running with M=$M, U=$U, F=$F, iters=$ITERATIONS")
val R = generateR()
// Random initialization of both factor sets.
var ms = Array.fill(M)(randomVector(F))
var us = Array.fill(U)(randomVector(F))
for (iter <- 1 to ITERATIONS){
println(s"Iteration $iter:")
// One alternating sweep: movies first (using current us), then users (using new ms).
ms = (0 until M).map(i => updateMovie(i, ms(i), us, R)).toArray
us = (0 until U).map(j => updateUser(j, us(j), ms, R)).toArray
println("RMSE = " + rmse(R, ms, us))
println()
}
}
/** rows x cols matrix of uniform random values in [0, 1). */
private def randomMatrix(rows: Int, cols: Int): RealMatrix =
new Array2DRowRealMatrix(Array.fill(rows, cols)(math.random))
/** length-n vector of uniform random values in [0, 1). */
private def randomVector(n: Int): RealVector =
new ArrayRealVector(Array.fill(n)(math.random))
}
还没达到读懂这个算法的程度···
/
2.
package llf
import java.util
/**
* Created by sendoh on 2015/4/28.
*/
/** Scratch object demonstrating a few core Scala features. */
object Text {
  def main(args: Array[String]): Unit = {
  }

  /** Scala's Int passes directly to Java APIs that expect a primitive int. */
  def playWithInt(): Unit = {
    val capacity: Int = 10
    val list = new util.ArrayList[String]
    list.ensureCapacity(capacity)
  }

  /** Returns a tuple of person info; pretend primaryKey drives a lookup. */
  def getPersonInfo(primaryKey: Int) = {
    ("Jim", "Bob", "Lilei")
  }

  // Tuple destructuring. Fixed: pattern variables must start with a lowercase
  // letter — capitalized names (Firstman, ...) are treated as references to
  // existing stable identifiers and fail to compile.
  val (firstman, secondman, lastman) = getPersonInfo(1)
  println(firstman) // Jim

  /** Triple-quoted strings keep their content verbatim; stripMargin trims
    * everything up to and including the leading '|' on each line. */
  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
""".stripMargin)
  }
}
在类里面定义字段,方法,构造函数
// NOTE(review): by convention this class should be named `Person`
// (UpperCamelCase); the name is kept because call sites below construct
// `person` directly.
class person(val firstname: String, val lastname: String) {
  // Position held; remains null unless the auxiliary constructor assigns it.
  private var position: String = _

  // Runs during primary construction — i.e. before the auxiliary constructor
  // can assign `position` — so this always prints "... holds null position".
  println("Creating " + toString) // fixed: missing space after "Creating"

  /** Auxiliary constructor that also records the position held. */
  def this(firstname: String, lastname: String, positionHeld: String) {
    this(firstname, lastname)
    position = positionHeld
  }

  override def toString(): String = {
    firstname + " " + lastname + " holds " + position + " position "
  }
}
// Construct via the auxiliary (three-arg) constructor, which sets `position`.
val john = new person("john", "Bob", "Jak")
println(john) // fixed: was `println(join)` — `join` is an undefined identifier
// Construct via the primary (two-arg) constructor; `position` stays null.
val bill = new person("Bill", "Lon")
println(bill)
// Note: the "Creating ..." line emitted during construction always shows a
// null position, because the primary constructor body runs before the
// auxiliary constructor assigns `position`.
类继承
/** Base vehicle identified by an id and a model year. */
class Vehicle(val id: Int, val year: Int) {
  // fixed: missing separator — "ID: 1YEAR: 2015" -> "ID: 1 YEAR: 2015"
  override def toString(): String = "ID: " + id + " YEAR: " + year
}
/** Car extends Vehicle with a mutable fuel level; constructor params forward
  * to (and override the vals of) Vehicle. */
class Car(override val id: Int, override val year: Int, var fuelLevel: Int) extends Vehicle(id, year) {
  // fixed: missing separator before the fuel level in the rendered string
  override def toString(): String = super.toString() + " Fuel Level: " + fuelLevel
}
// Exercise Car's overridden toString via the println/toString chain.
val car = new Car(id = 1, year = 2015, fuelLevel = 100)
println(car)
容器和类型推演
// Containers and type inference. Fixed: `ArrayList` must be qualified (or
// imported), and a Java ArrayList is not a scala.List — annotate with the
// actual Java type. list2's type is inferred as java.util.ArrayList[Int].
val list1: java.util.ArrayList[Int] = new java.util.ArrayList[Int]
val list2 = new java.util.ArrayList[Int]
list2 add 1
list2 add 2
// Sum the elements of list2.
var total = 0
for (index <- 0 until list2.size()) { // fixed: `for (val index <- ...)` is illegal syntax
  total += list2.get(index) // fixed: was `list.get(index)` — `list` is undefined here
}
println(total)
// 3
/
3.LocalLR:逻辑回归算法
package llf
import breeze.linalg.{Vector, DenseVector}
import scala.util.Random
/**
* Created by sendoh on 2015/4/30.
*/
/** Naive local logistic regression trained by full-batch gradient descent. */
object LocalLR {
  val N = 10000 // number of generated data points
  val D = 10 // number of feature dimensions
  val R = 0.7 // class-separation scaling factor
  val ITERATIONS = 5 // gradient-descent steps
  val rand = new Random(42) // fixed seed for reproducible runs

  /** A labelled point: feature vector x with target label y in {-1, +1}. */
  case class DataPoint(x: Vector[Double], y: Double)

  /** Generates N points: even indices labelled -1, odd +1, with features drawn
    * from a unit Gaussian shifted by y * R so the two classes are separable. */
  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      // DenseVector stores every component in a flat array (dense storage);
      // nextGaussian draws from a normal distribution with mean 0, stddev 1.
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    // fixed: was Array.tabulate(N)(generateData) — that passes the enclosing
    // no-arg method instead of the Int => DataPoint generator.
    Array.tabulate(N)(generatePoint)
  }

  /** Prints a warning that this is a demo, not a production implementation. */
  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
""".stripMargin) // stripMargin aligns the continuation lines
  }

  /** Entry point: generate data, then run ITERATIONS full-batch gradient steps. */
  def main(args: Array[String]): Unit = {
    showWarning()
    val data = generateData
    // Random initial weights in (-1, 1).
    val w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)
    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      // Gradient of the logistic loss, accumulated over the whole data set.
      var gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient // breeze `-=` updates w in place
    }
    println("Final w: " + w)
  }
}
4.LocalFileLR
package llf
import breeze.linalg.{Vector, DenseVector}
import scala.util.Random
/**
* Created by sendoh on 2015/4/28.
*/
/** Logistic regression trained by full-batch gradient descent on a text file:
  * each line is "label f1 f2 ... fD", usage: <inputFile> <iterations>. */
object LocalFileLR {
  val D = 10 // number of feature dimensions
  val rand = new Random(42) // fixed seed for reproducible initial weights

  /** A labelled point: feature vector x with target label y. */
  case class DataPoint(x: Vector[Double], y: Double)

  /** Parses "label f1 f2 ... fD" into a DataPoint (label first, then D features). */
  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  /** Prints a warning that this is a demo, not a production implementation. */
  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
""".stripMargin)
  }

  /** Entry point: load points from args(0), run args(1) gradient steps. */
  def main(args: Array[String]): Unit = {
    showWarning()
    // fixed: close the Source once the lines are materialized (resource leak).
    val source = scala.io.Source.fromFile(args(0))
    val lines = try source.getLines().toArray finally source.close()
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt
    // Random initial weights in (-1, 1).
    val w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w:" + w)
    for (i <- 1 to ITERATIONS) {
      println("On iteration:" + i)
      // Gradient of the logistic loss, accumulated over the whole data set.
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient // breeze `-=` updates w in place
    }
    println("Final w: " + w)
  }
}