1.LocalALS:ALS是交替最小二乘法,通常用于推荐系统算法
package llf
import org.apache.commons.math3.linear.RealMatrix
import org.apache.commons.math3.linear._
/**
* Created by sendoh on 2015/4/26.
*/
/**
 * Naive single-machine Alternating Least Squares (ALS) matrix factorization,
 * as used in recommender systems: factor an M x U ratings matrix R into movie
 * factors ms (M vectors of length F) and user factors us (U vectors of length
 * F) by alternately solving regularized least-squares problems for one side
 * while holding the other fixed.
 */
object LocalALS {
var M = 0 // number of movies (rows of R); set from args in main
var U = 0 // number of users (columns of R); set from args in main
var F = 0 // number of latent factors per vector; set from args in main
var ITERATIONS = 0 // number of alternating sweeps; set from args in main
val LAMBDA = 0.01 // regularization weight
/** Builds a synthetic "ground truth" ratings matrix R = Mh * Uh^T from random factors. */
def generateR(): RealMatrix = {
val mh = randomMatrix(M, F)
val uh = randomMatrix(U, F)
mh.multiply(uh.transpose())
}
/** Root-mean-squared error between targetR and the reconstruction ms * us^T. */
def rmse(targetR: RealMatrix, ms: Array[RealVector], us: Array[RealVector]): Double = {
// Rebuild the predicted ratings matrix entry by entry.
val r = new Array2DRowRealMatrix(M, U)
for (i <- 0 until M; j <- 0 until U) {
r.setEntry(i, j, ms(i).dotProduct(us(j)))
}
val diffs = r.subtract(targetR)
var sumSqs = 0.0
for (i <- 0 until M; j <- 0 until U) {
val diff = diffs.getEntry(i, j)
sumSqs += diff * diff
}
math.sqrt(sumSqs / (M.toDouble * U.toDouble))
}
/**
 * Recomputes movie i's factor vector with all user vectors held fixed:
 * solves the normal equations (X^T X + LAMBDA * U * I) v = X^T y via
 * Cholesky, where X stacks the user vectors and y is row i of R.
 * NOTE(review): parameter `m` is unused; kept for signature parity with updateUser.
 */
def updateMovie(i: Int, m: RealVector, us: Array[RealVector], R: RealMatrix) : RealVector = {
var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
var Xty: RealVector = new ArrayRealVector(F)
// Accumulate the normal equations over all users.
for (j <- 0 until U){
val u = us(j)
XtX = XtX.add(u.outerProduct(u))
Xty = Xty.add(u.mapMultiply(R.getEntry(i, j)))
}
// Add regularization to the diagonal (also keeps XtX positive definite for Cholesky).
for (d <- 0 until F){
XtX.addToEntry(d, d, LAMBDA * U)
}
new CholeskyDecomposition(XtX).getSolver.solve(Xty)
}
/**
 * Recomputes user j's factor vector with all movie vectors held fixed;
 * mirror image of updateMovie using column j of R.
 * NOTE(review): parameter `u` is unused; kept for signature parity with updateMovie.
 */
def updateUser(j: Int, u:RealVector, ms: Array[RealVector], R: RealMatrix) : RealVector = {
var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
var Xty: RealVector = new ArrayRealVector(F)
for (i <- 0 until M){
val m = ms(i)
XtX = XtX.add(m.outerProduct(m))
Xty = Xty.add(m.mapMultiply(R.getEntry(i, j)))
}
for (d <- 0 until F){
XtX.addToEntry(d, d, LAMBDA * M)
}
new CholeskyDecomposition(XtX).getSolver.solve(Xty)
}
/** Prints a warning that this is a demo, not a production implementation. */
def showWarning() { // warning goes to stderr so it does not mix with results
System.err.println(
"""WARN: This is a naive implementation of ALS and is given as an example!
|Please use the ALS method found in org.apache.spark.mllib.recommendation
|for more conventional use.
""".stripMargin)
}
/** Entry point: parse <M> <U> <F> <iters>, run ALS, and report RMSE per sweep. */
def main(args: Array[String]): Unit ={
args match{
case Array(m, u, f, iters) => {
M = m.toInt
U = u.toInt
F = f.toInt
ITERATIONS = iters.toInt
}
case _ => {
System.err.println("Usage: LocalALS <M> <U> <F> <iters>")
System.exit(1)
}
}
showWarning()
println(s"Running with M=$M, U=$U, F=$F, iters=$ITERATIONS")
val R = generateR()
// Random initialization of both factor sets.
var ms = Array.fill(M)(randomVector(F))
var us = Array.fill(U)(randomVector(F))
for (iter <- 1 to ITERATIONS){
println(s"Iteration $iter:")
// One alternating sweep: movies first (using current us), then users (using new ms).
ms = (0 until M).map(i => updateMovie(i, ms(i), us, R)).toArray
us = (0 until U).map(j => updateUser(j, us(j), ms, R)).toArray
println("RMSE = " + rmse(R, ms, us))
println()
}
}
/** rows x cols matrix of uniform random values in [0, 1). */
private def randomMatrix(rows: Int, cols: Int): RealMatrix =
new Array2DRowRealMatrix(Array.fill(rows, cols)(math.random))
/** length-n vector of uniform random values in [0, 1). */
private def randomVector(n: Int): RealVector =
new ArrayRealVector(Array.fill(n)(math.random))
}
还没达到读懂这个算法的程度···
/
2.
package llf
import java.util
/**
* Created by sendoh on 2015/4/28.
*/
/** Scratch object demonstrating a few core Scala features. */
object Text {
  def main(args: Array[String]): Unit = {
  }

  /** Scala's Int passes directly to Java APIs that expect a primitive int. */
  def playWithInt(): Unit = {
    val capacity: Int = 10
    val list = new util.ArrayList[String]
    list.ensureCapacity(capacity)
  }

  /** Returns a tuple of person info; pretend primaryKey drives a lookup. */
  def getPersonInfo(primaryKey: Int) = {
    ("Jim", "Bob", "Lilei")
  }

  // Tuple destructuring. Fixed: pattern variables must start with a lowercase
  // letter — capitalized names (Firstman, ...) are treated as references to
  // existing stable identifiers and fail to compile.
  val (firstman, secondman, lastman) = getPersonInfo(1)
  println(firstman) // Jim

  /** Triple-quoted strings keep their content verbatim; stripMargin trims
    * everything up to and including the leading '|' on each line. */
  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
""".stripMargin)
  }
}
在类里面定义字段,方法,构造函数
// NOTE(review): by convention this class should be named `Person`
// (UpperCamelCase); the name is kept because call sites below construct
// `person` directly.
class person(val firstname: String, val lastname: String) {
  // Position held; remains null unless the auxiliary constructor assigns it.
  private var position: String = _

  // Runs during primary construction — i.e. before the auxiliary constructor
  // can assign `position` — so this always prints "... holds null position".
  println("Creating " + toString) // fixed: missing space after "Creating"

  /** Auxiliary constructor that also records the position held. */
  def this(firstname: String, lastname: String, positionHeld: String) {
    this(firstname, lastname)
    position = positionHeld
  }

  override def toString(): String = {
    firstname + " " + lastname + " holds " + position + " position "
  }
}
// Construct via the auxiliary (three-arg) constructor, which sets `position`.
val john = new person("john", "Bob", "Jak")
println(john) // fixed: was `println(join)` — `join` is an undefined identifier
// Construct via the primary (two-arg) constructor; `position` stays null.
val bill = new person("Bill", "Lon")
println(bill)
// Note: the "Creating ..." line emitted during construction always shows a
// null position, because the primary constructor body runs before the
// auxiliary constructor assigns `position`.
类继承
/** Base vehicle identified by an id and a model year. */
class Vehicle(val id: Int, val year: Int) {
  // fixed: missing separator — "ID: 1YEAR: 2015" -> "ID: 1 YEAR: 2015"
  override def toString(): String = "ID: " + id + " YEAR: " + year
}
/** Car extends Vehicle with a mutable fuel level; constructor params forward
  * to (and override the vals of) Vehicle. */
class Car(override val id: Int, override val year: Int, var fuelLevel: Int) extends Vehicle(id, year) {
  // fixed: missing separator before the fuel level in the rendered string
  override def toString(): String = super.toString() + " Fuel Level: " + fuelLevel
}
// Exercise Car's overridden toString via the println/toString chain.
val car = new Car(id = 1, year = 2015, fuelLevel = 100)
println(car)
容器和类型推演
// Containers and type inference. Fixed: `ArrayList` must be qualified (or
// imported), and a Java ArrayList is not a scala.List — annotate with the
// actual Java type. list2's type is inferred as java.util.ArrayList[Int].
val list1: java.util.ArrayList[Int] = new java.util.ArrayList[Int]
val list2 = new java.util.ArrayList[Int]
list2 add 1
list2 add 2
// Sum the elements of list2.
var total = 0
for (index <- 0 until list2.size()) { // fixed: `for (val index <- ...)` is illegal syntax
  total += list2.get(index) // fixed: was `list.get(index)` — `list` is undefined here
}
println(total)
// 3
/
3.LocalLR:逻辑回归算法
package llf
import breeze.linalg.{Vector, DenseVector}
import scala.util.Random
/**
* Created by sendoh on 2015/4/30.
*/
/** Naive local logistic regression trained by full-batch gradient descent. */
object LocalLR {
  val N = 10000 // number of generated data points
  val D = 10 // number of feature dimensions
  val R = 0.7 // class-separation scaling factor
  val ITERATIONS = 5 // gradient-descent steps
  val rand = new Random(42) // fixed seed for reproducible runs

  /** A labelled point: feature vector x with target label y in {-1, +1}. */
  case class DataPoint(x: Vector[Double], y: Double)

  /** Generates N points: even indices labelled -1, odd +1, with features drawn
    * from a unit Gaussian shifted by y * R so the two classes are separable. */
  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      // DenseVector stores every component in a flat array (dense storage);
      // nextGaussian draws from a normal distribution with mean 0, stddev 1.
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    // fixed: was Array.tabulate(N)(generateData) — that passes the enclosing
    // no-arg method instead of the Int => DataPoint generator.
    Array.tabulate(N)(generatePoint)
  }

  /** Prints a warning that this is a demo, not a production implementation. */
  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
""".stripMargin) // stripMargin aligns the continuation lines
  }

  /** Entry point: generate data, then run ITERATIONS full-batch gradient steps. */
  def main(args: Array[String]): Unit = {
    showWarning()
    val data = generateData
    // Random initial weights in (-1, 1).
    val w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)
    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      // Gradient of the logistic loss, accumulated over the whole data set.
      var gradient = DenseVector.zeros[Double](D)
      for (p <- data) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient // breeze `-=` updates w in place
    }
    println("Final w: " + w)
  }
}
4.LocalFileLR
package llf
import breeze.linalg.{Vector, DenseVector}
import scala.util.Random
/**
* Created by sendoh on 2015/4/28.
*/
/** Logistic regression trained by full-batch gradient descent on a text file:
  * each line is "label f1 f2 ... fD", usage: <inputFile> <iterations>. */
object LocalFileLR {
  val D = 10 // number of feature dimensions
  val rand = new Random(42) // fixed seed for reproducible initial weights

  /** A labelled point: feature vector x with target label y. */
  case class DataPoint(x: Vector[Double], y: Double)

  /** Parses "label f1 f2 ... fD" into a DataPoint (label first, then D features). */
  def parsePoint(line: String): DataPoint = {
    val nums = line.split(' ').map(_.toDouble)
    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
  }

  /** Prints a warning that this is a demo, not a production implementation. */
  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
""".stripMargin)
  }

  /** Entry point: load points from args(0), run args(1) gradient steps. */
  def main(args: Array[String]): Unit = {
    showWarning()
    // fixed: close the Source once the lines are materialized (resource leak).
    val source = scala.io.Source.fromFile(args(0))
    val lines = try source.getLines().toArray finally source.close()
    val points = lines.map(parsePoint _)
    val ITERATIONS = args(1).toInt
    // Random initial weights in (-1, 1).
    val w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w:" + w)
    for (i <- 1 to ITERATIONS) {
      println("On iteration:" + i)
      // Gradient of the logistic loss, accumulated over the whole data set.
      var gradient = DenseVector.zeros[Double](D)
      for (p <- points) {
        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
        gradient += p.x * scale
      }
      w -= gradient // breeze `-=` updates w in place
    }
    println("Final w: " + w)
  }
}