/**
 * Streaming data producer: a generator of random linear-regression events.
 *
 * Listens on TCP port 9999. For every client that connects, a dedicated thread
 * writes a random number of events (between 0 and `MaxEvents - 1`) once per
 * second. Each event is a single line of the form `label<TAB>f1,f2,...,fN`,
 * where the label is the dot product of the feature vector with a fixed random
 * weight vector, plus a fixed random intercept.
 */
object StreamingModelProducer {
  import java.io.PrintWriter
  import java.net.ServerSocket

  import scala.util.Random

  import breeze.linalg._

  /**
   * Starts the server and serves clients forever; this method never returns normally.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val MaxEvents = 100
    val NumFeatures = 100
    val random = new Random()

    // Produces an array of n independent standard-Gaussian samples.
    def generateRandomArray(n: Int): Array[Double] =
      Array.tabulate(n)(_ => random.nextGaussian())

    // The fixed "true" model: one random weight vector and one random intercept,
    // drawn once and shared by every event and every client.
    val w = new DenseVector(generateRandomArray(NumFeatures))
    val intercept = random.nextGaussian() * 10

    // Generates n (label, features) pairs. Despite the name, no noise term is
    // added: the label is exactly w . x + intercept.
    def generateNoisyData(n: Int) = {
      (1 to n).map { _ =>
        val x = new DenseVector(generateRandomArray(NumFeatures))
        val y: Double = w.dot(x)
        val noisy = y + intercept
        (noisy, x)
      }
    }

    // Network producer: accept clients and stream events to each on its own thread.
    val listener = new ServerSocket(9999)
    println("listening on port:9999")

    while (true) {
      val socket = listener.accept()
      new Thread() {
        override def run(): Unit = {
          println("got client connected from:" + socket.getInetAddress)
          try {
            val out = new PrintWriter(socket.getOutputStream(), true)
            // PrintWriter never throws on a failed write; checkError() flushes and
            // reports whether an I/O error has occurred, so the loop ends once the
            // client has gone away instead of spinning forever.
            while (!out.checkError()) {
              Thread.sleep(1000)
              val num = random.nextInt(MaxEvents)
              generateNoisyData(num).foreach { case (y, x) =>
                val xStr = x.data.mkString(",")
                val eventStr = s"$y\t$xStr"
                out.write(eventStr)
                out.write("\n")
              }
              out.flush()
              println(s"created $num events")
            }
          } finally {
            // Always release the client socket, including on interruption.
            socket.close()
          }
        }
      }.start()
    }
  }
}
/**
 * Simple streaming regression model.
 *
 * Reads text events from a socket on localhost:9999, parses each line of the
 * form `label<TAB>f1,f2,...,fN` into a `LabeledPoint`, and uses every 10-second
 * batch both to update a streaming linear-regression model (SGD) and to print
 * the model's predictions for that batch.
 */
object SimpleStreamingModel {
  import org.apache.spark.mllib.linalg.Vectors
  import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
  import org.apache.spark.streaming.{Seconds, StreamingContext}

  /**
   * Builds the streaming pipeline, starts it, and blocks until it terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext("local[2]", "First streaming app", Seconds(10))
    val stream = ssc.socketTextStream("localhost", 9999)

    // Must equal the number of features per event on the wire;
    // StreamingModelProducer emits 100 features per event.
    val NumFeatures = 100

    // Start from an all-zero weight vector of the right dimension.
    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.zeros(NumFeatures))
      .setNumIterations(1)
      .setStepSize(0.01)

    // Parse "label<TAB>f1,f2,..." into a labeled point.
    val labeledStream = stream.map { event =>
      val split = event.split("\t")
      val y = split(0).toDouble
      val features = split(1).split(",").map(_.toDouble)
      LabeledPoint(label = y, features = Vectors.dense(features))
    }

    model.trainOn(labeledStream)
    // predictOn expects a stream of feature vectors, so drop the labels first.
    model.predictOn(labeledStream.map(_.features)).print()

    ssc.start()
    ssc.awaitTermination()
  }
}