Spark Streaming

1. reduceByKey: operates only on the current batch of input

package com.zpark.stu.sparkstream

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SparkStreamDemo {
  // Silence Spark's verbose INFO logging
  Logger.getLogger("org").setLevel(Level.WARN)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SparkStreamDemo").setMaster("local[*]")
    // The second argument to StreamingContext is the batch interval, i.e. how often a batch is processed
    val ssc = new StreamingContext(conf, Seconds(3))

    // hostname is the machine to connect to, port is the TCP port of the socket source
    val lines = ssc.socketTextStream("hdp-1", 9999)
    lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_+_).print()

    // Start the streaming computation
    ssc.start()
    // Block until the computation terminates
    ssc.awaitTermination()

  }
}
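
To feed data into these demos, first start a plain TCP source on hdp-1 (for example, nc -lk 9999), then run the app and type space-separated words into the socket. Because reduceByKey sees one batch at a time, every 3-second batch prints its own independent counts.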

2. updateStateByKey: keeps running state across batches (the history); it takes an update function as its parameter

package com.zpark.stu.sparkstream

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{HashPartitioner, SparkConf}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SparkStreamDemo2 {
  Logger.getLogger("org").setLevel(Level.WARN)

  // Update function for the iterator-based overload of updateStateByKey:
  // x is the key (the word), y is the Seq of new counts from the current batch,
  // and z is the previous running total for that key (None the first time the key appears)
  val updateFunc = (iter: Iterator[(String, Seq[Int], Option[Int])]) => {
    iter.flatMap { case (x, y, z) => Some(y.sum + z.getOrElse(0)).map(m => (x, m)) }
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SparkStreamDemo2").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(6))

    // updateStateByKey requires a checkpoint directory to persist the state
    ssc.checkpoint("E:\\hadoop\\spark\\WordCount\\sparkStreamOut")
    val lines = ssc.socketTextStream("hdp-1", 9999)

    // The third argument (true) tells Spark to remember the partitioner for the state RDDs across batches
    lines.flatMap(_.split(" "))
      .map((_, 1))
      .updateStateByKey(updateFunc, new HashPartitioner(ssc.sparkContext.defaultParallelism), true)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }


}
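
updateStateByKey also has a simpler overload that takes a per-key update function of type (Seq[V], Option[S]) => Option[S]; demo 3 below uses that form. A minimal sketch of the same running word count with this overload, reusing the lines DStream above:

    // Per-key update: newValues holds this batch's counts for one word,
    // runningCount is that word's previous total (None the first time the word appears)
    val simpleUpdate = (newValues: Seq[Int], runningCount: Option[Int]) => {
      Some(newValues.sum + runningCount.getOrElse(0))
    }

    lines.flatMap(_.split(" ")).map((_, 1)).updateStateByKey(simpleUpdate).print()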

3. reduceByKeyAndWindow: set the window length and the sliding interval to dynamically observe the current state of the stream. Window-based operations compute a result for the whole window, over a time range longer than the StreamingContext's batch interval, by combining the results of multiple batches.

package com.zpark.stu.window_operations

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

object WordCountWindowDemo {
  Logger.getLogger("org").setLevel(Level.WARN)
  def main(args: Array[String]): Unit = {

    // Per-key update function: add this batch's counts to the previous running total
    val updateFunc = (values: Seq[Int], state: Option[Int]) => {
      val currentCount = values.foldLeft(0)(_ + _)
      val previousCount = state.getOrElse(0)
      Some(currentCount + previousCount)
    }

    val conf = new SparkConf().setAppName("WordCountWindowDemo").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(6))
    ssc.checkpoint("E:\\hadoop\\spark\\WordCount\\sparkWindowOut")
    val lens = ssc.socketTextStream("hdp-1",9999)

    val mapDs: DStream[(String, Int)] = lens.flatMap(_.split(" ")).map((_,1))
//    mapDs.print()

    val updateDs: DStream[(String, Int)] = mapDs.updateStateByKey(updateFunc)
//    updateDs.print()
    // Window length 12 seconds, sliding interval 6 seconds; both must be multiples of the batch interval (6s here).
    // Note: windowing over updateDs aggregates the running totals themselves; the more common pattern is to
    // window over the raw per-batch pairs (mapDs), as in the commented-out lines below.
    updateDs.reduceByKeyAndWindow((a: Int, b: Int) => (a + b), Seconds(12), Seconds(6)).print()
//    val wordCounts: DStream[(String, Int)] = mapDs.reduceByKeyAndWindow((a: Int, b: Int) => (a + b), Seconds(12), Seconds(6))
//
//    wordCounts.print()
    ssc.start()
    ssc.awaitTermination()

  }
}
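
For long windows, reduceByKeyAndWindow also has an incremental overload that takes an inverse reduce function: on each slide, Spark folds in the batches entering the window and "subtracts" the batches leaving it, instead of recomputing the whole window. A minimal sketch over mapDs (this overload needs the checkpoint directory already set above):

    // Incremental window count: the first function adds counts entering the window,
    // the second removes counts that have slid out of it; requires checkpointing
    mapDs.reduceByKeyAndWindow(
      (a: Int, b: Int) => a + b,
      (a: Int, b: Int) => a - b,
      Seconds(12), Seconds(6)
    ).print()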