// Big Data series - Spark Streaming: stateful stream processing (state)
package com.test
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Stateful streaming count built on `updateStateByKey`.
 *
 * Reads text lines from a socket on localhost:8600 in 5-second batches and
 * keeps a running total per distinct line across batches. Each received line
 * is used as a key as-is; it is not split into words.
 */
object SparkStreamingState {

  /**
   * State update function passed to `updateStateByKey`.
   *
   * @param batchValues  values that arrived for one key in the current batch
   * @param runningTotal total carried over for that key from earlier batches, if any
   * @return the new total for the key, always wrapped in `Some`
   */
  private def accumulate(batchValues: Seq[Int], runningTotal: Option[Int]): Option[Int] =
    // The sum is a primitive Int and can never be null, so Some(...) is used
    // instead of the null-checking Option(...) factory.
    Some(batchValues.sum + runningTotal.getOrElse(0))

  /**
   * Entry point: builds the streaming pipeline, starts it, and blocks until
   * the streaming context terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Default to a local master, but do not override one supplied externally
    // (e.g. via spark-submit --master), which a hard-coded setMaster would do.
    val sparkConf = new SparkConf()
      .setAppName("SparkStreamingState")
      .setIfMissing("spark.master", "local[*]")
    val streamingContext = new StreamingContext(sparkConf, Seconds(5))

    // Checkpoint directory; set before the stateful transformation is defined.
    streamingContext.checkpoint("data/cpDir")

    val lines: ReceiverInputDStream[String] =
      streamingContext.socketTextStream("localhost", 8600)

    // Pair every received line with a count of 1.
    val lineToOne: DStream[(String, Int)] = lines.map((_, 1))

    // Fold each batch's counts into the per-key running total.
    val runningCounts: DStream[(String, Int)] =
      lineToOne.updateStateByKey[Int](accumulate _)
    runningCounts.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}