import org.apache.log4j.{Logger, Level}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
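/**
 * Stateful word count over a socket stream: comma-separated tokens received
 * on hadoop02:9999 are counted per 1-second batch, and the running totals
 * carry across batches (and restarts, via the HDFS checkpoint) through
 * updateStateByKey.
 */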
object SparkStreaming {
  val checkpointDirectory = "hdfs://hadoop02:9000/values"

  def functionToCreateContext(): StreamingContext = {
    // setup: build the SparkConf and a StreamingContext with a 1-second batch interval
    val conf = new SparkConf().setAppName("SparkStreaming").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(1))
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    // updateStateByKey needs a checkpoint directory so per-key state survives failures
    ssc.checkpoint(checkpointDirectory)
    // input: read lines of text from the socket source
    val lineDStream = ssc.socketTextStream("hadoop02", 9999)
    // process: split each line on commas, pair every word with a count of 1,
    // then merge each batch's counts into the running total kept per key
    val wordOneDStream: DStream[(String, Int)] = lineDStream.flatMap(_.split(",")).map((_, 1))
    val resultDStream: DStream[(String, Int)] = wordOneDStream.updateStateByKey(
      (newValues: Seq[Int], runningCount: Option[Int]) => Some(newValues.sum + runningCount.getOrElse(0)))
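    // Worked trace of the update function (a sketch, assuming one line arrives per batch):
    //   batch 1: "a,a,b" -> ("a", Seq(1, 1), None)    -> a: 2
    //                       ("b", Seq(1),    None)    -> b: 1
    //   batch 2: "a"     -> ("a", Seq(1), Some(2))    -> a: 3
    //                       ("b", Seq(),  Some(1))    -> b: 1
    // updateStateByKey visits every key in the state on every batch, so totals
    // for quiet keys are retained and re-emitted.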
    // output: print the updated counts for each batch
    resultDStream.print()
    // return the fully wired context so getOrCreate can checkpoint and reuse it
    ssc
  }
  def main(args: Array[String]): Unit = {
    // Rebuild the context from the checkpoint if one exists; otherwise create it fresh
    val ssc = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext _)
    // functionToCreateContext is skipped on recovery, so set the log level here as well
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    ssc.start()
    // Block until the job is stopped or fails; the context is already stopped once
    // awaitTermination returns, so an extra stop() call would be redundant
    ssc.awaitTermination()
  }
}
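// To exercise the job (environment assumptions: a reachable host hadoop02 with
// HDFS on port 9000, as hard-coded above), open a plain socket source first,
// for example with netcat:
//
//   nc -lk 9999
//
// then run this object and type comma-separated words into the netcat session;
// the cumulative counts print once per 1-second batch.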