package com.test
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Minimal Spark Streaming example that demonstrates checkpoint-based recovery.
 *
 * On start-up the [[StreamingContext]] is rebuilt from the data stored in
 * [[SparkStreamingDemo.checkpointDirectory]] when such data exists; otherwise a
 * brand-new context is built by [[SparkStreamingDemo.functionToCreateContext]].
 *
 * @author kevinwyu
 * @create 2022-04-12 15:57
 */
object SparkStreamingDemo {

  /** Directory that holds the streaming checkpoint data (placeholder URI). */
  val checkpointDirectory: String = "hdfs://..."

  /**
   * The streaming context, recovered from the checkpoint data or freshly created.
   *
   * Declared `lazy` so that merely referencing this object does not create a
   * Spark context as a side effect of object initialisation.
   */
  lazy val context: StreamingContext =
    StreamingContext.getOrCreate(
      checkpointDirectory,
      () => functionToCreateContext(checkpointDirectory)
    )

  /**
   * Program entry point: obtains the context, starts it and blocks until the
   * streaming computation terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Any additional configuration that has to be applied regardless of whether
    // the context was recovered or newly created belongs here, before start().
    context.start()
    context.awaitTermination()
  }

  /**
   * Creates and configures a new [[StreamingContext]] with a 60-second batch
   * interval, checkpointing enabled, and a socket text stream as its input.
   *
   * @param checkpointDirectory directory in which checkpoint data is written
   * @param hostname            host of the TCP text source to read from
   * @param port                port of the TCP text source to read from
   * @return the configured, not-yet-started streaming context
   */
  def functionToCreateContext(
      checkpointDirectory: String,
      hostname: String = "localhost",
      port: Int = 9999
  ): StreamingContext = {
    val sparkContext = new SparkContext(new SparkConf())
    val ssc = new StreamingContext(sparkContext, Seconds(60))

    // Enable checkpointing; setting the directory once is sufficient.
    ssc.checkpoint(checkpointDirectory)

    // socketTextStream needs an explicit host and port to connect to.
    val lines = ssc.socketTextStream(hostname, port)

    // Rule of thumb: the checkpoint interval is usually 5-10 sliding intervals
    // of the DStream, and checkpointing is typically applied after a stateful
    // operator. Here it is 180 s, i.e. three 60-second batches.
    // NOTE(review): the original comment also said "minbatch 3s" - meaning unclear, confirm.
    lines.checkpoint(Seconds(180))

    // At least one output operation must be registered, otherwise
    // StreamingContext.start() fails with "No output operations registered".
    lines.print()

    ssc
  }
}
// Spark Streaming demo
// First published on 2022-04-12 16:58:19