The following is compiled from material I found online, combined with my own hands-on practice:
Method 1: Set a switch inside the program to stop the service (recommended)
When the directory "/user/root/stop_sparkStreaming" is detected in HDFS, the service is stopped gracefully.
package com.sjmz.sparkdemo

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * To run this on your local machine, you need to first run a Netcat server
 *   nc -lk 9999
 * and then run the example
 *   spark-submit --class com.sjmz.sparkdemo.SparkStreamWordCount xxx.jar localhost 9999
 */
object SparkStreamWordCount {

  val shutdownMarker = "/user/root/stop_sparkStreaming"
  // flag to stop the spark streaming service
  var stopFlag: Boolean = false

  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: SparkStreamWordCount <hostname> <port>")
      System.exit(1)
    }

    // Create a local StreamingContext with two working threads and a batch interval of 10 seconds.
    // The master requires 2 cores to prevent a starvation scenario.
    val conf = new SparkConf().setMaster("local[2]").setAppName("SparkStream_NetworkWordCount")
    val ssc = new StreamingContext(conf, Seconds(10))

    // Create a DStream that will connect to hostname:port, like localhost:9999
    val lines = ssc.socketTextStream(args(0), args(1).toInt)
    // Split each line into words
    val words = lines.flatMap(_.split(" "))
    // Count each word in each batch
    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)
    // Print the first ten elements of each RDD generated in this DStream to the console
    wordCounts.print()

    // Start the computation
    ssc.start()

    val checkIntervalMillis = 10000
    var isStopped: Boolean = false
    while (!stopFlag) {
      // returns true if the context was stopped within the timeout, false otherwise
      isStopped = ssc.awaitTerminationOrTimeout(checkIntervalMillis)
      if (isStopped) {
        println("WARNING!!! The spark streaming context is stopped. Exiting application ......")
      } else {
        println("spark streaming is still running......")
      }

      toShutDown_SparkStreaming()

      if (!isStopped && stopFlag) {
        println("======> to stop ssc right now")
        // stopSparkContext = true: also stop the underlying SparkContext,
        // whether or not this streaming context has been started
        // stopGracefully = true: wait until the processing of all received data
        // has completed, then stop gracefully
        ssc.stop(stopSparkContext = true, stopGracefully = true)
        println("<====== ssc is stopped !!!")
      }
    }
  }

  // check whether the spark streaming service should be stopped
  def toShutDown_SparkStreaming(): Unit = {
    if (!stopFlag) {
      // if the shutdownMarker directory exists, stop the service
      val fs = FileSystem.get(new Configuration())
      stopFlag = fs.exists(new Path(shutdownMarker))
    }
  }
}
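To trigger a graceful shutdown with this approach, simply create the marker directory in HDFS (and remember to delete it again before the next run, or the job will stop right after it starts):

hdfs dfs -mkdir -p /user/root/stop_sparkStreaming
hdfs dfs -rm -r /user/root/stop_sparkStreaming

The marker-directory switch is recommended because it also works in cluster mode, where it can be awkward to deliver a signal to whichever node happens to be running the driver.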
Method 2: Set the following parameter in SparkConf, then find the running job on the Spark UI page and click kill
sparkConf.set("spark.streaming.stopGracefullyOnShutdown", "true")
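For context, here is a minimal sketch of where this flag fits; the pipeline itself is assumed to be the same word-count example as in Method 1:

val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("SparkStream_NetworkWordCount")
  // ask Spark to stop the StreamingContext gracefully from its shutdown hook
  .set("spark.streaming.stopGracefullyOnShutdown", "true")
val ssc = new StreamingContext(conf, Seconds(10))
// ... build the DStream pipeline as in Method 1 ...
ssc.start()
ssc.awaitTermination()

With the flag set, a normal termination request to the driver JVM (such as the kill action in the Spark UI) is handled by Spark's shutdown hook, which stops the StreamingContext gracefully so in-flight batches finish instead of being cut off mid-batch.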