I. Graceful Shutdown for Big Data Jobs
Streaming jobs run 24/7, but a code upgrade occasionally requires stopping the program deliberately. In a distributed application there is no way to go kill the processes one by one, so configuring a graceful shutdown is essential. The standard approach is to use an external file system (or another shared store) to signal the running program to shut itself down.
II. Simulated Third-Party Trigger Example
1. Graceful shutdown
package com.spack.bigdata.streaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext, StreamingContextState}
/**
 * Graceful shutdown
 */
object SparkStreaming08_Close {
def main(args: Array[String]): Unit = {
/**
 * Stopping a plain thread:
 * val thread = new Thread()
 * thread.start()
 *
 * thread.stop() // forcibly kills the thread -- we want a gentler stop
 */
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming")
val scc = new StreamingContext(sparkConf, Seconds(3))
scc.checkpoint("cp") // a checkpoint directory must be set
val lines = scc.socketTextStream("localhost", 9999)
val wordToOne = lines.map((_, 1))
wordToOne.print() // at least one output operation is required before start()
scc.start()
// Stopping the receiver must be done from a separate thread,
// and the stop flag should live in a third-party system
new Thread(new Runnable {
override def run(): Unit = {
/**
 * scc.stop(true, true)
 * first argument: also stop the underlying SparkContext
 * second argument: stop gracefully
 *
 * A graceful stop is not a forced kill: the receivers stop accepting
 * new data, and the data already received is processed before shutdown.
 * Possible third-party homes for the stop flag:
 * MySQL: Table(stopSpark) => Row => data
 * Redis: Data(K, V)
 * ZK:    /stopSpark
 * HDFS:  /stopSpark
 */
// demo: poll the third-party flag in a loop
// while (true) {
//   // e.g. read the flag from MySQL
//   if (true) {
//     // check the StreamingContext state
//     val state: StreamingContextState = scc.getState()
//
//     // stop gracefully only while the context is ACTIVE
//     if (state == StreamingContextState.ACTIVE) {
//       scc.stop(true, true)
//     }
//   }
//   // flag not set: sleep, then poll again
//   Thread.sleep(5000)
// }
Thread.sleep(5000)
// check the StreamingContext state
val state: StreamingContextState = scc.getState()
// stop gracefully only while the context is ACTIVE
if (state == StreamingContextState.ACTIVE) {
  scc.stop(true, true)
}
// exit the JVM after stopping (mandatory when the monitor polls in a while(true) loop)
System.exit(0)
}
}).start() // the monitor thread must actually be started
scc.awaitTermination() // block the main thread until the context stops
}
}
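The comment block above lists MySQL, Redis, ZooKeeper and HDFS as possible homes for the stop flag. Below is a minimal sketch of the MySQL variant, assuming a flag table named stop_spark and a local JDBC connection; the table name, URL and credentials are illustrative and not part of the original example.

import java.sql.DriverManager
import org.apache.spark.streaming.{StreamingContext, StreamingContextState}

object MysqlStopMonitor {
  // hypothetical flag check: any row in stop_spark means "please stop"
  private def stopRequested(): Boolean = {
    val conn = DriverManager.getConnection(
      "jdbc:mysql://localhost:3306/spark", "root", "123456") // assumed connection
    try {
      val rs = conn.createStatement().executeQuery("SELECT COUNT(*) FROM stop_spark")
      rs.next() && rs.getInt(1) > 0
    } finally conn.close()
  }

  def monitor(scc: StreamingContext): Unit = {
    while (true) {
      Thread.sleep(5000) // poll every 5 seconds
      if (stopRequested() && scc.getState() == StreamingContextState.ACTIVE) {
        scc.stop(stopSparkContext = true, stopGracefully = true)
        System.exit(0)
      }
    }
  }
}

Inserting any row into stop_spark would then trigger a graceful stop on the next poll.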
2. Recovering data from the checkpoint
package com.spack.bigdata.streaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext, StreamingContextState}
/**
 * Graceful shutdown
 * Recovering data from the checkpoint
 */
object SparkStreaming09_Resume {
def main(args: Array[String]): Unit = {
// first argument: the checkpoint directory
// second argument: a function that creates the StreamingContext
val ssc = StreamingContext.getActiveOrCreate("cp", () => {
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming")
val scc = new StreamingContext(sparkConf, Seconds(3))
val lines = scc.socketTextStream("localhost", 9999)
val wordToOne = lines.map((_, 1))
wordToOne.print()
scc
})
ssc.checkpoint("cp") // a checkpoint directory must be set
ssc.start()
ssc.awaitTermination() // block the main thread until the context stops
}
}
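A note on the recovery semantics: StreamingContext.getActiveOrCreate first returns the currently active context in this JVM if one exists; otherwise it rebuilds the context from the checkpoint data under "cp"; only when neither exists does it invoke the creation function. All DStream logic therefore has to live inside that function, because after a restart the graph is deserialized from the checkpoint rather than rebuilt from main.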
III. HDFS as the Third-Party Store
1. MonitorStop: polling an HDFS flag file
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.streaming.{StreamingContext, StreamingContextState}
class MonitorStop(ssc: StreamingContext) extends Runnable {
override def run(): Unit = {
val fs: FileSystem = FileSystem.get(new URI("hdfs://linux1:9000"), new Configuration(), "atguigu")
while (true) {
  try
    Thread.sleep(5000) // poll every 5 seconds
  catch {
    case e: InterruptedException =>
      e.printStackTrace()
  }
  val state: StreamingContextState = ssc.getState
  // the flag file on HDFS acts as the external stop signal
  val bool: Boolean = fs.exists(new Path("hdfs://linux1:9000/stopSpark"))
  if (bool) {
    if (state == StreamingContextState.ACTIVE) {
      ssc.stop(stopSparkContext = true, stopGracefully = true)
      System.exit(0)
    }
  }
}
}
}
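To trigger the shutdown, create the marker file the monitor polls for, assuming the HDFS instance above:

hadoop fs -touchz hdfs://linux1:9000/stopSpark

On the next poll the monitor sees the file, stops the context gracefully, and exits the JVM.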
2. Driver program using MonitorStop
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
object SparkTest {
def createSSC(): StreamingContext = {
val update: (Seq[Int], Option[Int]) => Some[Int] = (values: Seq[Int], status: Option[Int]) => {
// sum for the current batch
val sum: Int = values.sum
// previous count from the state store
val lastStatus: Int = status.getOrElse(0)
Some(sum + lastStatus)
}
val sparkConf: SparkConf = new SparkConf().setMaster("local[4]").setAppName("SparkTest")
// ask Spark to stop gracefully when the JVM shuts down
sparkConf.set("spark.streaming.stopGracefullyOnShutdown", "true")
val ssc = new StreamingContext(sparkConf, Seconds(5))
ssc.checkpoint("./ck")
val line: ReceiverInputDStream[String] = ssc.socketTextStream("linux1", 9999)
val word: DStream[String] = line.flatMap(_.split(" "))
val wordAndOne: DStream[(String, Int)] = word.map((_, 1))
val wordAndCount: DStream[(String, Int)] = wordAndOne.updateStateByKey(update)
wordAndCount.print()
ssc
}
def main(args: Array[String]): Unit = {
val ssc: StreamingContext = StreamingContext.getActiveOrCreate("./ck", () => createSSC())
new Thread(new MonitorStop(ssc)).start()
ssc.start()
ssc.awaitTermination()
}
}
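A possible end-to-end test, under the assumptions above (a netcat source and HDFS on linux1): run nc -lk 9999 on linux1, start SparkTest and feed it some words, then create /stopSpark to stop the job gracefully. Delete the flag file and restart SparkTest; the word counts resume from the state checkpointed under ./ck.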