大数据之优雅关闭完整使用 (第十六章)

小坏讲微服务

已于 2022-07-25 10:52:48 修改

阅读量325

点赞数

分类专栏：小坏讲大数据Spark第十阶段文章标签：大数据 spark hadoop

于 2022-07-25 10:50:42 首次发布

本文链接：https://blog.csdn.net/qq_42082701/article/details/125970504

版权

小坏讲大数据Spark第十阶段专栏收录该内容

17 篇文章 3 订阅

订阅专栏

大数据之优雅关闭

一、大数据之优雅关闭
二、模拟第三方数据案例
- 1、优雅关闭
- 2、恢复数据
三、HDFS第三方
- 1、优雅关闭HDFS
- 2、优雅关闭HDFS

一、大数据之优雅关闭

流式任务需要 7*24 小时执行，但是有时涉及到升级代码，需要主动停止程序。由于是分布式程序，没办法一个个进程去杀死，所以配置优雅的关闭就显得至关重要了。做法是使用外部（第三方）存储系统中的标记来控制内部程序关闭。

二、模拟第三方数据案例

1、优雅关闭

package com.spack.bigdata.streaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext, StreamingContextState}

/**
 *
 * 优雅的关闭
 */
/**
 * Demonstrates a graceful shutdown of a Spark Streaming application.
 *
 * The streaming context is started on the main thread, and a separate thread
 * requests a graceful stop a few seconds later. In a real deployment the stop
 * decision would come from an external system, for example:
 *   - MySQL: Table(stopSpark) => Row => data
 *   - Redis: Data(K, V)
 *   - ZK:    /stopSpark
 *   - HDFS:  /stopSpark
 */
object SparkStreaming08_Close {
  def main(args: Array[String]): Unit = {

    // Note on plain threads: `thread.stop()` kills a thread forcibly.
    // That is exactly what we want to avoid for a streaming job.

    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming")
    val scc = new StreamingContext(sparkConf, Seconds(3))
    scc.checkpoint("cp") // a checkpoint directory must be configured
    val lines = scc.socketTextStream("localhost", 9999)
    val wordToOne = lines.map((_, 1))
    // At least one output operation has to be registered, otherwise start() fails
    // with "No output operations registered, so nothing to execute".
    wordToOne.print()

    scc.start()

    // awaitTermination() below blocks the main thread, so the stop request
    // has to be issued from a separate thread.
    val stopper = new Thread(new Runnable {
      override def run(): Unit = {
        // stop(stopSparkContext, stopGracefully):
        //   1st argument: also stop the underlying SparkContext
        //   2nd argument: stop gracefully, i.e. stop accepting new data and finish
        //                 processing what was already received before shutting down.

        // Sketch of the production variant, polling an external store:
        //   while (true) {
        //     if (/* stop flag found, e.g. in MySQL */ true) {
        //       val state: StreamingContextState = scc.getState()
        //       if (state == StreamingContextState.ACTIVE) {
        //         scc.stop(true, true)
        //       }
        //     }
        //     // flag not found yet: sleep and poll again
        //     Thread.sleep(5000)
        //   }

        Thread.sleep(5000)
        // Only an ACTIVE context can be stopped meaningfully.
        val state: StreamingContextState = scc.getState()
        if (state == StreamingContextState.ACTIVE) {
          scc.stop(stopSparkContext = true, stopGracefully = true)
        }
        // Required to leave the process when the polling loop variant is used.
        System.exit(0)
      }
    })
    // The original listing built the thread but never started it, so the
    // shutdown logic never ran.
    stopper.start()

    scc.awaitTermination() // blocks the main thread until the context terminates
  }

}

2、恢复数据

package com.spack.bigdata.streaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext, StreamingContextState}

/**
 *
 * 优雅的关闭
 * 恢复数据
 */
/**
 * Companion to the graceful-shutdown example: restarts the streaming job,
 * obtaining the context through `StreamingContext.getActiveOrCreate` with the
 * checkpoint directory "cp".
 */
object SparkStreaming09_Resume {
  def main(args: Array[String]): Unit = {

    // Factory handed to getActiveOrCreate; it builds a brand-new context
    // together with its word -> 1 pipeline.
    val buildContext: () => StreamingContext = () => {
      val conf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming")
      val fresh = new StreamingContext(conf, Seconds(3))

      val input = fresh.socketTextStream("localhost", 9999)
      val pairs = input.map(word => (word, 1))
      pairs.print()
      fresh
    }

    // 1st argument: checkpoint path, 2nd argument: function creating the context.
    val ssc = StreamingContext.getActiveOrCreate("cp", buildContext)

    ssc.checkpoint("cp") // a checkpoint directory must be configured

    ssc.start()
    ssc.awaitTermination() // blocks the main thread

  }

}

三、HDFS第三方

1、优雅关闭HDFS

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.streaming.{StreamingContext, StreamingContextState}
/**
 * Background task that watches HDFS for a stop marker and shuts the given
 * streaming context down gracefully once the marker exists.
 *
 * Every 5 seconds it checks whether `hdfs://linux1:9000/stopSpark` exists.
 * If it does and the context is ACTIVE, the context (and its SparkContext)
 * is stopped gracefully and the JVM exits with status 0.
 *
 * Polling ends when the context is stopped by other means or when the
 * thread is interrupted, so the monitor thread never outlives the job.
 *
 * @param ssc the streaming context to monitor and stop
 */
class MonitorStop(ssc: StreamingContext) extends Runnable {
  override def run(): Unit = {
    // NOTE(review): FileSystem.get may return a shared cached instance, so it is
    // deliberately not closed here — confirm against the Hadoop FileSystem docs.
    val fs: FileSystem =
      FileSystem.get(new URI("hdfs://linux1:9000"), new Configuration(), "atguigu")
    val stopMarker = new Path("hdfs://linux1:9000/stopSpark")

    try {
      // Stop polling once the context has been stopped elsewhere; the original
      // `while (true)` kept this thread (and the JVM) alive forever.
      while (ssc.getState != StreamingContextState.STOPPED) {
        Thread.sleep(5000)
        val state: StreamingContextState = ssc.getState
        val markerExists: Boolean = fs.exists(stopMarker)
        if (markerExists && state == StreamingContextState.ACTIVE) {
          ssc.stop(stopSparkContext = true, stopGracefully = true)
          System.exit(0)
        }
      }
    } catch {
      case _: InterruptedException =>
        // Treat an interrupt as a request to end monitoring: restore the flag
        // for the owner of this thread and leave the loop.
        Thread.currentThread().interrupt()
    }
  }
}

2、优雅关闭HDFS

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Stateful word count over a socket stream that can be shut down gracefully
 * through the `MonitorStop` watcher thread.
 */
object SparkTest {

  /**
   * Builds a new streaming context with a 5 second batch interval, the "./ck"
   * checkpoint directory and a running word count over lines read from
   * linux1:9999.
   *
   * @return the configured, not yet started, streaming context
   */
  def createSSC(): _root_.org.apache.spark.streaming.StreamingContext = {
    // State update: this batch's counts added to the previously stored total
    // (0 when the key has no state yet).
    val mergeCounts: (Seq[Int], Option[Int]) => Some[Int] =
      (batchCounts: Seq[Int], previous: Option[Int]) => {
        val batchTotal: Int = batchCounts.sum
        val carried: Int = previous.getOrElse(0)
        Some(batchTotal + carried)
      }

    val conf: SparkConf = new SparkConf()
      .setMaster("local[4]")
      .setAppName("SparkTest")
    // Enable graceful shutdown.
    conf.set("spark.streaming.stopGracefullyOnShutdown", "true")

    val context = new StreamingContext(conf, Seconds(5))
    context.checkpoint("./ck")

    val rawLines: ReceiverInputDStream[String] = context.socketTextStream("linux1", 9999)
    val tokens: DStream[String] = rawLines.flatMap(text => text.split(" "))
    val pairs: DStream[(String, Int)] = tokens.map(token => (token, 1))
    val runningTotals: DStream[(String, Int)] = pairs.updateStateByKey(mergeCounts)
    runningTotals.print()

    context
  }

  /** Obtains the context via getActiveOrCreate on "./ck", launches the stop monitor, then runs the job. */
  def main(args: Array[String]): Unit = {
    val ssc: StreamingContext =
      StreamingContext.getActiveOrCreate("./ck", () => createSSC())

    val monitor = new Thread(new MonitorStop(ssc))
    monitor.start()

    ssc.start()
    ssc.awaitTermination()
  }
}