Spark Listeners

Spark provides an event-listening mechanism that covers every phase of an application's lifecycle. Through it you can attach custom actions to each stage of a job.

SparkListener is the listener class for these lifecycle events; by overriding its callback methods you can implement your own event handling.
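Registration is a separate step from implementation: a listener must be attached to the driver's listener bus before it receives any events. Here is a minimal sketch, assuming a local SparkSession (the app name and master are placeholders), that registers an inline listener via SparkContext.addSparkListener and reacts to job completion:

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}
import org.apache.spark.sql.SparkSession

object MinimalListenerDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("minimal-listener-demo") // placeholder app name
      .master("local[*]")               // placeholder master
      .getOrCreate()

    // Only the callbacks you care about need to be overridden;
    // the rest of SparkListener's methods default to no-ops.
    spark.sparkContext.addSparkListener(new SparkListener {
      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit =
        println(s"job ${jobEnd.jobId} finished: ${jobEnd.jobResult}")
    })

    // Trigger a job so the callback fires.
    spark.sparkContext.parallelize(1 to 100).count()
    spark.stop()
  }
}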

The following is a full implementation of SparkListener; the parameters passed to each callback expose Spark's runtime data.

import java.text.SimpleDateFormat

import org.apache.spark.scheduler._
import org.slf4j.LoggerFactory

object SparkAppListener extends SparkListener {
      private val log = LoggerFactory.getLogger(getClass)
      //total number of tasks in the most recently started job
      private var jobTaskNum = 0
      //maps jobId to job info (total task count, finished task count, completion percentage)
      val jobToJobInfo = new scala.collection.mutable.HashMap[Int, (Int, Int, Int)]
      //maps stageId to jobId, used to find the job a task belongs to
      private val stageToJob = new scala.collection.mutable.HashMap[Int, Int]
      //number of finished jobs
      private var finishJobNum = 0
      //whether any job has failed
      private var hasException: Boolean = false
      var totalPercent = 0
      val jobMap = new scala.collection.mutable.HashMap[String, String]
    override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
      System.out.println("stageCompleted")
    }

    override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
    }

    override def onTaskStart(taskStart: SparkListenerTaskStart): Unit ={
      System.out.println("taskStart")
    }

    override def onTaskGettingResult(taskGettingResult: SparkListenerTaskGettingResult): Unit ={

    }

    override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
      //==========================input / output================================
      val inputMetrics = taskEnd.taskMetrics.inputMetrics
      val outputMetrics = taskEnd.taskMetrics.outputMetrics
      val input_output = scala.collection.mutable.HashMap(
        "bytesRead" -> inputMetrics.bytesRead, //bytes read
        "recordsRead" -> inputMetrics.recordsRead, //records read
        "bytesWritten" -> outputMetrics.bytesWritten, //bytes written
        "recordsWritten" -> outputMetrics.recordsWritten //records written
      )
      println(input_output)

      //==========================task metrics================================
      val metrics = taskEnd.taskMetrics
      val taskMetricsMap = scala.collection.mutable.HashMap(
        "executorDeserializeTime" -> metrics.executorDeserializeTime, //executor deserialization time
        "executorDeserializeCpuTime" -> metrics.executorDeserializeCpuTime, //executor deserialization CPU time
        "executorRunTime" -> metrics.executorRunTime, //executor run time
        "resultSize" -> metrics.resultSize, //size of the task result
        "jvmGCTime" -> metrics.jvmGCTime, //JVM GC time
        "resultSerializationTime" -> metrics.resultSerializationTime, //result serialization time
        "memoryBytesSpilled" -> metrics.memoryBytesSpilled, //bytes spilled in memory
        "diskBytesSpilled" -> metrics.diskBytesSpilled, //bytes spilled to disk
        "peakExecutionMemory" -> metrics.peakExecutionMemory //peak execution memory
      )
      println(taskMetricsMap)
      //==========================shuffle metrics================================
      val shuffleReadMetrics = metrics.shuffleReadMetrics
      val shuffleWriteMetrics = metrics.shuffleWriteMetrics

      val shuffleMap = scala.collection.mutable.HashMap(
        "remoteBlocksFetched" -> shuffleReadMetrics.remoteBlocksFetched, //blocks fetched from remote executors
        "localBlocksFetched" -> shuffleReadMetrics.localBlocksFetched, //blocks fetched locally
        "remoteBytesRead" -> shuffleReadMetrics.remoteBytesRead, //bytes read from remote executors
        "localBytesRead" -> shuffleReadMetrics.localBytesRead, //bytes read locally
        "fetchWaitTime" -> shuffleReadMetrics.fetchWaitTime, //time spent waiting for fetched data
        "recordsRead" -> shuffleReadMetrics.recordsRead, //total records read in shuffle
        "bytesWritten" -> shuffleWriteMetrics.bytesWritten, //total bytes written in shuffle
        "recordsWritten" -> shuffleWriteMetrics.recordsWritten, //total records written in shuffle
        "writeTime" -> shuffleWriteMetrics.writeTime //time spent writing shuffle data
      )
      println(shuffleMap)

      val taskInfo: TaskInfo = taskEnd.taskInfo

      val taskInfoMap = scala.collection.mutable.HashMap(
        "taskId" -> taskInfo.taskId,
        "host" -> taskInfo.host,
        "speculative" -> taskInfo.speculative, //speculative execution
        "failed" -> taskInfo.failed,
        "killed" -> taskInfo.killed,
        "running" -> taskInfo.running
      )

      println(taskInfoMap)
      //==========================job progress================================
      val stageId = taskEnd.stageId
      val jobId = stageToJob(stageId)
      val (totalTaskNum, finishTaskNum, percent) = jobToJobInfo(jobId)
      val currentFinishTaskNum = finishTaskNum + 1
      val newPercent = currentFinishTaskNum * 100 / totalTaskNum
      jobToJobInfo(jobId) = (totalTaskNum, currentFinishTaskNum, newPercent)
      if (newPercent > percent) {
        //handle application progress
        totalPercent = newPercent
        println(totalPercent)
      }
      log.info("taskEnd")
    }

    override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      jobMap += ("startTime" -> dateFormat.format(jobStart.time))
      println(jobMap)

      val jobId = jobStart.jobId
      //total number of tasks across all stages of this job
      jobTaskNum = jobStart.stageInfos.map(stageInfo => stageInfo.numTasks).sum
      jobToJobInfo += (jobId -> (jobTaskNum, 0, 0))
      //remember which job each stage belongs to
      jobStart.stageIds.foreach(stageId => stageToJob(stageId) = jobId)
      log.info("jobStart")
    }

    override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
      //record whether any job failed
      jobEnd.jobResult match {
        case JobFailed(_) => hasException = true
        case _ =>
      }
      finishJobNum += 1
      val dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
      jobMap += ("totalPercent" -> totalPercent.toString)
      jobMap += ("endTime" -> dateFormat.format(jobEnd.time))
      println(jobMap)
      log.info("jobEnd")
    }

    override def onEnvironmentUpdate(environmentUpdate: SparkListenerEnvironmentUpdate): Unit = {
    }

    override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded): Unit = {
    }

    override def onBlockManagerRemoved(blockManagerRemoved: SparkListenerBlockManagerRemoved): Unit = {
    }

    override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = {
    }

    override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit =  {
      log.info("start")
    }

    override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit =  {
      log.info("applicationEnd")
    }

    override def onExecutorMetricsUpdate(executorMetricsUpdate: SparkListenerExecutorMetricsUpdate): Unit = {
    }

    override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = {
    }

    override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = {
    }

    override def onExecutorBlacklisted(executorBlacklisted: SparkListenerExecutorBlacklisted): Unit = {
    }

    override def onExecutorUnblacklisted(executorUnblacklisted: SparkListenerExecutorUnblacklisted): Unit = {
    }

    override def onNodeBlacklisted(nodeBlacklisted: SparkListenerNodeBlacklisted): Unit = {
    }

    override def onNodeUnblacklisted(nodeUnblacklisted: SparkListenerNodeUnblacklisted): Unit = {
    }

    override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = {
    }

    override def onSpeculativeTaskSubmitted(speculativeTask: SparkListenerSpeculativeTaskSubmitted): Unit = {
    }

    override def onOtherEvent(event: SparkListenerEvent): Unit = {
    }
}
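
To see the listener above in action, register it on the driver before running any jobs. A minimal sketch, assuming the SparkAppListener object is on the classpath (app name and master are placeholders):

import org.apache.spark.sql.SparkSession

object SparkAppListenerDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("listener-metrics-demo") // placeholder app name
      .master("local[*]")               // placeholder master
      .getOrCreate()

    // Attach the listener defined above; every subsequent job will
    // report task metrics and progress through its callbacks.
    spark.sparkContext.addSparkListener(SparkAppListener)

    // A small job with a shuffle, so both task and shuffle metrics are populated.
    spark.sparkContext.parallelize(1 to 10000, 8)
      .map(i => (i % 10, i.toLong))
      .reduceByKey(_ + _)
      .count()

    spark.stop()
  }
}

Note that events are delivered asynchronously on Spark's listener bus thread, so callbacks should return quickly; a slow listener can cause events to be dropped. If you prefer configuration-based registration, spark.extraListeners accepts a comma-separated list of fully qualified class names, but those classes are instantiated by reflection and need a zero-argument constructor (or one taking SparkConf), so the logic would have to live in a class rather than an object.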

