实现Spark的流监控
流式作业应该7*24⼩时不间断运⾏,那么这期间如果出现问题,需要及时邮件报警;
并且,在上⼀篇文章中⾃定义维护offset的时候,也需要实现事务的offset提交;
因此,我们需要实现接⼝:
实现接⼝后,代码的流程顺序如下:
实现StreamingListener,以监控spark作业状态,传⼊StreamingContext可以在某种出错时退出当前的SparkStreaming作业。
SparkStreaming各个回调函数的调⽤顺序如下:
onReceiverStarted->[接收到数据]->
onBatchSubmitted->
onBatchStarted->
onOutputOperationStarted->
onOutputOperationCompleted->
onBatchCompleted->[接收到数据]->
onBatchSubmitted->
onBatchStarted->
onOutputOperationStarted->
onOutputOperationCompleted->
onBatchCompleted->.......->
onReceiverStopped
其中[接收到数据]是可选项,并不是每次都会接收到数据
代码部分
package com.cartravel.spark
import com.cartravel.kafka.KafkaManager
import com.cartravel.mailAlarm.MailUtil
import com.cartravel.readApplicationconfUtil.readApplicatuinFileUtil
import com.cartravel.redis.JedisUtil
import com.cartravel.tools.DataStruct
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.scheduler.{StreamingListener, StreamingListenerBatchCompleted, StreamingListenerBatchStarted, StreamingListenerBatchSubmitted, StreamingListenerOutputOperationCompleted, StreamingListenerReceiverError, StreamingListenerReceiverStarted, StreamingListenerReceiverStopped}
import org.json4s.DefaultFormats
import org.json4s.jackson.Json
import redis.clients.jedis.Jedis
/**
* 流的监控
*为什么要进行流监控?
* 我们要知道当前这个流有没有阻塞,如果运行慢了,你怎么知道他慢不慢啊.
*/
/**
 * Streaming-job monitor.
 *
 * Registered via `streamingContext.addStreamingListener(...)`. On every batch it
 * (1) detects blocked/slow batches and sends alarm mails, (2) commits Kafka offsets
 * only after a batch completed successfully (transactional "process then commit"),
 * and (3) publishes per-batch metrics to Redis for external inspection.
 *
 * @param session     active SparkSession (kept for callers; not used internally)
 * @param conf        SparkConf; boolean key "enableSendEmailOnTaskFail" gates alarms
 * @param duration    window/batch duration in seconds; a batch whose total delay
 *                    exceeds 6 * duration is treated as blocked
 * @param appName     streaming-app name, embedded in alarm subjects and Redis keys
 * @param rdd         batch RDD whose Kafka offsets are persisted on batch completion
 * @param kafkaManger manager used to persist offsets (spelling kept for compatibility)
 */
class realTimeStreamMonitoring(
session:SparkSession,
conf:SparkConf,
duration:Int,
appName:String,
rdd:RDD[ConsumerRecord[String, String]],
kafkaManger:KafkaManager
) extends StreamingListener{
val jedisUtil: JedisUtil = JedisUtil.getInstance()
val jedis: Jedis = jedisUtil.getJedis
jedis.select(3) // monitoring metrics live in Redis DB 3
// Listener creation timestamp; part of the Redis key so separate runs don't collide.
val currentTime: Long = System.currentTimeMillis()
// Latest batch metrics; serialized to JSON and written to Redis on batch completion.
var map: Map[String, String] = Map[String,String]()

/** True when mail alarms are enabled via SparkConf. */
private def alarmEnabled: Boolean =
  conf.getBoolean("enableSendEmailOnTaskFail", defaultValue = false)

/** Builds mail properties from the application conf and sends one alarm mail. */
private def sendAlarmMail(subject: String, content: String): Unit = {
  val args = Array(readApplicatuinFileUtil.getConf("main.host"), subject, content)
  val properties = DataStruct.convertProp(
    ("mail.host", readApplicatuinFileUtil.getConf("mail.host")),
    ("mail.transport.protocol", readApplicatuinFileUtil.getConf("mail.transport.protocol")),
    ("mail.smtp.auth", readApplicatuinFileUtil.getConf("mail.smtp.auth"))
  )
  MailUtil.sendMail(properties, args)
}

// Receiver start/stop need no special handling; keep the default behavior.
override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted): Unit = super.onReceiverStarted(receiverStarted)
override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped): Unit = super.onReceiverStopped(receiverStopped)

/** Receiver failure: mail the last error message, then clear the interrupt flag. */
override def onReceiverError(receiverError: StreamingListenerReceiverError): Unit = {
  val lastErrorMessage = receiverError.receiverInfo.lastErrorMessage
  if (lastErrorMessage != null) {
    if (alarmEnabled) {
      sendAlarmMail(s"spark监控任务_$appName", lastErrorMessage)
    }
    // Clears (does not restore) this thread's interrupt status, so subsequent
    // blocking calls are not immediately interrupted again. NOTE(review): the
    // original comment claimed this "restarts the dead thread" — it does not.
    Thread.interrupted()
  }
}

/** Records the scheduling delay of the batch that just started. */
override def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit = {
  // schedulingDelay is an Option; the original used .get, which throws when absent.
  val schedulingDelay = batchStarted.batchInfo.schedulingDelay.getOrElse(0L).toString
  map += ("schedulingDelay" -> schedulingDelay)
}

/** Records how many records were submitted with this batch. */
override def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted): Unit = {
  val numRecords = batchSubmitted.batchInfo.numRecords.toString
  map += ("numRecords" -> numRecords)
}

/**
 * Fired only after a batch finished successfully — the safe point to commit
 * Kafka offsets. Also publishes metrics to Redis and alarms when the batch
 * took longer than 6x the window duration (i.e. the stream looks blocked).
 */
override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = {
  kafkaManger.persistOffset(rdd)
  // Total time (ms) from batch submission to completion; Option-safe (was .get).
  val batchTime = batchCompleted.batchInfo.totalDelay.getOrElse(0L)
  map += ("batchTime" -> batchTime.toString)
  val key = s"StreamMonitor_${appName}_$currentTime"
  jedis.set(key, Json(DefaultFormats).write(map))
  jedis.expire(key, 3600) // keep metrics for one hour
  // duration is seconds, batchTime is milliseconds: convert duration to ms.
  // (The original compared duration*6 < batchTime*1000, which fired on
  // essentially every batch because the units were inverted.)
  if (duration * 6 * 1000L < batchTime) {
    if (alarmEnabled) {
      val processingDelay = batchCompleted.batchInfo.processingDelay // processing time
      val monitorContext =
        s"""
           |StreamListener
           |总消耗时间:$batchTime
           |处理时间:$processingDelay
           |请及时检查!!!
        """.stripMargin
      sendAlarmMail(s"sparkStreaming监控任务_${appName}_程序出现堵塞", monitorContext)
    }
  }
}

/** The output stage can fail too; record its duration and alarm on failure. */
override def onOutputOperationCompleted(outputOperationCompleted: StreamingListenerOutputOperationCompleted): Unit = {
  val info = outputOperationCompleted.outputOperationInfo
  // duration is Option[Long]: .get would throw, and a raw Long does not fit
  // Map[String, String] — convert explicitly (the original had both defects).
  val outPutDuration = info.duration.getOrElse(0L)
  map += ("outPutDuration" -> outPutDuration.toString)
  // failureReason is None on success; the original's .get threw a
  // NoSuchElementException on every SUCCESSFUL output operation.
  info.failureReason.foreach { reason =>
    if (alarmEnabled) {
      val monitorContext =
        s"""
           |StreamListener
           |输出所耗时间:$outPutDuration
           |错误原因:$reason
           |请及时检查!!!
        """.stripMargin
      sendAlarmMail(s"sparkStreming监控任务_输出出现异常", monitorContext)
    }
  }
}
}
我们怎么将这个流式监控运作起来?
在流的主程序中,当业务逻辑执行成功时,我们调用streamingContext.addStreamingListener(传入一个流监控类的实例)
addStreamingListener这个方法类似于挂钩,只要程序一运行任务就要调用这个流监控