前言
Spark的Web UI可以监控一些作业信息,但是Web UI是跟随作业的生命周期,不方便随时随地的获取作业的信息。
因此,介绍一下以下两种监控方式:Spark自身监控和打造自己的监控
官方地址:http://spark.apache.org/docs/latest/monitoring.html
一、Spark自身监控
Spark自带了history server脚本,只要我们配置好相关参数,即可保存作业信息,事后通过历史来查看作业的信息。
1、配置存储
# vi spark-defaults.conf 开启日志 设置日志保存目录(注意:配置文件名是 spark-defaults.conf)
spark.eventLog.enabled true
spark.eventLog.dir hdfs://mycluster/log/spark-events
# vi spark-env.sh 指定历史信息的地址
export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://mycluster/log/spark-events"
# 默认18080端口,云主机修改端口,防止被挖矿
# export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://mycluster/log/spark-events -Dspark.history.ui.port=28080"  # 端口必须在 1-65535 之间
2、开启服务
# jps ==> HistoryServer
./sbin/start-history-server.sh
3、Web UI查看:master:18080
二、打造自己的监控
虽然Spark提供了REST API,但是我们使用SparkListener来监控
1、继承SparkListener,实现其方法,可以拿到很多作业信息,把信息持久化到DB
import com.wsd.spark.utils.DBUtils
import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.scheduler._
import org.json4s.DefaultFormats
import org.json4s.jackson.Json
import scala.collection.JavaConverters._
import scala.collection.mutable
/**
 * A [[SparkListener]] that collects per-job metrics — app name, job id,
 * start/end time, task count, input bytes, shuffle bytes, output bytes —
 * and persists them through [[DBUtils]] when each job finishes.
 *
 * Registered via `spark.extraListeners`; Spark instantiates the class through
 * its single-`SparkConf`-argument constructor.
 *
 * NOTE(review): Spark delivers listener callbacks on a single listener-bus
 * thread, so the mutable state below is presumed safe without extra
 * synchronization — confirm against the Spark version in use.
 *
 * TODO: persist to HBase/Redis/RDBMS as needed.
 *
 * @author wsd
 * @date 2020/3/25 11:35
 * @param conf the application's SparkConf, used to read spark.app.name
 */
class SparkListenerDemo(conf: SparkConf) extends SparkListener with Logging {
  // Keys under which metrics are stored in `map`; must match the keys
  // DBUtils.add reads on the persistence side.
  val appNameKey = "appName"
  val jobIdKey = "jobId"
  val jobStartTimeKey = "jobStartTime"
  val jobEndTimeKey = "jobEndTime"
  val stageNumKey = "stageNum"
  val taskNumKey = "taskNum"
  val inputBytesReadKey = "inputBytesRead"
  val shuffleBytesKey = "shuffleBytes"
  val bytesWrittenKey = "bytesWritten"

  // Number of tasks that finished in the current job.
  var counter = 0

  // Collected metrics for the current job, string-valued for easy persistence.
  val map = new mutable.HashMap[String, String]()

  /**
   * Records appName, jobId and job start time.
   *
   * BUG FIX: the per-job state (task counter and byte totals) is now reset
   * here; previously it accumulated across jobs while jobId was per-job,
   * producing inconsistent rows for applications that run several jobs.
   *
   * @param jobStart event carrying the job id and start timestamp
   */
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    counter = 0
    map.clear()
    map.put(appNameKey, conf.get("spark.app.name"))
    map.put(jobIdKey, jobStart.jobId.toString)
    map.put(jobStartTimeKey, jobStart.time.toString)
  }

  /**
   * Accumulates task-level metrics: task count, input bytes read,
   * shuffle bytes and output bytes written.
   *
   * NOTE(review): "shuffleBytes" counts only shuffle WRITE bytes
   * (`shuffleWriteMetrics.bytesWritten`); shuffle reads are not included —
   * confirm this matches the intended semantics.
   *
   * @param taskEnd event carrying this task's metrics
   */
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    val metrics = taskEnd.taskMetrics
    // BUG FIX: use a String default so the lookup stays typed as String; the
    // old getOrElse(key, 0) widened the result to Any before .toString.toLong.
    val inputBytesRead = map.getOrElse(inputBytesReadKey, "0").toLong + metrics.inputMetrics.bytesRead
    val shuffleBytes = map.getOrElse(shuffleBytesKey, "0").toLong + metrics.shuffleWriteMetrics.bytesWritten
    val bytesWritten = map.getOrElse(bytesWrittenKey, "0").toLong + metrics.outputMetrics.bytesWritten
    counter += 1
    map.put(taskNumKey, counter.toString)
    map.put(inputBytesReadKey, inputBytesRead.toString)
    map.put(shuffleBytesKey, shuffleBytes.toString)
    map.put(bytesWrittenKey, bytesWritten.toString)
  }

  /**
   * Records the job end time, logs the collected metrics as JSON and
   * persists them to the database.
   *
   * NOTE(review): logError is presumably used so the JSON is visible at the
   * default log level — switch to logInfo once logging is configured.
   *
   * @param jobEnd event carrying the job end timestamp
   */
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    map.put(jobEndTimeKey, jobEnd.time.toString)
    logError(Json(DefaultFormats).write(map))
    // Scala mutable map -> java.util.Map directly via .asJava; the old
    // identity .map { case (k, v) => (k, v) } copy was unnecessary.
    DBUtils.add(map.asJava)
  }
}
2、DBUtils
import java.sql.*;
import java.util.Map;
/**
 * MySQL utility that persists the Spark job metrics collected by
 * SparkListenerDemo into the {@code spark_job_info} table.
 *
 * NOTE(review): the JDBC URL and credentials are hard-coded; move them to
 * external configuration before production use.
 *
 * @author wsd
 * @date 2020/3/26 10:27
 */
public class DBUtils {
    static {
        try {
            // Legacy driver class name; with JDBC 4+ drivers explicit loading
            // is optional (the driver is discovered via ServiceLoader).
            Class.forName("com.mysql.jdbc.Driver");
            // Class.forName("com.mysql.cj.jdbc.Driver"); // mysql-connector-java 8.x
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection to the local MySQL instance.
     *
     * @return a live connection, or {@code null} if connecting failed
     *         (callers must check for null)
     */
    private static Connection getConnection() {
        try {
            return DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/ruozedata?serverTimezone=UTC",
                    "root", "123456");
        } catch (SQLException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Inserts one row of job metrics into {@code spark_job_info}.
     *
     * Expected keys (missing ones default to "-"): appName, jobId,
     * jobStartTime, jobEndTime, taskNum, inputBytesRead, shuffleBytes,
     * bytesWritten.
     *
     * @param map metric name to stringified value
     * @return {@code true} iff at least one row was inserted
     */
    public static boolean add(Map<String, String> map) {
        Connection conn = getConnection();
        if (conn == null) {
            // BUG FIX: previously a failed connection caused a
            // NullPointerException on conn.prepareStatement(...).
            return false;
        }
        PreparedStatement psmt = null;
        try {
            psmt = conn.prepareStatement(
                    "insert into spark_job_info (appname,jobid,jobstarttime,jobendtime,tasknum,inputsize,shufflesize,outputsize) values(?,?,?,?,?,?,?,?)");
            psmt.setString(1, map.getOrDefault("appName", "-"));
            psmt.setString(2, map.getOrDefault("jobId", "-"));
            psmt.setString(3, map.getOrDefault("jobStartTime", "-"));
            psmt.setString(4, map.getOrDefault("jobEndTime", "-"));
            psmt.setString(5, map.getOrDefault("taskNum", "-"));
            psmt.setString(6, map.getOrDefault("inputBytesRead", "-"));
            psmt.setString(7, map.getOrDefault("shuffleBytes", "-"));
            psmt.setString(8, map.getOrDefault("bytesWritten", "-"));
            // BUG FIX: execute() returns whether the first result is a
            // ResultSet, which is always false for INSERT — the old code
            // reported failure even when the insert succeeded. executeUpdate()
            // returns the affected row count instead.
            return psmt.executeUpdate() > 0;
        } catch (SQLException e) {
            e.printStackTrace();
            return false;
        } finally {
            close(conn, psmt, null);
        }
    }

    /**
     * Quietly closes the given JDBC resources (each may be null).
     *
     * @param conn connection to close, or null
     * @param st   statement to close, or null
     * @param set  result set to close, or null
     */
    private static void close(Connection conn, PreparedStatement st, ResultSet set) {
        try {
            if (set != null) {
                set.close();
            }
            if (st != null) {
                st.close();
            }
            if (conn != null) {
                conn.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Smoke test: prints the connection (null means connecting failed). */
    public static void main(String[] args) {
        Connection connection = getConnection();
        System.out.println(connection);
    }
}
3、使用通过sparkConf.set进去
// Register the listener through SparkConf: Spark instantiates the class by its
// fully-qualified name (a constructor taking a single SparkConf is supported).
val sparkConf = new SparkConf() //.setMaster("local").setAppName("SparkWC")
.set("spark.extraListeners", "com.wsd.sparkcore.listener.SparkListenerDemo")
三、总结
以上只是简单使用了SparkListener拿到作业信息,具体拿到信息后保存到哪?通过邮箱或短信等如何告警?需要根据自己的业务进一步开发