[Spark 2.4] Problems caused by adding a Prometheus Sink, analyzed through the source code

Table of Contents

Background

Prometheus Sink

How to expose an HTTP endpoint

Service registration source code

Service discovery source code

How to collect Spark metrics

MetricsReporter source code

Enabling the Prometheus sink via Spark conf

Problems starting the Prometheus sink

Spark Metrics System

Source code analysis of the Spark startup process

spark-submit.sh

SparkSubmit execution flow

--jars, --driver-library-path and --driver-class-path

Spark main jar

YarnClusterApplication

AM startup flow

Setting the AM CLASSPATH environment variable

Uploading container resources to HDFS

Launching the AM

ApplicationMaster

Fixing the ClassNotFoundException

ClassNotFoundException in executors as well

AM and Driver HTTP servers expose identical content

Code structure

PrometheusSink refactoring

PrometheusReporter refactoring

Singleton Prometheus Collector

Results

Executor metrics are empty


Background

        The project in question is a multi-module project containing ModuleA, ModuleB and ModuleC.

        ModuleA: defines the main function and uses SPI and reflection to load the concrete logic of Spark's foreachRDD from ModuleC.

        ModuleB: defines the Spark Streaming context configuration and the processing order of the DStreams; the Prometheus-sink-related code lives in this module.

        ModuleC: defines the concrete foreachRDD logic, e.g. deserializing the RDD records and writing them into HBase.

        The Spark job is launched with spark-submit using the yarn-cluster deploy mode.
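
        For context, here is a minimal sketch of the SPI wiring between ModuleA and ModuleC (the interface and class names are hypothetical, not the project's real ones):

import java.util.ServiceLoader;

// Hypothetical SPI contract: ModuleC ships an implementation plus a META-INF/services
// descriptor, and ModuleA discovers it at runtime without a compile-time dependency.
interface ForeachRddHandler {
    void handle(byte[] serializedRecord);
}

class ForeachRddHandlerLoader {
    static ForeachRddHandler load() {
        for (ForeachRddHandler handler : ServiceLoader.load(ForeachRddHandler.class)) {
            return handler; // first implementation found on the classpath, e.g. ModuleC's HBase writer
        }
        throw new IllegalStateException("no ForeachRddHandler implementation found");
    }
}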

Prometheus Sink

        Adding Prometheus support brings two main challenges.

How to expose an HTTP endpoint

        Since the number of submitted Spark jobs is not fixed and the jobs run on YARN, ports must be allocated dynamically; and because Prometheus uses a pull model, the HTTP endpoints must support service discovery.

        Prometheus 2.30 supports HTTP Service Discovery, so service registration and discovery are implemented with ZooKeeper plus Spring Boot.

Service registration source code

package ...;

import com.codahale.metrics.MetricRegistry;
import ....PrometheusReporter;
import io.prometheus.client.exporter.HTTPServer;
import org.apache.commons.lang3.StringUtils;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.RetryForever;
import org.apache.spark.metrics.sink.Sink;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

/**
 * @Author:
 * @Date: 2022/1/17
 * @TIME: 11:08
 */
public class PrometheusSink implements Sink {
    private final static Logger LOG = LoggerFactory.getLogger(PrometheusSink.class);
    private final static String ZK_HOSTS_KEY = "zkHosts";
    private final static String ZK_SESSION_TIMEOUT_MS = "sessionTimeoutMs";
    private final static String ZK_CONNECT_TIMEOUT_MS = "connectTimeoutMs";
    private final static String ZK_RETRY_INTERVAL_MS = "retryIntervalMs";
    private final static String SPARK_JOB_NAME = "jobName";
    private final Properties property;
    private final MetricRegistry metricRegistry;
    private HTTPServer httpServer;
    private CuratorFramework zkClient;
    private PrometheusReporter prometheusReporter;
    public PrometheusSink(Properties property,
                          MetricRegistry registry,
                          org.apache.spark.SecurityManager securityMgr) {
        this.property = property;
        this.metricRegistry = registry;
    }

    @Override
    public void start() {
        try {
            String zkHosts = this.property.getProperty(ZK_HOSTS_KEY);
            if (StringUtils.isEmpty(zkHosts))
                throw new RuntimeException("[[instance].sink.prometheus.zkHosts] must not be empty");
            int sessionTimeoutMs = Integer.parseInt(this.property
                    .getProperty(ZK_SESSION_TIMEOUT_MS, "30000"));
            int connectTimeoutMs = Integer.parseInt(this.property
                    .getProperty(ZK_CONNECT_TIMEOUT_MS, "10000"));
            int retryInterval = Integer.parseInt(this.property
                    .getProperty(ZK_RETRY_INTERVAL_MS, "3000"));
            String jobName = this.property.getProperty(SPARK_JOB_NAME);
            if (StringUtils.isEmpty(jobName))
                throw new RuntimeException("[[instance].sink.prometheus.jobName] must not be empty");
            this.prometheusReporter = new PrometheusReporter(this.metricRegistry,jobName);
            int retryCount = 0;
            InetSocketAddress inetSocketAddress = new InetSocketAddress(InetAddress.getLocalHost(), 0);
            String url = "http://%s:%d/metrics";
            String ip = null;
            int port;
            while (true) {
                try {
                    try (ServerSocket serverSocket = new ServerSocket()) {
                        serverSocket.bind(inetSocketAddress);
                        ip = inetSocketAddress.getAddress().getHostAddress();
                        port = serverSocket.getLocalPort();
                        url = String.format(url, ip, port);
                    }
                    this.httpServer = new HTTPServer(port);
                    break;
                } catch (IOException ioException) {
                    if(++retryCount==3) {
                        throw ioException;
                    }
                }
            }
            LOG.info("start [Prometheus] metrics sink\nHttp EndPoint:{}",String.format(url,ip, port));

            //register with ZooKeeper
            this.zkClient = CuratorFrameworkFactory.newClient(zkHosts,sessionTimeoutMs,connectTimeoutMs,new RetryForever(retryInterval));
            this.zkClient.start();
            LOG.info("started zk client, zkHosts=[{}], sessionTimeout=[{}] ms, connectTimeout=[{}] ms," +
                            " failOverRetryInterval=[{}] ms", zkHosts, sessionTimeoutMs, connectTimeoutMs,
                    retryInterval);
            this.zkClient.blockUntilConnected();
            String zNodePath = "/waterfall/spark/metrics/promethues/"+jobName+"/config";
            this.zkClient.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL_SEQUENTIAL)
                    .forPath(zNodePath,url.getBytes(StandardCharsets.US_ASCII));
        }catch (Exception e) {
            throw new RuntimeException("failed to start the prometheus metrics sink", e);
        }
        this.prometheusReporter.start(15, TimeUnit.SECONDS);
    }

    @Override
    public void stop() {
        if(zkClient!=null) {
            zkClient.close();
        }
        if(this.httpServer!=null)
            this.httpServer.close();
        this.prometheusReporter.stop();
    }

    @Override
    public void report() {
        //judging from the Spark source, report() is only triggered right before the JVM stops
        LOG.info("the JVM process is about to stop, report() is called once automatically");
        this.prometheusReporter.report();
    }
}

Service discovery source code

package ...;

import ....config.zookeeper.ZookeeperConfigProperties;
import ....config.mvc.exception.BusinessException;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.cache.TreeCache;
import org.apache.curator.framework.recipes.cache.TreeCacheEvent;
import org.apache.curator.retry.RetryForever;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.StringUtils;

import java.io.Closeable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * @Author: 
 * @Date: 2021/12/15
 * @TIME: 16:55
 */
public class FlinkServiceDiscovery implements Closeable {
    private final static Logger LOG = LoggerFactory.getLogger(FlinkServiceDiscovery.class);

    private final ZookeeperConfigProperties zkConfig;
    private final Path parentPath;
    private CuratorFramework zkClient;
    private TreeCache treeCache;

    //currently running flink tasks
    private final Map<String, Set<String>> activeServiceMap = new HashMap<>();
    private final ReadWriteLock serviceMapLock = new ReentrantReadWriteLock();
    //the two fields below ensure the Spring context only starts after the TreeCache has been initialized for the first time
    private final AtomicBoolean hasStart = new AtomicBoolean(false);
    private final CountDownLatch countDownLatch = new CountDownLatch(1);
    public FlinkServiceDiscovery(ZookeeperConfigProperties zkConfig) {
        this.zkConfig = zkConfig;
        parentPath = Paths.get(this.zkConfig.getFlinkMetricsZnodePath());
    }

    /**
     *
     * @return true: started successfully; false: failed to start
     * @throws InterruptedException
     */
    public boolean start() throws InterruptedException {
        LOG.info("starting service discovery for the flink metrics servers, zookeeper: [{}], watched ZNode path: [{}]",
                this.zkConfig.getZkHosts(), this.zkConfig.getFlinkMetricsZnodePath());
        this.zkClient = CuratorFrameworkFactory
                .newClient(this.zkConfig.getZkHosts(), new RetryForever(3000));
        this.zkClient.start();
        //wait until the connection is established
        this.zkClient.blockUntilConnected();
        try {
            Stat stat = this.zkClient.checkExists()
                    .forPath(this.zkConfig.getFlinkMetricsZnodePath());
            if (stat == null) {
                LOG.warn("ZNode path [{}] does not exist, so service discovery will not be started", this.zkConfig.getFlinkMetricsZnodePath());
                return false;
            }
            LOG.info("start watching ZNode [{}]", this.zkConfig.getFlinkMetricsZnodePath());
            this.treeCache = TreeCache.newBuilder(this.zkClient, this.zkConfig.getFlinkMetricsZnodePath())
                    .setCacheData(true)
                    .build();
            treeCache.getListenable().addListener(this::ZnodeListenerFunction);
            this.treeCache.start();
            return true;
        } catch (Exception exception) {
            LOG.warn("failed to set up service discovery", exception);
            try {
                this.close();
            } catch (IOException ioException) {
                //do nothing
            }
        }
        return false;
    }

    /**
     * Wait until the TreeCache has been fully built
     * @throws InterruptedException
     */
    public void waitUntilWatcherReady() throws InterruptedException {
        LOG.info("waiting for the watcher node cache to initialize");
        if(!this.hasStart.get()) {
            this.countDownLatch.await();
        }
        LOG.info("watcher node cache initialized");
    }

    /**
     * Get the taskName and Prometheus server URLs of all currently alive flink tasks
     *
     * @return
     */
    public Map<String, Set<String>> findAllActiveServices() {
        try {
            this.serviceMapLock.readLock().lock();
            return this.activeServiceMap;
        }finally {
            this.serviceMapLock.readLock().unlock();
        }
    }

    /**
     * Callback invoked on ZNode changes
     *
     * @param curatorFramework
     * @param event
     */
    private void ZnodeListenerFunction(CuratorFramework curatorFramework, TreeCacheEvent event) {
        switch (event.getType()) {
            case NODE_ADDED:
                this.addServiceMap(event);
                break;
            case NODE_UPDATED:
                LOG.warn("node update,{}", event.getData().toString());
                //should not happen in theory
                break;
            case NODE_REMOVED:
                this.deleteServiceMap(event);
                break;
            case CONNECTION_LOST:
            case CONNECTION_SUSPENDED:
                LOG.warn("zookeeper connection lost, event type={}", event.getType());
                break;
            case INITIALIZED:
                LOG.debug("event type={}", event.getType());
                if(this.hasStart.compareAndSet(false,true)) {
                    this.countDownLatch.countDown();
                }
            default:
                LOG.debug("event type={}", event.getType());
        }
    }

    /**
     * Handle a node addition
     *
     * @param addEvent
     */
    private void addServiceMap(TreeCacheEvent addEvent) {
        String watchPath = addEvent.getData().getPath();
        byte[] watchDataArray = addEvent.getData().getData();
        String watchData = "";
        if (watchDataArray != null && watchDataArray.length > 0) {
            watchData = new String(watchDataArray, StandardCharsets.UTF_8);
        }
        LOG.debug("node add,path={},data={}", watchPath, watchData);
        //zookeeper locates resources with path-like strings, so java.nio.Path is used to handle them directly
        Path path = Paths.get(watchPath);
        Path subPath = this.parentPath.relativize(path);
        if (subPath.getNameCount() == 2) {
            String taskName = subPath.getName(0).toString();
            try {
                this.serviceMapLock.writeLock().lock();
                if (this.activeServiceMap.containsKey(taskName)) {
                    Set<String> set = this.activeServiceMap.get(taskName);
                    set.add(watchData);
                } else {
                    Set<String> set = new HashSet<>();
                    set.add(watchData);
                    this.activeServiceMap.put(taskName, set);
                }
            } finally {
                this.serviceMapLock.writeLock().unlock();
            }
        } else
            LOG.warn("unexpected ADD event for ZNode path={}", watchPath);
    }

    /**
     * Handle a node removal
     *
     * @param delEvent
     */
    private void deleteServiceMap(TreeCacheEvent delEvent) {
        String watchPath = delEvent.getData().getPath();
        byte[] watchDataArray = delEvent.getData().getData();
        String watchData = "";
        if (watchDataArray != null && watchDataArray.length > 0) {
            watchData = new String(watchDataArray, StandardCharsets.UTF_8);
        }
        LOG.debug("node delete,path={},data={}", watchPath, watchData);
        Path path = Paths.get(watchPath);
        Path subPath = this.parentPath.relativize(path);
        if (subPath.getNameCount() == 1) {
            String taskName = subPath.getFileName().toString();
            LOG.warn("ZNode was removed manually, path={}", watchPath);
            if(!StringUtils.hasText(taskName))
                return;
            try {
                this.serviceMapLock.writeLock().lock();
                this.activeServiceMap.remove(taskName);
            }finally {
                this.serviceMapLock.writeLock().unlock();
            }
        } else if (subPath.getNameCount() == 2) {
            String taskName = subPath.getName(0).toString();
            LOG.debug("flink task [{}]: metrics server [{}] process ended", taskName, watchData);
            try {
                this.serviceMapLock.writeLock().lock();
                if(this.activeServiceMap.containsKey(taskName)) {
                    this.activeServiceMap.get(taskName).remove(watchData);
                    if(this.activeServiceMap.get(taskName).isEmpty()) {
                        this.activeServiceMap.remove(taskName);
                    }
                }
            }finally {
                this.serviceMapLock.writeLock().unlock();
            }
        } else
            LOG.warn("unexpected DELETE event for ZNode path={}", watchPath);
    }

    @Override
    public void close() throws IOException {
        LOG.info("closing the flink metrics service discovery");
        if (this.treeCache != null)
            this.treeCache.close();
        if (this.zkClient != null)
            this.zkClient.close();
    }
}
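
        With the registry above in place, Prometheus 2.30's http_sd_configs can be pointed at a small endpoint that turns findAllActiveServices() into the HTTP Service Discovery response format, i.e. a JSON array of {"targets": [...], "labels": {...}} objects. Below is a minimal sketch of such a Spring MVC controller, assuming FlinkServiceDiscovery is registered as a Spring bean; the endpoint path and label name are illustrative only.

import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.*;

@RestController
public class PrometheusHttpSdController {

    private final FlinkServiceDiscovery discovery;

    public PrometheusHttpSdController(FlinkServiceDiscovery discovery) {
        this.discovery = discovery;
    }

    /** Prometheus http_sd_configs expects a JSON array of {"targets": [...], "labels": {...}} objects. */
    @GetMapping("/prometheus/http-sd")
    public List<Map<String, Object>> targets() {
        List<Map<String, Object>> result = new ArrayList<>();
        for (Map.Entry<String, Set<String>> entry : discovery.findAllActiveServices().entrySet()) {
            List<String> targets = new ArrayList<>();
            for (String url : entry.getValue()) {
                // each ZNode stores a full URL (http://ip:port/metrics); Prometheus targets
                // are plain host:port, so strip the scheme and path here
                targets.add(url.replaceFirst("^https?://", "").replaceFirst("/metrics$", ""));
            }
            Map<String, Object> group = new HashMap<>();
            group.put("targets", targets);
            group.put("labels", Collections.singletonMap("job_name", entry.getKey()));
            result.add(group);
        }
        return result;
    }
}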

How to collect Spark metrics

        The data structures used by Spark's metrics system (Dropwizard metrics) are not compatible with Prometheus, so a dedicated Reporter class is needed to convert between the two.

MetricsReporter source code

package ...;

import com.codahale.metrics.*;

import java.util.Map;
import java.util.SortedMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @Author: 
 * @Date: 2022/1/17
 * @TIME: 13:53
 */
public class PrometheusReporter extends ScheduledReporter {
    private static final Pattern APPLICATION_ID_REGEX = Pattern.compile("(application_\\d+_\\d+)");

    private static io.prometheus.client.Gauge GAUGE_COLLECTOR = null;

    //NOTE: this touches the spark-submit source: the AM blocks (100s by default) waiting for the driver-side SparkContext to finish initializing
    private static AtomicInteger INDENTITY_COUNTER = new AtomicInteger(0);//1: driver, 2: AM

    public PrometheusReporter(MetricRegistry registry, String jobName) {
        super(registry, "prometheus-reporter", MetricFilter.ALL, TimeUnit.SECONDS, TimeUnit.SECONDS);
        jobName = jobName.replaceAll("\\.", "_");
        String gaugeNameSuffix = null;
        if(INDENTITY_COUNTER.incrementAndGet()==1) {
            gaugeNameSuffix = "_driver";
        }else {
            gaugeNameSuffix = "_am";
        }
        GAUGE_COLLECTOR = io.prometheus.client.Gauge.build("spark_guage_" + jobName+gaugeNameSuffix,
                "spark gauge metrics")
                .labelNames("application_id", "origin_lable").register();
    }

    @SuppressWarnings("all")
    @Override
    public void report(SortedMap<String, Gauge> gaugeMap,
                       SortedMap<String, Counter> countMap,
                       SortedMap<String, Histogram> histogramMap,
                       SortedMap<String, Meter> meterMap,
                       SortedMap<String, Timer> timerMap) {
        /**
         * 1. for now, only gauge and counter metrics are parsed
         */
        if (gaugeMap != null) {
            for (Map.Entry<String, Gauge> gaugeEntry : gaugeMap.entrySet()) {
                String metricName = gaugeEntry.getKey();
                String[] splitKeyworlds = this.spilitMetricName(metricName);
                Gauge gauge = gaugeEntry.getValue();
                if(gauge.getValue() instanceof Number) {
                    GAUGE_COLLECTOR.labels(splitKeyworlds[1],splitKeyworlds[0]).set(Double.parseDouble(String.valueOf(gauge.getValue())));
                }
            }
        }

        if(countMap!=null) {
            for (Map.Entry<String, Counter> counterEntry : countMap.entrySet()) {
                String metricName = counterEntry.getKey();
                String[] splitKeyworlds = this.spilitMetricName(metricName);
                Counter counter = counterEntry.getValue();
                GAUGE_COLLECTOR.labels(splitKeyworlds[1],splitKeyworlds[0])
                        .set(counter.getCount());
            }
        }
    }

    private String[] spilitMetricName(String metricName) {
        String[] result = new String[2];
        result[0] = metricName;
        Matcher matcher = APPLICATION_ID_REGEX.matcher(metricName);
        if(matcher.find()) {
            result[0] = metricName.replaceAll("application_\\d+_\\d+","appid");
            result[1] = matcher.group(1);
        }else {
            result[1] = "NaN";
        }
        result[0] = result[0].replaceAll("\\.","_");
        return result;
    }


}

Enabling the Prometheus sink via Spark conf

        There are several ways to configure this, e.g. by adding entries to ${SPARK_HOME}/conf/metrics.properties. I personally prefer setting it through spark-submit --conf.

spark-submit --master yarn --deploy-mode cluster \
--conf spark.metrics.conf.*.sink.prometheus.class=...spark.metric.sink.PrometheusSink \
--conf spark.metrics.conf.*.sink.prometheus.zkHosts=xxx:2181 \
--conf spark.metrics.conf.*.sink.prometheus.sessionTimeoutMs=30000 \
--conf spark.metrics.conf.*.sink.prometheus.connectTimeoutMs=10000 \
--conf spark.metrics.conf.*.sink.prometheus.retryIntervalMs=3000 \
--conf spark.metrics.conf.*.sink.prometheus.jobName=MyJob1 \
--jars moduleB.jar,moduleC.jar \
--class mainClass \
/tmp/moduleA.jar \
<args...>

        Up to this point, all the development work follows the official metrics system documentation, but once the job is running you will hit the following problem.

Problems starting the Prometheus sink

22/01/27 10:20:16 ERROR metrics.MetricsSystem: Sink class ....spark.metric.sink.PrometheusSink cannot be instantiated
22/01/27 10:20:16 ERROR yarn.ApplicationMaster: Uncaught exception: 
java.lang.ClassNotFoundException: ....spark.metric.sink.PrometheusSink
	at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
	at java.lang.Class.forName0(Native Method)
	at java.lang.Class.forName(Class.java:348)
	at org.apache.spark.util.Utils$.classForName(Utils.scala:242)
	at org.apache.spark.metrics.MetricsSystem$$anonfun$registerSinks$1.apply(MetricsSystem.scala:198)
	at org.apache.spark.metrics.MetricsSystem$$anonfun$registerSinks$1.apply(MetricsSystem.scala:194)
	at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
	at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
	at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
	at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
	at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
	at org.apache.spark.metrics.MetricsSystem.registerSinks(MetricsSystem.scala:194)
	at org.apache.spark.metrics.MetricsSystem.start(MetricsSystem.scala:102)
	at org.apache.spark.deploy.yarn.ApplicationMaster.createAllocator(ApplicationMaster.scala:433)
	at org.apache.spark.deploy.yarn.ApplicationMaster.runDriver(ApplicationMaster.scala:460)
	at org.apache.spark.deploy.yarn.ApplicationMaster.run(ApplicationMaster.scala:275)
	at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$3.run(ApplicationMaster.scala:805)
	at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$3.run(ApplicationMaster.scala:804)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1875)
	at org.apache.spark.deploy.yarn.ApplicationMaster$.main(ApplicationMaster.scala:804)
	at org.apache.spark.deploy.yarn.ApplicationMaster.main(ApplicationMaster.scala)

        In spark-submit, the Prometheus sink code (located in ModuleB) was added via --jars moduleB.jar, so in theory this should work.

Spark Metrics System

The Spark metrics system documentation contains the following passage:

syntax: [instance].sink|source.[name].[options]=[value]

#  This file configures Spark's internal metrics system. The metrics system is
#  divided into instances which correspond to internal components.
#  Each instance can be configured to report its metrics to one or more sinks.
#  Accepted values for [instance] are "master", "worker", "executor", "driver",
#  and "applications". A wildcard "*" can be used as an instance name, in
#  which case all instances will inherit the supplied property.

        In other words, a normally running Spark-on-YARN job contains the applications|driver|executor metric instances, and the configuration above used *, which means the Prometheus sink was added to all of these instances. I then changed the configuration as follows:

--conf spark.metrics.conf.driver.sink.prometheus.class=...spark.metric.sink.PrometheusSink \
--conf spark.metrics.conf.driver.sink.prometheus.zkHosts=xxx:2181 \
--conf spark.metrics.conf.driver.sink.prometheus.sessionTimeoutMs=30000 \
--conf spark.metrics.conf.driver.sink.prometheus.connectTimeoutMs=10000 \
--conf spark.metrics.conf.driver.sink.prometheus.retryIntervalMs=3000 \
--conf spark.metrics.conf.driver.sink.prometheus.jobName=MyJob1 \

        Re-running spark-submit with this configuration works fine (note: if driver is changed to executor in the configuration above, a ClassNotFoundException is still thrown, for the same reason as with the ApplicationMaster).

        This shows that the Spark driver and the YARN AM do not load the same set of jars, which calls for some source code analysis.

Source code analysis of the Spark startup process

spark-submit.sh

SparkSubmit execution flow

        The main job of this class is to parse the args passed to spark-submit and then invoke org.apache.spark.deploy.yarn.YarnClusterApplication via reflection. The source is long, so only a few important points are covered here.

--jars, --driver-library-path and --driver-class-path

        After SparkSubmit's processing and before entering YarnClusterApplication, three Spark configs are constructed:

  1. spark.yarn.dist.jars: equivalent to the jar list given by --jars, with some URI normalization such as converting relative paths to absolute paths, resolving glob paths, and downloading FTP files to the local machine.
  2. spark.driver.extraClassPath: equivalent to --driver-class-path, passed through unchanged.
  3. spark.driver.extraLibraryPath: equivalent to --driver-library-path, passed through unchanged.

Spark main jar

        The Spark main jar is called the primary resource in the source code. In SparkSubmit it is turned into the args of YarnClusterApplication's main function, in the form:

--jar moduleA.jar
--class moduleA.mainClassName
--arg moduleA.main.args

        At this point the SparkSubmit logic ends and control passes to YarnClusterApplication.

YarnClusterApplication

        This class launches the YARN AM and starts the Spark driver-side code (i.e. ModuleA) inside the AM.

The key question is how the AM's jars are set up.

AM startup flow

def submitApplication(): ApplicationId = {
    var appId: ApplicationId = null
    try {
      launcherBackend.connect()
      yarnClient.init(hadoopConf)
      yarnClient.start()

      logInfo("Requesting a new application from cluster with %d NodeManagers"
        .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers))

      // Get a new application from our RM
      val newApp = yarnClient.createApplication()
      val newAppResponse = newApp.getNewApplicationResponse()
      appId = newAppResponse.getApplicationId()

      new CallerContext("CLIENT", sparkConf.get(APP_CALLER_CONTEXT),
        Option(appId.toString)).setCurrentContext()

      // Verify whether the cluster has enough resources for our AM
      verifyClusterResources(newAppResponse)

      // IMPORTANT!!! The AM container launch context is built here; everything related to jars and the classpath lives inside
      val containerContext = createContainerLaunchContext(newAppResponse)
      val appContext = createApplicationSubmissionContext(newApp, containerContext)

      // Finally, submit and monitor the application
      logInfo(s"Submitting application $appId to ResourceManager")
      yarnClient.submitApplication(appContext)
      launcherBackend.setAppId(appId.toString)
      reportLauncherState(SparkAppHandle.State.SUBMITTED)

      appId
    } catch {
      case e: Throwable =>
        if (appId != null) {
          cleanupStagingDir(appId)
        }
        throw e
    }
  }

Setting the AM CLASSPATH environment variable

private def createContainerLaunchContext(newAppResponse: GetNewApplicationResponse)
    : ContainerLaunchContext = {
    logInfo("Setting up container launch context for our AM")
    val appId = newAppResponse.getApplicationId
    val appStagingDirPath = new Path(appStagingBaseDir, getAppStagingDir(appId))
    val pySparkArchives =
      if (sparkConf.get(IS_PYTHON_APP)) {
        findPySparkArchives()
      } else {
        Nil
      }

    //appStagingDirPath=hdfs://nameservice1/user/spark_cdh/.sparkStaging
    //pySparkArchives: not relevant here
    // build the AM's environment, which includes CLASSPATH
    val launchEnv = setupLaunchEnv(appStagingDirPath, pySparkArchives)
    // the resources the yarn container must download, e.g. jars, the log4j.properties file, etc.
    val localResources = prepareLocalResources(appStagingDirPath, pySparkArchives)

    val amContainer = Records.newRecord(classOf[ContainerLaunchContext])
    amContainer.setLocalResources(localResources.asJava)
    amContainer.setEnvironment(launchEnv.asJava)

    //what follows builds the AM launch script: essentially a "java -server ... <mainClass> <args>" command
  }
private def setupLaunchEnv(
      stagingDirPath: Path,
      pySparkArchives: Seq[String]): HashMap[String, String] = {
    logInfo("Setting up the launch environment for our AM container")
    val env = new HashMap[String, String]()

    // IMPORTANT!!! The AM classpath can be extended via this config entry, which in turn is set by --driver-class-path
    // DRIVER_CLASS_PATH = spark.driver.extraClassPath
    /**
     * env.put("CLASSPATH",origin+"DRIVER_CLASS_PATH")
     * env.put("CLASSPATH",origin+"{{PWD}}")
     * evn.put("CLASSPATH",origin+"{{PWD}}/__spark_conf__")
     * env.put("CLASSPATH",origin+"{{PWD}}/__spark_libs__/"*"")
     * env.put("CLASSPATH",origin+"/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/jars/"*")
     * env.put("CLASSPATH",origin+"/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/hive/"*")
     * env.put("CLASSPATH","{{PWD}}/__spark_conf__/__hadoop_conf__")
     * env.put("CLASSPATH","环境变量:SPARK_DIST_CLASSPATH")
     */
    populateClasspath(args, hadoopConf, sparkConf, env, sparkConf.get(DRIVER_CLASS_PATH))
    env("SPARK_YARN_STAGING_DIR") = stagingDirPath.toString
    env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName()

    // Pick up any environment variables for the AM provided through spark.yarn.appMasterEnv.*
    val amEnvPrefix = "spark.yarn.appMasterEnv."

    /**
     * env.put("MKL_NUM_THREADS",1)
     * env.put("OPENBLAS_NUM_THREADS",1)
     */
    sparkConf.getAll
      .filter { case (k, v) => k.startsWith(amEnvPrefix) }
      .map { case (k, v) => (k.substring(amEnvPrefix.length), v) }
      .foreach { case (k, v) => YarnSparkHadoopUtil.addPathToEnvironment(env, k, v) }

    // SPARK_DIST_CLASSPATH
    /**
     * note: this only copies the SPARK_DIST_CLASSPATH environment variable into env
     */
    sys.env.get(ENV_DIST_CLASSPATH).foreach { dcp =>
      env(ENV_DIST_CLASSPATH) = dcp
    }

    env
  }

Uploading container resources to HDFS

        This method is also long; only two points matter:

  1. The Spark primary resource (i.e. moduleA.jar) is uploaded to HDFS and renamed to __app__.jar.
  2. The files from --jars are uploaded to HDFS.

Launching the AM

        The key step is generating a launch command, which looks roughly like:

LD_LIBRARY_PATH=\"{spark.driver.extraLibraryPath}:$LD_LIBRARY_PATH\"
{{JAVA_HOME}}/bin/java \ 
               -server \ 
               org.apache.spark.deploy.yarn.ApplicationMaster
               --class MyModuleAMainClass \
               --jar ModuleA.jar
               --arg <spark-submit mainClass args>
               1><LOG_DIR>/stdout \ 
               2><LOG_DIR>/stderr
               

        [One thing I have not figured out] spark.driver.extraLibraryPath (i.e. what is set via --driver-library-path) does not affect the AM's CLASSPATH, so what exactly is the point of setting LD_LIBRARY_PATH?

        From the command above we can conclude that the jars set via spark-submit --jars are not part of the AM's CLASSPATH. Since the Spark driver is still able to start, the relationship between the AM and the driver must be as follows (see the ApplicationMaster source below):

ApplicationMaster 

//1. AM entry point, main thread
def main(args: Array[String]): Unit = {
    SignalUtils.registerLogger(log)
    val amArgs = new ApplicationMasterArguments(args)
    master = new ApplicationMaster(amArgs)
    System.exit(master.run())
  }

//2、kerberos
final def run(): Int = {
    doAsUser {
      runImpl()
    }
    exitCode
  }

private def runImpl(): Unit = {
      if (isClusterMode) {
        runDriver()
      } else {
        runExecutorLauncher()
      }
  }


private def runDriver(): Unit = {
    addAmIpFilter(None)
    //IMPORTANT!!! start the spark driver
    userClassThread = startUserApplication()

    // This a bit hacky, but we need to wait until the spark.driver.port property has
    // been set by the Thread executing the user class.
    logInfo("Waiting for spark context initialization...")
    val totalWaitTime = sparkConf.get(AM_MAX_WAIT_TIME)
    try {
      //IMPORTANT!!! wait until SparkContext initialization finishes (i.e. the driver started successfully); not indefinitely, 100s by default
      val sc = ThreadUtils.awaitResult(sparkContextPromise.future,
        Duration(totalWaitTime, TimeUnit.MILLISECONDS))
      if (sc != null) {
        rpcEnv = sc.env.rpcEnv

        val userConf = sc.getConf
        val host = userConf.get("spark.driver.host")
        val port = userConf.get("spark.driver.port").toInt
        registerAM(host, port, userConf, sc.ui.map(_.webUrl))

        val driverRef = rpcEnv.setupEndpointRef(
          RpcAddress(host, port),
          YarnSchedulerBackend.ENDPOINT_NAME)

        //IMPORTANT!!! this is where the AM-side prometheus sink ends up being invoked
        createAllocator(driverRef, userConf)
      }
  }




private def startUserApplication(): Thread = {
    // userClassLoader loads __app__.jar (i.e. moduleA.jar) and all the jars specified via spark-submit --jars
    val mainMethod = userClassLoader.loadClass(args.userClass)
      .getMethod("main", classOf[Array[String]])

    val userThread = new Thread {
      //calls the main method of moduleA.jar and initializes the SparkContext
    }
    userThread.setContextClassLoader(userClassLoader)
    userThread.setName("Driver")
    userThread.start()
    userThread
  }
/**
* A quick overview of Spark's "user classpath first" mechanism.
* The core is ChildFirstURLClassLoader, which extends URLClassLoader
* and passes null as the parent when calling the URLClassLoader constructor.
* ChildFirstURLClassLoader keeps its own URLClassLoader parent field internally
* and implements the parent-delegation logic itself:
* it first tries to load a class from its own resources and only retries via the parent on failure.
*/
private val userClassLoader = {
    val classpath = Client.getUserClasspath(sparkConf)
    val urls = classpath.map { entry =>
      new URL("file:" + new File(entry.getPath()).getAbsolutePath())
    }

    if (isClusterMode) {
      if (Client.isUserClassPathFirst(sparkConf, isDriver = true)) {
        new ChildFirstURLClassLoader(urls, Utils.getContextOrSparkClassLoader)
      } else {
        new MutableURLClassLoader(urls, Utils.getContextOrSparkClassLoader)
      }
    } else {
      new MutableURLClassLoader(urls, Utils.getContextOrSparkClassLoader)
    }
  }

Fixing the ClassNotFoundException

        Now we know that the AM's default CLASSPATH does not include moduleA.jar, moduleB.jar or moduleC.jar, so it is no surprise that the prometheus sink cannot be found. Adding the following option puts moduleB.jar onto the AM's classpath:

spark-submit ... --driver-class-path moduleB.jar --class xxx /moduleA.jar ...

        As mentioned in the Background section, classes in ModuleA, ModuleB and ModuleC reference each other, so with the configuration above the AM's JVM classloader structure becomes: moduleB.jar sits on the AM's system classpath, while moduleA.jar and moduleC.jar are only visible to the AM's userClassLoader.

         With this class distribution, when ModuleA starts it loads ClassB from ModuleB, and ClassB in turn needs ClassC from ModuleC; when ClassB tries to load ClassC, a ClassNotFoundException for ClassC is thrown.

        To solve this, just make sure the jars set via --driver-class-path do not reference classes from --jars or the main jar (the main jar and --jars may reference the --driver-class-path jars, though). My solution was to move the prometheus sink code into its own standalone jar.
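
        The visibility problem is easy to demonstrate in isolation: a class defined by the system classloader cannot link against classes that only exist in a child URLClassLoader, which is exactly the AM-side situation when ClassB sits on --driver-class-path and ClassC only in --jars. A minimal sketch (the class and jar names are hypothetical):

import java.io.File;
import java.net.URL;
import java.net.URLClassLoader;

public class ClassLoaderVisibilityDemo {
    public static void main(String[] args) throws Exception {
        // moduleC.jar is only visible to this child loader (analogous to the AM's userClassLoader)
        URLClassLoader child = new URLClassLoader(
                new URL[]{ new File("moduleC.jar").toURI().toURL() },
                ClassLoader.getSystemClassLoader());

        // resolving ClassC through the child loader works fine
        Class.forName("com.example.c.ClassC", true, child);

        // ClassB was put on the system classpath (--driver-class-path), so its defining
        // loader is the system loader. When linking ClassB triggers a load of ClassC,
        // the request goes to the system loader, which knows nothing about moduleC.jar
        // -> ClassNotFoundException / NoClassDefFoundError
        Class<?> classB = Class.forName("com.example.b.ClassB"); // defined by the system loader
        classB.getMethod("useClassC").invoke(classB.getDeclaredConstructor().newInstance());
    }
}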

ClassNotFoundException in executors as well

        The cause is the same as for the AM; adding the following option to spark-submit fixes it.

--conf spark.executor.extraClassPath=spark-prometheus-metrics-1.0.jar

AM and Driver HTTP servers expose identical content

        Because the AM and the driver live in the same JVM, the AM's and the driver's MetricsSystem, Sink and Reporter are all separate instances, yet they all write their metrics into the same Prometheus collector (a consequence of the officially recommended usage, where collectors are kept as constants). As a result, the two HTTP servers exposed by the AM and the driver return exactly the same content. The fix is described below.
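
        The root cause can be reproduced outside Spark in a few lines: new HTTPServer(port) serves the default CollectorRegistry, so any number of HTTPServer instances in one JVM expose the same set of collectors. A minimal sketch (the ports are arbitrary):

import io.prometheus.client.Gauge;
import io.prometheus.client.exporter.HTTPServer;

public class SharedRegistryDemo {
    public static void main(String[] args) throws Exception {
        // registered once into CollectorRegistry.defaultRegistry, as the simpleclient
        // docs recommend (collectors kept as static/singleton objects)
        Gauge demo = Gauge.build("demo_metric", "demo gauge").register();
        demo.set(42);

        // two servers in the same JVM - e.g. one started by the driver's sink, one by
        // the AM's sink - both serve the same default registry, so
        // http://localhost:9091/metrics and http://localhost:9092/metrics return identical content
        HTTPServer driverSide = new HTTPServer(9091);
        HTTPServer amSide = new HTTPServer(9092);
    }
}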

Code structure

PrometheusSink refactoring

package ;

import com.codahale.metrics.MetricRegistry;
import xxx.reporter.PrometheusReporter;
import io.prometheus.client.exporter.HTTPServer;
import org.apache.commons.lang3.StringUtils;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.RetryForever;
import org.apache.spark.metrics.sink.Sink;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

/**
 * spark 配置:
 * --conf spark.metrics.conf.*.sink.prometheus.class=xxx.sink.PrometheusSink \
 * --conf spark.metrics.conf.*.sink.prometheus.zkHosts=xxx \
 * --conf spark.metrics.conf.*.sink.prometheus.sessionTimeoutMs=30000 \
 * --conf spark.metrics.conf.*.sink.prometheus.connectTimeoutMs=10000 \
 * --conf spark.metrics.conf.*.sink.prometheus.retryIntervalMs=3000 \
 * --conf spark.metrics.conf.*.sink.prometheus.jobName=BLOOD_TEST1 \
 * --conf spark.executor.extraClassPath=spark-prometheus-metrics-1.0.jar \
 * --driver-class-path spark-prometheus-metrics-1.0.jar \
 *
 *
 * NOTE: in yarn-cluster mode the driver and the ApplicationMaster live in the
 * same JVM, so this sink is instantiated twice,
 * but there is only one prometheus collector, which makes the two http servers
 * return exactly the same content.
 * Solution:
 * only start one http server. Since the sink is constructed via reflection, a classic
 * singleton is not usable, so a volatile flag provides single-instance behavior across threads.
 * @Author:
 * @Date: 2022/1/17
 * @TIME: 11:08
 */
public class PrometheusSink implements Sink {
    private final static Logger LOG = LoggerFactory.getLogger(PrometheusSink.class);
    private static volatile boolean HAS_ONE_INSTANCE = false;
    private final static String ZK_HOSTS_KEY = "zkHosts";
    private final static String ZK_SESSION_TIMEOUT_MS = "sessionTimeoutMs";
    private final static String ZK_CONNECT_TIMEOUT_MS = "connectTimeoutMs";
    private final static String ZK_RETRY_INTERVAL_MS = "retryIntervalMs";
    private final static String SPARK_JOB_NAME = "jobName";
    private final Properties property;
    private final MetricRegistry metricRegistry;
    private final boolean canRun; //canRun indicates whether this instance may run the http server; the reporter runs regardless of canRun
    private HTTPServer httpServer;
    private CuratorFramework zkClient;
    private PrometheusReporter prometheusReporter;
    public PrometheusSink(Properties property,
                          MetricRegistry registry,
                          org.apache.spark.SecurityManager securityMgr) {
        this.property = property;
        this.metricRegistry = registry;
        if(!HAS_ONE_INSTANCE) {
            HAS_ONE_INSTANCE = true;
            this.canRun = true;
        }else
            this.canRun = false;
    }

    @Override
    public void start() {
        try {
            String zkHosts = this.property.getProperty(ZK_HOSTS_KEY);
            if (StringUtils.isEmpty(zkHosts))
                throw new RuntimeException("[[instance].sink.prometheus.zkHosts] must not be empty");
            int sessionTimeoutMs = Integer.parseInt(this.property
                    .getProperty(ZK_SESSION_TIMEOUT_MS, "30000"));
            int connectTimeoutMs = Integer.parseInt(this.property
                    .getProperty(ZK_CONNECT_TIMEOUT_MS, "10000"));
            int retryInterval = Integer.parseInt(this.property
                    .getProperty(ZK_RETRY_INTERVAL_MS, "3000"));
            String jobName = this.property.getProperty(SPARK_JOB_NAME);
            if (StringUtils.isEmpty(jobName))
                throw new RuntimeException("[[instance].sink.prometheus.jobName] must not be empty");
            this.prometheusReporter = new PrometheusReporter(this.metricRegistry,jobName);
            int retryCount = 0;
            //whether this instance is allowed to start the http endpoint
            if(this.canRun) {
                InetSocketAddress inetSocketAddress = new InetSocketAddress(InetAddress.getLocalHost(), 0);
                String url = "http://%s:%d/metrics";
                String ip = null;
                int port;
                while (true) {
                    try {
                        try (ServerSocket serverSocket = new ServerSocket()) {
                            serverSocket.bind(inetSocketAddress);
                            ip = inetSocketAddress.getAddress().getHostAddress();
                            port = serverSocket.getLocalPort();
                            url = String.format(url, ip, port);
                        }
                        this.httpServer = new HTTPServer(port);
                        break;
                    } catch (IOException ioException) {
                        if(++retryCount==3) {
                            throw ioException;
                        }
                    }
                }
                LOG.info("start [Prometheus] metrics sink\nHttp EndPoint:{}",String.format(url,ip, port));

                //register with ZooKeeper
                this.zkClient = CuratorFrameworkFactory.newClient(zkHosts,sessionTimeoutMs,connectTimeoutMs,new RetryForever(retryInterval));
                this.zkClient.start();
                LOG.info("started zk client, zkHosts=[{}], sessionTimeout=[{}] ms, connectTimeout=[{}] ms," +
                                " failOverRetryInterval=[{}] ms", zkHosts, sessionTimeoutMs, connectTimeoutMs,
                        retryInterval);
                this.zkClient.blockUntilConnected();
                String zNodePath = "/waterfall/spark/metrics/promethues/"+jobName+"/config";
                this.zkClient.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL_SEQUENTIAL)
                        .forPath(zNodePath,url.getBytes(StandardCharsets.US_ASCII));
            }
        }catch (Exception e) {
            throw new RuntimeException("failed to start the prometheus metrics sink", e);
        }
        this.prometheusReporter.start(15, TimeUnit.SECONDS);
    }

    @Override
    public void stop() {
        if(this.canRun) {
            if(zkClient!=null) {
                zkClient.close();
            }
            if(this.httpServer!=null)
                this.httpServer.close();
        }
        this.prometheusReporter.stop();
    }

    @Override
    public void report() {
        //judging from the Spark source, report() is only triggered right before the JVM stops
        LOG.info("the JVM process is about to stop, report() is called once automatically");
        this.prometheusReporter.report();
    }
}

PrometheusReporter refactoring

package ;

import com.codahale.metrics.*;
import xxx.reporter.collector.SingletonCunterCollector;
import xxx.reporter.collector.SingletonGaugeCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.SortedMap;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * NOTE: when running in YARN cluster mode, the AM and the driver share the same JVM, but the sink is instantiated twice;
 * judging from the source, the first instantiation is always the driver and the second is always the AM
 *
 * @Author:
 * @Date: 2022/1/17
 * @TIME: 13:53
 */
public class PrometheusReporter extends ScheduledReporter {
    private static final Logger LOG = LoggerFactory.getLogger(PrometheusReporter.class);
    private static final Pattern METRIC_NAME_PATTERN = Pattern.compile("(application_\\d+_\\d+)\\.([a-zA-z]+)\\.(.+)");
    //yarn-AppId || thread role, e.g. driver, executor, etc. || spark job name || original spark metric name
    private static final String[] LABLE_NAMES = {"applicationId", "role", "jobName", "originName"};
    private static final Map<Class<?>, Field> TIME_FIELD_CACHE;
    static {
        TIME_FIELD_CACHE = new HashMap<>();
        try {
            Field meterField = Timer.class.getDeclaredField("meter");
            meterField.setAccessible(true);
            TIME_FIELD_CACHE.put(Meter.class,meterField);
        } catch (NoSuchFieldException e) {
            LOG.warn("failed to obtain the Field object for Timer's member 'meter'", e);
        }
        try {
            Field histogramField = Timer.class.getDeclaredField("histogram");
            histogramField.setAccessible(true);
            TIME_FIELD_CACHE.put(Histogram.class,histogramField);
        } catch (NoSuchFieldException e) {
            LOG.warn("failed to obtain the Field object for Timer's member 'histogram'", e);
        }
    }
    private final String jobName;
    //caches the String[3] extracted from the original metric name, to avoid repeated regex work
    private final Map<String, String[]> cache = new HashMap<>();

    public PrometheusReporter(MetricRegistry registry, String jobName) {
        super(registry, "prometheus-reporter", MetricFilter.ALL, TimeUnit.SECONDS, TimeUnit.SECONDS);
        this.jobName = this.formatName(jobName);
    }

    @SuppressWarnings("all")
    @Override
    public void report(SortedMap<String, Gauge> gaugeMap,
                       SortedMap<String, Counter> countMap,
                       SortedMap<String, Histogram> histogramMap,
                       SortedMap<String, Meter> meterMap,
                       SortedMap<String, Timer> timerMap) {
        if (gaugeMap != null) {
            for (Map.Entry<String, Gauge> gaugeEntry : gaugeMap.entrySet()) {
                this.gaugeAdaptor(gaugeEntry.getKey(), gaugeEntry.getValue());
            }
        }

        if (countMap != null) {
            for (Map.Entry<String, Counter> counterEntry : countMap.entrySet()) {
                this.counterAdaptor(counterEntry.getKey(), counterEntry.getValue());
            }
        }

        if (histogramMap != null) {
            for (Map.Entry<String, Histogram> histogramEntry : histogramMap.entrySet()) {
                this.histogramAdaptor(histogramEntry.getKey(), histogramEntry.getValue());
            }
        }

        if (meterMap != null) {
            for (Map.Entry<String, Meter> meterEntry : meterMap.entrySet()) {
                this.meterAdaptor(meterEntry.getKey(), meterEntry.getValue());
            }
        }

        if (timerMap != null) {
            for (Map.Entry<String, Timer> timerEntry : timerMap.entrySet()) {
                try {
                    //1. collect the meter part
                    Field meterField = TIME_FIELD_CACHE.get(Meter.class);
                    if(meterField!=null) {
                        Meter meter = (Meter)meterField.get(timerEntry.getValue());
                        this.meterAdaptor(timerEntry.getKey(),meter);
                    }
                } catch (IllegalAccessException e) {
                    LOG.warn("failed to read Timer's private member 'meter'", e);
                }

                //2. collect the histogram part
                try {
                    //collect the histogram
                    Field histogramField = TIME_FIELD_CACHE.get(Histogram.class);
                    if(histogramField!=null) {
                        Histogram histogram = (Histogram)histogramField.get(timerEntry.getValue());
                        this.histogramAdaptor(timerEntry.getKey(),histogram);
                    }
                } catch (IllegalAccessException e) {
                    LOG.warn("failed to read Timer's private member 'histogram'", e);
                }
            }
        }
    }

    /**
     * Converts a spark gauge into a prometheus-supported type<br/>
     * NOTE: a spark gauge may hold non-Number data; only Number values are collected here
     *
     * @param originMetricName
     * @param originGauge
     */
    private void gaugeAdaptor(String originMetricName, Gauge<?> originGauge) {
        //1. the gauge value must be a Number
        Object originValue = originGauge.getValue();
        if (!(originValue instanceof Number))
            return;
        String[] arrContainer = this.analyzeOriginMetricName(originMetricName);
        if (arrContainer.length == 3) {
            String appId = arrContainer[0];
            String role = arrContainer[1];
            String newMetricName = arrContainer[2];
            //feed the value into the prometheus collector
            SingletonGaugeCollector collector = SingletonGaugeCollector.getInstance(this.jobName);
            collector.setValue(
                    newMetricName,
                    originMetricName,
                    ((Number) originValue).doubleValue(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName);
        }
    }

    /**
     * Converts a spark counter into a prometheus-supported type
     *
     * @param originMetricName
     * @param originCounter
     */
    private void counterAdaptor(String originMetricName, Counter originCounter) {
        String[] arrContainer = this.analyzeOriginMetricName(originMetricName);
        if (arrContainer.length == 3) {
            String appId = arrContainer[0];
            String role = arrContainer[1];
            String newMetricName = arrContainer[2];
            //feed the value into the prometheus collector
            SingletonCunterCollector singletonCunterCollector
                    = SingletonCunterCollector.getInstance(this.jobName);
            singletonCunterCollector.incr(
                    newMetricName,
                    originMetricName,
                    originCounter.getCount(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName);
        }
    }

    /**
     * Converts a spark histogram into a prometheus-supported type, mapped directly onto gauges
     *
     * @param originMetricName
     * @param originHistogram
     */
    private void histogramAdaptor(String originMetricName, Histogram originHistogram) {
        String[] arrContainer = this.analyzeOriginMetricName(originMetricName);
        if (arrContainer.length == 3) {
            String appId = arrContainer[0];
            String role = arrContainer[1];
            String newMetricNamePrefix = arrContainer[2];
            //feed the values into the prometheus collector
            SingletonGaugeCollector collector = SingletonGaugeCollector.getInstance(this.jobName);
            //1、histogram count
            String suffix = "count";
            String newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getCount(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //2、histogram max
            suffix = "max";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().getMax(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //3、histogram mean
            suffix = "mean";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().getMean(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //4、histogram min
            suffix = "min";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().getMin(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //5、histogram p50
            suffix = "p50";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().getMedian(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //6、histogram p75
            suffix = "p75";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().get75thPercentile(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //7、histogram p95
            suffix = "p95";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().get95thPercentile(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //8、histogram p98
            suffix = "p98";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().get98thPercentile(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //9、histogram p99
            suffix = "p99";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().get99thPercentile(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //10、histogram p999
            suffix = "p999";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().get999thPercentile(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //11、histogram stddev
            suffix = "stddev";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originHistogram.getSnapshot().getStdDev(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
        }
    }

    private void meterAdaptor(String originMetricName, Meter originMeter) {
        String[] arrContainer = this.analyzeOriginMetricName(originMetricName);
        if (arrContainer.length == 3) {
            String appId = arrContainer[0];
            String role = arrContainer[1];
            String newMetricNamePrefix = arrContainer[2];
            //feed the values into the prometheus collector
            SingletonGaugeCollector collector = SingletonGaugeCollector.getInstance(this.jobName);
            //1、meter count
            String suffix = "count";
            String newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originMeter.getCount(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //2、
            suffix = "ratePer1m";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originMeter.getOneMinuteRate(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //3、
            suffix = "ratePer5m";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originMeter.getFiveMinuteRate(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //4、
            suffix = "ratePer15m";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originMeter.getFifteenMinuteRate(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
            //5、
            suffix = "meanRate";
            newMetricName = newMetricNamePrefix + "_" + suffix;
            collector.setValue(
                    newMetricName,
                    originMetricName + "," + suffix,
                    originMeter.getMeanRate(),
                    LABLE_NAMES,
                    appId, role, this.jobName, originMetricName + "," + suffix);
        }
    }

    /**
     * Formats a string by replacing every "." with "_"
     *
     * @param originVal
     * @return
     */
    private String formatName(String originVal) {
        return originVal.replaceAll("\\.", "_");
    }

    /**
     * Parses a spark metric name and extracts its components
     *
     * @param originMetricName
     * @return {applicationId, role, originMetricName without the applicationId and role}<br/>
     * NOTE: element [2] has every "." replaced with "_"
     */
    private String[] analyzeOriginMetricName(String originMetricName) {
        String[] arrContainer = null;
        if ((arrContainer = this.cache.get(originMetricName)) == null) {
            Matcher matcher = METRIC_NAME_PATTERN.matcher(originMetricName);
            //full match: the regex must match the complete metric name
            if (matcher.matches()) {
                arrContainer = new String[]{matcher.group(1), matcher.group(2), this.formatName(matcher.group(3))};
            } else {
                arrContainer = new String[0];
            }
            this.cache.put(originMetricName, arrContainer);
        }
        return arrContainer;
    }
}

Singleton Prometheus Collector

package ;

import io.prometheus.client.Gauge;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * @Author: 
 * @Date: 2022/2/9
 * @TIME: 10:56
 */
public class SingletonGaugeCollector {
    private static volatile SingletonGaugeCollector INSTANCE = null;
    private static volatile boolean initFlag = false;
    private final String jobName;
    //key=metricname
    private final Map<String,Gauge> gaugeMap = new ConcurrentHashMap<>();
    private SingletonGaugeCollector(String jobName) {
        this.jobName = jobName+"_";
    }

    /**
     * Returns the singleton prometheus Gauge collector
     * @param jobName spark job name
     * @return
     */
    public static SingletonGaugeCollector getInstance(String jobName) {
        if(INSTANCE==null) {
            synchronized (SingletonGaugeCollector.class) {
                if(INSTANCE==null) {
                    INSTANCE = new SingletonGaugeCollector(jobName);
                    initFlag = true;
                }
            }
        }
        return INSTANCE;
    }

    /**
     * Records a metric value
     * @param metricName metric name
     * @param help
     * @param val metric value
     * @param lableNames metric label names
     * @param lableValues metric label values
     */
    public void setValue(String metricName,String help,double val,String[] lableNames,String... lableValues) {
        for(;;) {
            if(this.gaugeMap.containsKey(metricName)) {
                this.gaugeMap.get(metricName).labels(lableValues)
                        .set(val);
                break;
            }else {
                synchronized (this) {
                    if(!this.gaugeMap.containsKey(metricName)) {
                        Gauge gauge = Gauge.build(
                                Contant.COLLECTOR_NAME_PREFIX + jobName+metricName,
                                help)
                                .labelNames(lableNames).register();
                        this.gaugeMap.put(metricName,gauge);
                    }
                }
            }
        }
    }
}

Results

With 1 AM, 1 driver and 1 executor instance there are 2 HTTP servers in total; one of them contains the metrics of both the AM and the driver, as intended.

Executor metrics are empty

        The cause lies in PrometheusReporter's regular expression: for executors the role part (i.e. group(2) of the regex) is a number, while the original pattern only matches non-numeric roles. Changing the character class to [a-zA-Z0-9] fixes it.

        One more thing to note: executor metric names contain strings like "applicationId_xxx_xxx.1.A.B-B1.C", and the "-" character is illegal for Prometheus, so it has to be replaced as well.
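
        A minimal sketch of both fixes, i.e. the corrected character class and the name sanitization (the sample metric name below is illustrative):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MetricNameFixDemo {
    // executor roles are plain numbers ("1", "2", ...), so the role group must also allow digits
    private static final Pattern METRIC_NAME_PATTERN =
            Pattern.compile("(application_\\d+_\\d+)\\.([a-zA-Z0-9]+)\\.(.+)");

    public static void main(String[] args) {
        String name = "application_1643000000000_0001.1.A.B-B1.C";
        Matcher m = METRIC_NAME_PATTERN.matcher(name);
        if (m.matches()) {
            String appId = m.group(1); // application_1643000000000_0001
            String role = m.group(2);  // "1", an executor id
            // prometheus metric names may only contain [a-zA-Z0-9_:], so replace
            // '.' and '-' (and anything else illegal) with '_'
            String promName = m.group(3).replaceAll("[^a-zA-Z0-9_:]", "_");
            System.out.println(appId + " / " + role + " / " + promName); // ... / 1 / A_B_B1_C
        }
    }
}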
