Spark配置信息
Spark的配置信息读取一般通过ConfigProvider来完成。需要注意的是,它实际上是一个特质(trait)而非抽象类,它的一系列实现类负责从各种渠道读取配置信息到内存的Map中(这里是Java的Map,而不是Scala的Map)。
代码如下所示
package org.apache.spark.internal.config
import java.util.{Map => JMap}
import org.apache.spark.SparkConf
/**
 * A source of configuration values.
 *
 * Implementations resolve a key against one specific backing store
 * (environment variables, system properties, or an in-memory map),
 * returning None when the key is absent.
 */
private[spark] trait ConfigProvider {
  def get(key: String): Option[String]
}
/** A [[ConfigProvider]] backed by the process environment variables. */
private[spark] class EnvProvider extends ConfigProvider {
  // System.getenv returns null for a missing variable; Option() maps that to None.
  override def get(key: String): Option[String] = Option(System.getenv(key))
}
/** A [[ConfigProvider]] backed by the JVM system properties. */
private[spark] class SystemProvider extends ConfigProvider {
  // System.getProperty returns null for an unset property; Option() maps that to None.
  override def get(key: String): Option[String] = Option(System.getProperty(key))
}
/** A [[ConfigProvider]] backed by a caller-supplied Java map. */
private[spark] class MapProvider(conf: JMap[String, String]) extends ConfigProvider {
  override def get(key: String): Option[String] = conf.get(key) match {
    // java.util.Map.get signals absence with null; translate that into None.
    case null  => None
    case value => Some(value)
  }
}
/**
 * A config provider that only reads Spark config keys, i.e. keys prefixed
 * with "spark."; any other key resolves to None. When the key itself is
 * absent from the map, its deprecated alias (if any) is consulted.
 */
private[spark] class SparkConfigProvider(conf: JMap[String, String]) extends ConfigProvider {
  override def get(key: String): Option[String] = {
    // Guard clause: this provider never resolves non-Spark keys.
    if (!key.startsWith("spark.")) {
      None
    } else {
      // Prefer the current key; fall back to its deprecated form if unset.
      Option(conf.get(key)).orElse(SparkConf.getDeprecatedConfig(key, conf))
    }
  }
}
如上代码所示,一般读取配置信息的实体类为SparkConfigProvider。
但是真正给TransportContext提供配置信息的类并不是SparkConfigProvider,而是SparkTransportConf。该类提供工具方法,用于将Spark JVM中的SparkConf转换为带有环境详细信息(如分配给这个JVM的核数)的TransportConf。
这个实体类主要提供如下的代码读取配置信息。
/**
 * Converts a Spark JVM's [[SparkConf]] into a [[TransportConf]] for `module`,
 * seeding the server/client IO thread counts from the cores allocated to this
 * JVM when the caller has not configured them explicitly.
 *
 * @param _conf the Spark configuration to read from (cloned, never mutated)
 * @param module the module name used to namespace the "spark.<module>.io.*" keys
 * @param numUsableCores cores allocated to this JVM; passed to defaultNumThreads
 */
def fromSparkConf(_conf: SparkConf, module: String, numUsableCores: Int = 0): TransportConf = {
  // Work on a copy so the caller's SparkConf is never mutated.
  val clonedConf = _conf.clone
  // Size thread pools from this JVM's core allocation (rather than assuming
  // all of the machine's cores are available), but only where the user has
  // not already set a value.
  val threadCount = defaultNumThreads(numUsableCores).toString
  Seq("serverThreads", "clientThreads").foreach { suffix =>
    clonedConf.setIfMissing(s"spark.$module.io.$suffix", threadCount)
  }
  // Adapt the cloned SparkConf to the ConfigProvider interface TransportConf expects.
  new TransportConf(module, new ConfigProvider {
    override def get(name: String): String = clonedConf.get(name)
    override def get(name: String, defaultValue: String): String =
      clonedConf.get(name, defaultValue)
    override def getAll(): java.lang.Iterable[java.util.Map.Entry[String, String]] = {
      clonedConf.getAll.toMap.asJava.entrySet()
    }
  })
}
其中_conf: SparkConf为Spark配置,module: String为模块名称,numUsableCores: Int为分配给这个JVM的可用核数(默认为0)。