Akka是Actor编程模型的库,用于提供高并发的Actor编程模型,Spark即利用ActorSystem实现分布式消息的发送和并发编程。下面将通过源码分析来观察Spark中ActorSystem的创建与启动。
首先在SparkEnv.scala中
// Create a ActorSystem for legacy codes,即通过AkkaUtils.createActorSystem方法创建ActorSystem
AkkaUtils.createActorSystem(
actorSystemName + "ActorSystem",
hostname,
actorSystemPort,
conf,
securityManager
)._1
在AkkaUtils.scala中可以发现createActorSystem的源码
def createActorSystem(
//传入name、host、port、conf等信息,返回值即startServiceOnPort的返回值
name: String, host: String, port: Int, conf: SparkConf, securityManager: SecurityManager): (ActorSystem, Int) = {
//对doCreateActorSystem函数进行柯里化,实际通过doCreateActorSystem启动Akka
val startService: Int => (ActorSystem, Int) = { actualPort => doCreateActorSystem(name, host, actualPort, conf, securityManager) }
//返回值是(T,int)在这里是(ActorSystem,int)
Utils.startServiceOnPort(port, startService, conf, name)}
在Utils.scala中可以看到在startServiceOnPort中调用柯里化后的doCreateActorSystem启动Akka的代码
def startServiceOnPort[T]( startPort: Int, startService: Int => (T, Int), conf: SparkConf, serviceName: String = ""): (T, Int) = { require(startPort == 0 || (1024 <= startPort && startPort < 65536), "startPort should be between 1024 and 65535 (inclusive), or 0 for a random free port.") val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'" val maxRetries = portMaxRetries(conf) for (offset <- 0 to maxRetries) { // Do not increment port if startPort is 0, which is treated as a special port val tryPort = if (startPort == 0) { startPort } else { // If the new port wraps around, do not try a privilege port ((startPort + offset - 1024) % (65536 - 1024)) + 1024 } try {
val (service, port) = startService(tryPort) logInfo(s"Successfully started service$serviceString on port $port.") return (service, port) } catch { case e: Exception if isBindCollision(e) => if (offset >= maxRetries) { val exceptionMessage = s"${e.getMessage}: Service$serviceString failed after " + s"$maxRetries retries! Consider explicitly setting the appropriate port for the " + s"service$serviceString (for example spark.ui.port for SparkUI) to an available " + "port or increasing spark.port.maxRetries." val exception = new BindException(exceptionMessage) // restore original stack trace exception.setStackTrace(e.getStackTrace) throw exception } logWarning(s"Service$serviceString could not bind on port $tryPort. " + s"Attempting port ${tryPort + 1}.") } } // Should never happen throw new SparkException(s"Failed to start service$serviceString on port $startPort") } 现在我们可以分析一下用于启动Akka的函数doCreateActorSystem的源码,该源码同样在AkkaUtils.scala中//startServiceOnPort中调用startService,在本例中传入的startService就是柯里化后的doCreateActorSystem用于启动Akka
private def doCreateActorSystem( name: String, host: String, port: Int, conf: SparkConf, securityManager: SecurityManager): (ActorSystem, Int) = { //对conf中的一些参数进行设置 val akkaThreads = conf.getInt("spark.akka.threads", 4) val akkaBatchSize = conf.getInt("spark.akka.batchSize", 15) val akkaTimeoutS = conf.getTimeAsSeconds("spark.akka.timeout", conf.get("spark.network.timeout", "120s")) val akkaFrameSize = maxFrameSizeBytes(conf) val akkaLogLifecycleEvents = conf.getBoolean("spark.akka.logLifecycleEvents", false) val lifecycleEvents = if (akkaLogLifecycleEvents) "on" else "off" if (!akkaLogLifecycleEvents) { // As a workaround for Akka issue #3787, we coerce the "EndpointWriter" log to be silent. // See: https://www.assembla.com/spaces/akka/tickets/3787#/ Option(Logger.getLogger("akka.remote.EndpointWriter")).map(l => l.setLevel(Level.FATAL)) } val logAkkaConfig = if (conf.getBoolean("spark.akka.logAkkaConfig", false)) "on" else "off" val akkaHeartBeatPausesS = conf.getTimeAsSeconds("spark.akka.heartbeat.pauses", "6000s") val akkaHeartBeatIntervalS = conf.getTimeAsSeconds("spark.akka.heartbeat.interval", "1000s") val secretKey = securityManager.getSecretKey() val isAuthOn = securityManager.isAuthenticationEnabled() if (isAuthOn && secretKey == null) { throw new Exception("Secret key is null with authentication on") } val requireCookie = if (isAuthOn) "on" else "off" val secureCookie = if (isAuthOn) secretKey else "" logDebug(s"In createActorSystem, requireCookie is: $requireCookie") val akkaSslConfig = securityManager.akkaSSLOptions.createAkkaConfig .getOrElse(ConfigFactory.empty()) val akkaConf = ConfigFactory.parseMap(conf.getAkkaConf.toMap.asJava) .withFallback(akkaSslConfig).withFallback(ConfigFactory.parseString( s""" |akka.daemonic = on |akka.loggers = [""akka.event.slf4j.Slf4jLogger""] |akka.stdout-loglevel = "ERROR" |akka.jvm-exit-on-fatal-error = off |akka.remote.require-cookie = "$requireCookie" |akka.remote.secure-cookie = "$secureCookie" |akka.remote.transport-failure-detector.heartbeat-interval = $akkaHeartBeatIntervalS s |akka.remote.transport-failure-detector.acceptable-heartbeat-pause = $akkaHeartBeatPausesS s |akka.actor.provider = "akka.remote.RemoteActorRefProvider" |akka.remote.netty.tcp.transport-class = "akka.remote.transport.netty.NettyTransport" |akka.remote.netty.tcp.hostname = "$host" |akka.remote.netty.tcp.port = $port |akka.remote.netty.tcp.tcp-nodelay = on |akka.remote.netty.tcp.connection-timeout = $akkaTimeoutS s |akka.remote.netty.tcp.maximum-frame-size = ${akkaFrameSize}B |akka.remote.netty.tcp.execution-pool-size = $akkaThreads |akka.actor.default-dispatcher.throughput = $akkaBatchSize |akka.log-config-on-start = $logAkkaConfig |akka.remote.log-remote-lifecycle-events = $lifecycleEvents |akka.log-dead-letters = $lifecycleEvents |akka.log-dead-letters-during-shutdown = $lifecycleEvents """.stripMargin)) //创建ActorSystem对象 val actorSystem = ActorSystem(name, akkaConf) val provider = actorSystem.asInstanceOf[ExtendedActorSystem].provider val boundPort = provider.getDefaultAddress.port.get //返回(ActorSystem,int)元组 (actorSystem, boundPort) }
以上,我们通过源码分析追踪了Spark通过ActorSystem的构造器创建ActorSystem对象的整个过程。