6. Spark Source Code Analysis: Worker Instantiation Flow

The previous post covered the Worker startup flow: 5. Spark Source Code Analysis: Worker Startup Flow.

This post walks through how the Worker is instantiated.

 

Worker instantiation

The actual instantiation happens when the RPC endpoint is set up with new Worker(...):

private[deploy] class Worker(
    // Constructor parameters for the Worker; they are self-explanatory
    override val rpcEnv: RpcEnv,
    webUiPort: Int,
    cores: Int,
    memory: Int,
    masterRpcAddresses: Array[RpcAddress],
    // endpointName=Worker
    endpointName: String,
    workDirPath: String = null,
    val conf: SparkConf,
    val securityMgr: SecurityManager)
  extends ThreadSafeRpcEndpoint with Logging {

  // Host and port of this worker's RpcEnv
  private val host = rpcEnv.address.host
  private val port = rpcEnv.address.port

  // Validate the host and port
  Utils.checkHost(host)
  assert (port > 0)

  // A scheduled executor used to send messages at the specified time.
  // Scheduler for periodic messages such as heartbeats and re-registration
  private val forwordMessageScheduler =
    ThreadUtils.newDaemonSingleThreadScheduledExecutor("worker-forward-message-scheduler")

  // A separated thread to clean up the workDir and the directories of finished applications.
  // Used to provide the implicit parameter of `Future` methods.
  // Separate thread used to clean up workDir and the directories of finished applications
  private val cleanupThreadExecutor = ExecutionContext.fromExecutorService(
    ThreadUtils.newDaemonSingleThreadExecutor("worker-cleanup-thread"))

  // For worker and executor IDs
  // Timestamp format used when generating worker and executor IDs
  private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
  // Send a heartbeat every (heartbeat timeout) / 4 milliseconds
  // With the default spark.worker.timeout of 60s, a heartbeat is sent every 15 seconds
  private val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4

  // Model retries to connect to the master, after Hadoop's model.
  // The first six attempts to reconnect are in shorter intervals (between 5 and 15 seconds)
  // Afterwards, the next 10 attempts are between 30 and 90 seconds.
  // A bit of randomness is introduced so that not all of the workers attempt to reconnect at
  // the same time.
  // Retry settings for reconnecting to the master
  // The first 6 attempts use short intervals (5-15 seconds); the next 10 use 30-90 seconds
  // Randomness is added so that workers do not all try to reconnect at the same time
  // Number of attempts before switching to the prolonged interval
  private val INITIAL_REGISTRATION_RETRIES = 6
  // 16 attempts in total (6 + 10)
  private val TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10
  // Lower bound of the fuzz multiplier (0.5); it is a multiplier, not a number of seconds
  private val FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND = 0.500
  private val REGISTRATION_RETRY_FUZZ_MULTIPLIER = {
    val randomNumberGenerator = new Random(UUID.randomUUID.getMostSignificantBits)
    // random value in [0, 1) plus 0.5, so the multiplier falls in [0.5, 1.5)
    randomNumberGenerator.nextDouble + FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND
  }
  // Interval for the first 6 attempts: round(10 * multiplier), i.e. 5-15 seconds
  private val INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS = (math.round(10 *
    REGISTRATION_RETRY_FUZZ_MULTIPLIER))
  // Interval for the following 10 attempts: round(60 * multiplier), i.e. 30-90 seconds
  private val PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS = (math.round(60
    * REGISTRATION_RETRY_FUZZ_MULTIPLIER))

  // Cleanup is disabled by default; used together with cleanupThreadExecutor above
  private val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", false)
  // How often worker will clean up old app folders
  // How often the worker cleans up old app folders, default every 30 minutes
  private val CLEANUP_INTERVAL_MILLIS =
    conf.getLong("spark.worker.cleanup.interval", 60 * 30) * 1000
  // TTL for app folders/data;  after TTL expires it will be cleaned up
  // TTL for app folders/data; cleaned up once the TTL expires, default 7 days
  private val APP_DATA_RETENTION_SECONDS =
    conf.getLong("spark.worker.cleanup.appDataTtl", 7 * 24 * 3600)

  // Whether we are running under test
  private val testing: Boolean = sys.props.contains("spark.testing")
  private var master: Option[RpcEndpointRef] = None

  /**
   * Whether to use the master address in `masterRpcAddresses` if possible. If it's disabled, Worker
   * will just use the address received from Master.
   */
  // Whether to prefer the master address in masterRpcAddresses; if disabled, the worker uses the host:port received from the master. Disabled by default
  private val preferConfiguredMasterAddress =
    conf.getBoolean("spark.worker.preferConfiguredMasterAddress", false)
  /**
   * The master address to connect in case of failure. When the connection is broken, worker will
   * use this address to connect. This is usually just one of `masterRpcAddresses`. However, when
   * a master is restarted or takes over leadership, it will be an address sent from master, which
   * may not be in `masterRpcAddresses`.
   */
  // Master address to reconnect to after the connection is broken
  // Usually one of masterRpcAddresses, but after a master restart or failover the new address may not be in that list
  private var masterAddressToConnect: Option[RpcAddress] = None
  private var activeMasterUrl: String = ""
  private[worker] var activeMasterWebUiUrl : String = ""
  private var workerWebUiUrl: String = ""
  // URI of this worker endpoint, built from the RpcEnv address and the endpoint name
  private val workerUri = RpcEndpointAddress(rpcEnv.address, endpointName).toString
  private var registered = false
  private var connected = false
  // Worker ID: a timestamp combined with the registered host and port
  private val workerId = generateWorkerId()
  // Spark home directory
  private val sparkHome =
    if (testing) {
      assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!")
      new File(sys.props("spark.test.home"))
    } else {
      new File(sys.env.get("SPARK_HOME").getOrElse("."))
    }

  var workDir: File = null
  // Finished executors
  val finishedExecutors = new LinkedHashMap[String, ExecutorRunner]
  // Running drivers
  val drivers = new HashMap[String, DriverRunner]
  // Running executors
  val executors = new HashMap[String, ExecutorRunner]
  // Finished drivers
  val finishedDrivers = new LinkedHashMap[String, DriverRunner]
  // Directories created for each application
  val appDirectories = new HashMap[String, Seq[String]]
  // Finished applications
  val finishedApps = new HashSet[String]

  // Number of finished executors retained in the web UI, default 1000
  val retainedExecutors = conf.getInt("spark.worker.ui.retainedExecutors",
    WorkerWebUI.DEFAULT_RETAINED_EXECUTORS)
  // Number of finished drivers retained in the web UI, default 1000
  val retainedDrivers = conf.getInt("spark.worker.ui.retainedDrivers",
    WorkerWebUI.DEFAULT_RETAINED_DRIVERS)

  // The shuffle service is not actually started unless configured.
  // External shuffle service; it only starts if enabled in the configuration (disabled by default)
  private val shuffleService = new ExternalShuffleService(conf, securityMgr)

  private val publicAddress = {
    val envVar = conf.getenv("SPARK_PUBLIC_DNS")
    if (envVar != null) envVar else host
  }
  private var webUi: WorkerWebUI = null

  private var connectionAttemptCount = 0

  // Metrics system registration; the metrics system is analyzed in a later post
  private val metricsSystem = MetricsSystem.createMetricsSystem("worker", conf, securityMgr)
  private val workerSource = new WorkerSource(this)

  // Whether the UI reverse proxy is enabled, default off
  val reverseProxy = conf.getBoolean("spark.ui.reverseProxy", false)

  // Futures of the pending registration attempts to the masters
  private var registerMasterFutures: Array[JFuture[_]] = null
  // Timer that schedules registration retries
  private var registrationRetryTimer: Option[JScheduledFuture[_]] = None

  // A thread pool for registering with masters. Because registering with a master is a blocking
  // action, this thread pool must be able to create "masterRpcAddresses.size" threads at the same
  // time so that we can register with all masters.
  // Thread pool used to register with all masters at the same time
  private val registerMasterThreadPool = ThreadUtils.newDaemonCachedThreadPool(
    "worker-register-master-threadpool",
    masterRpcAddresses.length // Make sure we can register with all masters at the same time
  )

  // Cores and memory currently in use
  var coresUsed = 0
  var memoryUsed = 0

  def coresFree: Int = cores - coresUsed
  def memoryFree: Int = memory - memoryUsed
}
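To make the heartbeat and retry arithmetic above concrete, here is a small standalone sketch (my own example, not Spark code) that reproduces the computations: with a fuzz multiplier drawn from [0.5, 1.5), the initial interval lands in roughly 5-15 seconds, the prolonged interval in roughly 30-90 seconds, and the default 60-second worker timeout yields a 15-second heartbeat.

import java.util.UUID
import scala.util.Random

// Standalone sketch of the interval math used by Worker (values only, no RPC).
object RetryIntervalSketch {
  def main(args: Array[String]): Unit = {
    val rng = new Random(UUID.randomUUID.getMostSignificantBits)
    val fuzzMultiplier = rng.nextDouble + 0.500                      // in [0.5, 1.5)
    val initialIntervalSeconds   = math.round(10 * fuzzMultiplier)   // first 6 attempts: ~5-15s
    val prolongedIntervalSeconds = math.round(60 * fuzzMultiplier)   // next 10 attempts: ~30-90s
    val heartbeatMillis = 60L * 1000 / 4                             // spark.worker.timeout = 60s -> 15s

    println(s"initial retry interval:   $initialIntervalSeconds s")
    println(s"prolonged retry interval: $prolongedIntervalSeconds s")
    println(s"heartbeat interval:       $heartbeatMillis ms")
  }
}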

 

All of the fields above are initialized when the Worker is instantiated.

Worker extends ThreadSafeRpcEndpoint, and ThreadSafeRpcEndpoint in turn extends RpcEndpoint:

private[spark] trait ThreadSafeRpcEndpoint extends RpcEndpoint

An RpcEndpoint always runs inside an RpcEnv, and RpcEnv instances are created through the RpcEnvFactory factory trait:

private[spark] trait RpcEnvFactory {

  def create(config: RpcEnvConfig): RpcEnv
}

Worker overrides RpcEndpoint's onStart, receive and onStop methods, so once the Worker endpoint is registered with the RpcEnv, its onStart method is invoked.
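As a rough illustration of that lifecycle, here is a simplified, self-contained sketch; ToyEndpoint and ToyEnv are made-up names, not Spark's private RPC classes. Registering an endpoint triggers onStart, incoming messages are dispatched to receive, and shutdown calls onStop.

import scala.collection.mutable.ArrayBuffer

// Toy model of the RpcEndpoint lifecycle (not Spark's actual API).
trait ToyEndpoint {
  def onStart(): Unit = {}
  def receive: PartialFunction[Any, Unit]
  def onStop(): Unit = {}
}

class ToyEnv {
  private val endpoints = ArrayBuffer.empty[ToyEndpoint]

  // Registering an endpoint calls onStart once, like setupEndpoint does for Worker.
  def setupEndpoint(e: ToyEndpoint): ToyEndpoint = { endpoints += e; e.onStart(); e }

  // Deliver a message to an endpoint's receive function if it handles it.
  def send(e: ToyEndpoint, msg: Any): Unit =
    if (e.receive.isDefinedAt(msg)) e.receive(msg)

  // Shutting the environment down calls onStop on every registered endpoint.
  def shutdown(): Unit = endpoints.foreach(_.onStop())
}

In Spark itself this registration happens in the startRpcEnvAndEndpoint() covered in the previous post, which calls rpcEnv.setupEndpoint(ENDPOINT_NAME, new Worker(...)).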

onStart()

  override def onStart() {
    // Assert that the worker has not registered yet
    // registered was initialized to false above
    assert(!registered)
    // Log the worker startup information
    logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format(
      host, port, cores, Utils.megabytesToString(memory)))
    logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
    logInfo("Spark home: " + sparkHome)

    // Create the work directory
    createWorkDir()
    // Start the external shuffle service (if enabled)
    startExternalShuffleService()
    // Initialize the web UI
    webUi = new WorkerWebUI(this, workDir, webUiPort)
    // Bind the web UI to the HTTP server
    webUi.bind()

    // Record the web UI URL with the bound port
    workerWebUiUrl = s"http://$publicAddress:${webUi.boundPort}"
    // Register this worker with the master(s)
    registerWithMaster()

    // Register the worker metrics source
    metricsSystem.registerSource(workerSource)
    // Start the metrics system
    metricsSystem.start()
    // Attach the worker metrics servlet handler to the web ui after the metrics system is started.
    // Attach the metrics servlet handlers to the web UI
    metricsSystem.getServletHandlers.foreach(webUi.attachHandler)
  }

createWorkDir() 

  private def createWorkDir() {
    // If workDirPath is not set, default to a "work" directory under the Spark home
    workDir = Option(workDirPath).map(new File(_)).getOrElse(new File(sparkHome, "work"))
    try {
      // This sporadically fails - not sure why ... !workDir.exists() && !workDir.mkdirs()
      // So attempting to create and then check if directory was created or not.
      // Create the directory; if it does not exist or is not a directory afterwards, log an error and exit
      workDir.mkdirs()
      if ( !workDir.exists() || !workDir.isDirectory) {
        logError("Failed to create work directory " + workDir)
        System.exit(1)
      }
      // Assert that the path is a directory
      assert (workDir.isDirectory)
    } catch {
      // On any exception, log the error and exit
      case e: Exception =>
        logError("Failed to create work directory " + workDir, e)
        System.exit(1)
    }
  }

 startExternalShuffleService()

The external shuffle service is a long-running service on the worker that serves shuffle files to executors; it will be analyzed later. Here the worker only starts it if it has been enabled.

  private def startExternalShuffleService() {
    try {
      shuffleService.startIfEnabled()
    } catch {
      case e: Exception =>
        logError("Failed to start external shuffle service", e)
        System.exit(1)
    }
  }
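For reference, whether startIfEnabled() actually starts anything is governed by a SparkConf flag; to the best of my knowledge the switch is spark.shuffle.service.enabled, which defaults to false, roughly as sketched below.

import org.apache.spark.SparkConf

// Illustrative only: with this flag set, shuffleService.startIfEnabled() starts the
// external shuffle service on the worker; without it the call is a no-op.
object ShuffleServiceConfSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("spark.shuffle.service.enabled", "true")
    println(conf.getBoolean("spark.shuffle.service.enabled", false))   // true
  }
}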

Continuing down onStart, webUi = new WorkerWebUI(this, workDir, webUiPort) instantiates the WorkerWebUI.

WorkerWebUI sets up the web UI pages, handlers, and layout.

private[worker]
class WorkerWebUI(
    val worker: Worker,
    val workDir: File,
    requestedPort: Int)
  extends WebUI(worker.securityMgr, worker.securityMgr.getSSLOptions("standalone"),
    requestedPort, worker.conf, name = "WorkerUI")
  with Logging {

  // RPC ask timeout, read from the worker's configuration
  private[ui] val timeout = RpcUtils.askRpcTimeout(worker.conf)

  // Initialize the web UI components
  initialize()

  /** Initialize all components of the server. */
  // Initialize all components of the web server
  def initialize() {
    // LogPage renders the log view
    val logPage = new LogPage(this)
    // Attach the log page to the UI
    attachPage(logPage)
    // Attach the worker overview page
    attachPage(new WorkerPage(this))
    // Serve files from the static resource directory under /static
    attachHandler(createStaticHandler(WorkerWebUI.STATIC_RESOURCE_BASE, "/static"))
    // Register the /log servlet handler
    attachHandler(createServletHandler("/log",
      (request: HttpServletRequest) => logPage.renderLog(request),
      worker.securityMgr,
      worker.conf))
  }
}

private[worker] object WorkerWebUI {
  // Location of the static resources
  val STATIC_RESOURCE_BASE = SparkUI.STATIC_RESOURCE_DIR
  // Default number of finished drivers retained in the web UI
  val DEFAULT_RETAINED_DRIVERS = 1000
  // Default number of finished executors retained in the web UI
  val DEFAULT_RETAINED_EXECUTORS = 1000
}
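The attachPage/attachHandler calls simply accumulate components into the UI before it is bound; a toy sketch of that pattern (ToyPage and ToyWebUI are invented names, not Spark's WebUI API) looks like this:

import scala.collection.mutable.ArrayBuffer

// Toy model of the attach pattern used in WorkerWebUI.initialize().
trait ToyPage { def render(): String }

class ToyWebUI {
  private val pages    = ArrayBuffer.empty[ToyPage]
  private val handlers = ArrayBuffer.empty[String]

  def attachPage(p: ToyPage): Unit      = pages += p        // collect pages
  def attachHandler(path: String): Unit = handlers += path  // collect servlet/static handlers

  // Binding happens once, after everything has been attached.
  def bind(): Unit =
    println(s"serving ${pages.size} pages and handlers at ${handlers.mkString(", ")}")
}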

Then webUi.bind() binds the web UI to the HTTP server.

  /** Bind to the HTTP server behind this web interface. */
  def bind(): Unit = {
    // If the server has already been bound, fail: it must not be bound more than once
    assert(serverInfo.isEmpty, s"Attempted to bind $className more than once!")
    try {
      // Determine the bind host (SPARK_LOCAL_IP if set, otherwise 0.0.0.0) and start the Jetty server
      val host = Option(conf.getenv("SPARK_LOCAL_IP")).getOrElse("0.0.0.0")
      serverInfo = Some(startJettyServer(host, port, sslOptions, handlers, conf, name))
      logInfo(s"Bound $className to $host, and started at $webUrl")
    } catch {
      case e: Exception =>
        // If binding fails, log the error and exit
        logError(s"Failed to bind $className", e)
        System.exit(1)
    }
  }

Next, registerWithMaster() starts the registration; this method is also invoked again by reregisterWithMaster() on each retry.

  // Register with the master(s)
  private def registerWithMaster() {
    // onDisconnected may be triggered multiple times, so don't attempt registration
    // if there are outstanding registration attempts scheduled.
    // If a registration attempt is already scheduled, do not start another one
    registrationRetryTimer match {
      // No attempt in progress: try to register with all masters
      case None =>
        registered = false
        // Register with all masters through the registerMasterThreadPool
        registerMasterFutures = tryRegisterAllMasters()
        // Reset the connection attempt counter
        connectionAttemptCount = 0
        // Schedule periodic re-registration in case registration times out
        registrationRetryTimer = Some(forwordMessageScheduler.scheduleAtFixedRate(
          new Runnable {
            override def run(): Unit = Utils.tryLogNonFatalError {
              // Send ReregisterWithMaster to this endpoint itself, which triggers another registration attempt
              Option(self).foreach(_.send(ReregisterWithMaster))
            }
          },
          // Both the initial delay and the period use the initial retry interval (5-15 seconds);
          INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
          // the prolonged 30-90 second interval is applied later by reregisterWithMaster()
          INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
          TimeUnit.SECONDS))
      // An attempt is already scheduled, so just log a message and wait for it to finish
      case Some(_) =>
        logInfo("Not spawning another attempt to register with the master, since there is an" +
          " attempt scheduled already.")
    }
  }
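The key pattern here is a scheduled task that periodically sends a message back to the worker itself until registration succeeds, at which point the timer is cancelled. A minimal standalone sketch of that pattern (my own example, not Spark code):

import java.util.concurrent.{Executors, TimeUnit}
import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}

object RetrySketch {
  def main(args: Array[String]): Unit = {
    val scheduler  = Executors.newSingleThreadScheduledExecutor()
    val registered = new AtomicBoolean(false)
    val attempts   = new AtomicInteger(0)

    // Periodically fire a "re-register" action, like ReregisterWithMaster above.
    val timer = scheduler.scheduleAtFixedRate(new Runnable {
      override def run(): Unit = {
        val n = attempts.incrementAndGet()
        println(s"re-register attempt $n")
        if (n >= 3) registered.set(true)       // pretend the 3rd attempt succeeds
      }
    }, 1, 1, TimeUnit.SECONDS)                 // initial delay and period

    while (!registered.get()) Thread.sleep(100)
    timer.cancel(true)                         // the Worker cancels registrationRetryTimer once registered
    scheduler.shutdown()
  }
}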

tryRegisterAllMasters() submits one registration attempt for every master.

  private def tryRegisterAllMasters(): Array[JFuture[_]] = {
    // Register with every master address passed in on the command line
    masterRpcAddresses.map { masterAddress =>
      // Submit one registration task per master to the thread pool
      registerMasterThreadPool.submit(new Runnable {
        override def run(): Unit = {
          try {
            logInfo("Connecting to master " + masterAddress + "...")
            // Get an RpcEndpointRef to the Master at this address
            val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
            // Send the registration message to this master
            sendRegisterMessageToMaster(masterEndpoint)
          } catch {
            case ie: InterruptedException => // Cancelled
            case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
          }
        }
      })
    }
  }
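Because each registration call blocks, one task per master address is submitted to a pool sized to the number of masters, and the resulting futures are kept so they can be cancelled later. A minimal sketch of the same pattern (the addresses are hypothetical, not Spark code):

import java.util.concurrent.{Executors, Future => JFuture}

object RegisterAllSketch {
  def main(args: Array[String]): Unit = {
    val masterAddresses = Seq("master-a:7077", "master-b:7077")   // hypothetical addresses
    val pool = Executors.newFixedThreadPool(masterAddresses.size)

    // One blocking "register" task per master, collected as futures.
    val futures: Seq[JFuture[_]] = masterAddresses.map { addr =>
      pool.submit(new Runnable {
        override def run(): Unit = {
          println(s"Connecting to master $addr...")
          // a real implementation would resolve the endpoint and send RegisterWorker here
        }
      })
    }

    futures.foreach(_.get())   // wait only for demonstration; the Worker keeps them to cancel later
    pool.shutdown()
  }
}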

 sendRegisterMessageToMaster(masterEndpoint)

This calls RpcEndpointRef's send() to deliver the worker's registration details (ID, host, port, cores, memory, web UI URL) to the Master. This is where the registration message is actually sent.

  private def sendRegisterMessageToMaster(masterEndpoint: RpcEndpointRef): Unit = {
    masterEndpoint.send(RegisterWorker(
      workerId,
      host,
      port,
      self,
      cores,
      memory,
      workerWebUiUrl,
      masterEndpoint.address))
  }

The self argument is simply this Worker's own RpcEndpointRef:

  final def self: RpcEndpointRef = {
    require(rpcEnv != null, "rpcEnv has not been initialized")
    rpcEnv.endpointRef(this)
  }

send()

private[spark] abstract class RpcEndpointRef(conf: SparkConf){
    ...

 /**
   * Sends a one-way asynchronous message. Fire-and-forget semantics.
   */
  def send(message: Any): Unit
}
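send() is fire-and-forget: the caller hands a message object (typically a case class) to the remote endpoint and does not wait for a reply. A toy local illustration of those semantics (RegisterNode is an invented message type, not Spark's):

// Toy fire-and-forget messaging, all in one process.
case class RegisterNode(id: String, cores: Int)   // hypothetical message type

object SendSketch {
  val receive: PartialFunction[Any, Unit] = {
    case RegisterNode(id, cores) => println(s"registering $id with $cores cores")
    case other                   => println(s"unhandled message: $other")
  }

  def send(message: Any): Unit = receive(message)   // fire-and-forget: no return value

  def main(args: Array[String]): Unit = {
    send(RegisterNode("worker-1", 8))
  }
}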

From here we can jump to the Master's receive() to see how the message is handled.

  override def receive: PartialFunction[Any, Unit] = {
    // Handles leader-election messages, etc. ...
    case ElectedLeader =>
        ...
      }

    case CompleteRecovery => completeRecovery()

    // Leadership has been revoked; shut down
    case RevokedLeadership =>
      logError("Leadership has been revoked -- master shutting down.")
      System.exit(0)

    // This case matches the RegisterWorker message sent by the Worker
    case RegisterWorker(
      id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress) =>
      // Log the registration
      logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
        workerHost, workerPort, cores, Utils.megabytesToString(memory)))

      // STANDBY, ALIVE, RECOVERING, COMPLETING_RECOVERY
      // If this master is in STANDBY state, reply MasterInStandby through the passed-in workerRef (an RpcEndpointRef)
      if (state == RecoveryState.STANDBY) {
        workerRef.send(MasterInStandby)
        // Not standby: if the worker ID already exists in idToWorker (the master's map of registered workers), reject it as a duplicate
      } else if (idToWorker.contains(id)) {
        workerRef.send(RegisterWorkerFailed("Duplicate worker ID"))
        // Otherwise register the worker normally
      } else {
        // Build a WorkerInfo from the received parameters
        val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
          workerRef, workerWebUiUrl)
        // If registerWorker() accepts it, persist the worker, acknowledge the registration and trigger scheduling
        if (registerWorker(worker)) {
          persistenceEngine.addWorker(worker)
          workerRef.send(RegisteredWorker(self, masterWebUiUrl, masterAddress))
          schedule()
        } else {
          // Registration failed (a worker is already registered at this address): warn and send RegisterWorkerFailed back
          val workerAddress = worker.endpoint.address
          logWarning("Worker registration failed. Attempted to re-register worker at same " +
            "address: " + workerAddress)
          workerRef.send(RegisterWorkerFailed("Attempted to re-register worker at same address: "
            + workerAddress))
        }
      }
    }

If registration succeeds, the master replies via workerRef.send() with a RegisteredWorker message, a serializable DeployMessage:

private[deploy] sealed trait DeployMessage extends Serializable

/** Contains messages sent between Scheduler endpoint nodes. */
// Messages exchanged between the deploy endpoints (master, workers and clients)
private[deploy] object DeployMessages {
  case class RegisteredWorker(
      master: RpcEndpointRef,
      masterWebUiUrl: String,
      masterAddress: RpcAddress) extends DeployMessage with RegisterWorkerResponse

  ...
}

schedule() handles resource scheduling; the scheduling components will be analyzed in a later post.

  /**
   * Schedule the currently available resources among waiting apps. This method will be called
   * every time a new app joins or resource availability changes.
   */
  // Schedules the currently available resources among waiting apps; called whenever a new app joins or resource availability changes
  // If this master is not ALIVE, return immediately
  private def schedule(): Unit = {
    if (state != RecoveryState.ALIVE) {
      return
    }
    // Drivers take strict precedence over executors
    // Drivers take strict precedence over executors
    // Assign each waiting driver to an alive worker in round-robin fashion
    val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
    val numWorkersAlive = shuffledAliveWorkers.size
    var curPos = 0
    for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
      // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
      // start from the last worker that was assigned a driver, and continue onwards until we have
      // explored all alive workers.
      var launched = false
      var numWorkersVisited = 0
      while (numWorkersVisited < numWorkersAlive && !launched) {
        val worker = shuffledAliveWorkers(curPos)
        numWorkersVisited += 1
        if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
          launchDriver(worker, driver)
          waitingDrivers -= driver
          launched = true
        }
        curPos = (curPos + 1) % numWorkersAlive
      }
    }
    startExecutorsOnWorkers()
  }
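The driver-placement loop is plain round-robin over a shuffled list of alive workers: each waiting driver is launched on the next worker with enough free memory and cores, otherwise it stays in the queue. A simplified, self-contained sketch of the same loop (WorkerSlot and DriverReq are made-up types, not Spark classes):

import scala.util.Random

case class WorkerSlot(name: String, var memFree: Int, var coresFree: Int)
case class DriverReq(name: String, mem: Int, cores: Int)

object ScheduleSketch {
  def main(args: Array[String]): Unit = {
    // Shuffle the alive workers, like shuffledAliveWorkers above.
    val workers = Random.shuffle(Seq(
      WorkerSlot("w1", memFree = 4096, coresFree = 4),
      WorkerSlot("w2", memFree = 1024, coresFree = 2)))
    val waitingDrivers = Seq(
      DriverReq("d1", mem = 2048, cores = 2),
      DriverReq("d2", mem = 512, cores = 1))

    var curPos = 0
    for (driver <- waitingDrivers) {
      var launched = false
      var visited = 0
      // Walk the workers round-robin until one can host this driver.
      while (visited < workers.size && !launched) {
        val w = workers(curPos)
        visited += 1
        if (w.memFree >= driver.mem && w.coresFree >= driver.cores) {
          w.memFree -= driver.mem        // "launch" the driver on this worker
          w.coresFree -= driver.cores
          println(s"launching ${driver.name} on ${w.name}")
          launched = true
        }
        curPos = (curPos + 1) % workers.size
      }
      if (!launched) println(s"${driver.name} stays in the waiting queue")
    }
  }
}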

With registration covered, back in onStart() only the metricsSystem calls remain (the metrics system will be covered in a later post), so onStart() is essentially done.

Returning to startRpcEnvAndEndpoint() in org.apache.spark.deploy.worker.Worker, the instantiation of the Worker is now complete.

Worker has a number of other methods as well; they will be covered when they are invoked.

 
