Flink Akka application: Apache Flink's JobManager actor

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.flink.runtime.jobmanager


import java.io.{File, IOException}
import java.lang.reflect.{Constructor, InvocationTargetException}
import java.net.{UnknownHostException, InetAddress, InetSocketAddress}
import java.util.UUID


import akka.actor.Status.Failure
import akka.actor._
import akka.pattern.ask
import grizzled.slf4j.Logger
import org.apache.flink.api.common.{ExecutionConfig, JobID}
import org.apache.flink.configuration.{ConfigConstants, Configuration, GlobalConfiguration}
import org.apache.flink.core.io.InputSplitAssigner
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot
import org.apache.flink.runtime.akka.{AkkaUtils, ListeningBehaviour}
import org.apache.flink.runtime.blob.BlobServer
import org.apache.flink.runtime.checkpoint.{CheckpointRecoveryFactory, StandaloneCheckpointRecoveryFactory, ZooKeeperCheckpointRecoveryFactory}
import org.apache.flink.runtime.client._
import org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager
import org.apache.flink.runtime.executiongraph.{ExecutionGraph, ExecutionJobVertex}
import org.apache.flink.runtime.instance.{AkkaActorGateway, InstanceManager}
import org.apache.flink.runtime.jobgraph.jsonplan.JsonPlanGenerator
import org.apache.flink.runtime.jobgraph.{JobGraph, JobStatus, JobVertexID}
import org.apache.flink.runtime.jobmanager.SubmittedJobGraphStore.SubmittedJobGraphListener
import org.apache.flink.runtime.jobmanager.scheduler.{Scheduler => FlinkScheduler}
import org.apache.flink.runtime.leaderelection.{LeaderContender, LeaderElectionService, StandaloneLeaderElectionService}
import org.apache.flink.runtime.leaderretrieval.{StandaloneLeaderRetrievalService, LeaderRetrievalService}
import org.apache.flink.runtime.messages.ArchiveMessages.ArchiveExecutionGraph
import org.apache.flink.runtime.messages.ExecutionGraphMessages.JobStatusChanged
import org.apache.flink.runtime.messages.JobManagerMessages._
import org.apache.flink.runtime.messages.Messages.{Acknowledge, Disconnect}
import org.apache.flink.runtime.messages.RegistrationMessages._
import org.apache.flink.runtime.messages.TaskManagerMessages.{Heartbeat, SendStackTrace}
import org.apache.flink.runtime.messages.TaskMessages.{PartitionState, UpdateTaskExecutionState}
import org.apache.flink.runtime.messages.accumulators.{AccumulatorMessage, AccumulatorResultStringsFound, AccumulatorResultsErroneous, AccumulatorResultsFound, RequestAccumulatorResults, RequestAccumulatorResultsStringified}
import org.apache.flink.runtime.messages.checkpoint.{AbstractCheckpointMessage, AcknowledgeCheckpoint}
import org.apache.flink.runtime.messages.webmonitor._
import org.apache.flink.runtime.process.ProcessReaper
import org.apache.flink.runtime.security.SecurityUtils
import org.apache.flink.runtime.security.SecurityUtils.FlinkSecuredRunner
import org.apache.flink.runtime.taskmanager.TaskManager
import org.apache.flink.runtime.util._
import org.apache.flink.runtime.webmonitor.{WebMonitor, WebMonitorUtils}
import org.apache.flink.runtime.{FlinkActor, LeaderSessionMessageFilter, LogMessages, StreamingMode}
import org.apache.flink.util.{ExceptionUtils, InstantiationUtil, NetUtils}


import scala.collection.JavaConverters._
import scala.concurrent._
import scala.concurrent.duration._
import scala.concurrent.forkjoin.ForkJoinPool
import scala.language.postfixOps




/**
 * The job manager is responsible for receiving Flink jobs, scheduling the tasks, gathering the
 * job status and managing the task managers. It is realized as an actor and receives,
 * among others, the following messages:
 *
 *  - [[RegisterTaskManager]] is sent by a TaskManager which wants to register at the job manager.
 *  A successful registration at the instance manager is acknowledged by [[AcknowledgeRegistration]]
 *
 *  - [[SubmitJob]] is sent by a client which wants to submit a job to the system. The submit
 *  message contains the job description in the form of the JobGraph. The JobGraph is appended to
 *  the ExecutionGraph and the corresponding ExecutionJobVertices are scheduled for execution on
 *  the TaskManagers.
 *
 *  - [[CancelJob]] requests to cancel the job with the specified jobID. A successful cancellation
 *  is indicated by [[CancellationSuccess]] and a failure by [[CancellationFailure]]
 *
 * - [[UpdateTaskExecutionState]] is sent by a TaskManager to update the state of an
 * ExecutionVertex contained in the [[ExecutionGraph]]. A successful update is
 * acknowledged with true, otherwise with false.
 *
 * - [[RequestNextInputSplit]] requests the next input split for a running task on a
 * [[TaskManager]]. The assigned input split or null is sent to the sender in the form of the
 * message [[NextInputSplit]].
 *
 * - [[JobStatusChanged]] indicates that the status of a job (RUNNING, CANCELING, FINISHED,
 * etc.) has changed. This message is sent by the ExecutionGraph.
 */
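// For orientation, a client talks to this actor with plain Akka messaging. Below is a
// minimal sketch of querying the total slot count via the ask pattern; the actor lookup
// and the timeout value are illustrative assumptions, not part of this file:
//
//   import akka.pattern.ask
//   import akka.util.Timeout
//   import scala.concurrent.Await
//   import scala.concurrent.duration._
//
//   implicit val askTimeout = Timeout(10 seconds)
//   val jobManager: ActorRef = ??? // e.g. resolved via system.actorSelection
//   val totalSlots = Await.result(
//     (jobManager ? RequestTotalNumberOfSlots).mapTo[Int], askTimeout.duration)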
class JobManager(
    protected val flinkConfiguration: Configuration,
    protected val executionContext: ExecutionContext,
    protected val instanceManager: InstanceManager,
    protected val scheduler: FlinkScheduler,
    protected val libraryCacheManager: BlobLibraryCacheManager,
    protected val archive: ActorRef,
    protected val defaultExecutionRetries: Int,
    protected val delayBetweenRetries: Long,
    protected val timeout: FiniteDuration,
    protected val mode: StreamingMode,
    protected val leaderElectionService: LeaderElectionService,
    protected val submittedJobGraphs : SubmittedJobGraphStore,
    protected val checkpointRecoveryFactory : CheckpointRecoveryFactory)
  extends FlinkActor 
  with LeaderSessionMessageFilter // mixin order is important, we want filtering after logging
  with LogMessages // mixin order is important, we want logging first
  with LeaderContender
  with SubmittedJobGraphListener {


  override val log = Logger(getClass)


  /** Jobs that are either running or not yet archived (the session has not ended). */
  protected val currentJobs = scala.collection.mutable.HashMap[JobID, (ExecutionGraph, JobInfo)]()


  protected val recoveryMode = RecoveryMode.fromConfig(flinkConfiguration)


  var leaderSessionID: Option[UUID] = None


  /** Futures which have to be completed before terminating the job manager */
  var futuresToComplete: Option[Seq[Future[Unit]]] = None


  /**
   * The port of the web monitor as configured. Make sure that it is actually configured before
   * starting the JobManager. This tightly couples the web monitor with the job manager. It is a
   * temporary workaround until all execution graph components are properly serializable and all
   * web monitors can transparently interact with each job manager. Currently each web server has
   * to run in the actor system of the associated job manager.
   */
  val webMonitorPort : Int = flinkConfiguration.getInteger(
    ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, -1)


  /**
   * Run when the job manager is started. Logs an informational message and starts the
   * leader election service, the submitted job graph store, and the checkpoint recovery
   * service.
   */
  override def preStart(): Unit = {
    log.info(s"Starting JobManager at ${getAddress}.")


    try {
      leaderElectionService.start(this)
    } catch {
      case e: Exception =>
        log.error("Could not start the JobManager because the leader election service did not " +
          "start.", e)
        throw new RuntimeException("Could not start the leader election service.", e)
    }


    try {
      submittedJobGraphs.start(this)
    } catch {
      case e: Exception =>
        log.error("Could not start the submitted job graphs service.", e)
        throw new RuntimeException("Could not start the submitted job graphs service.", e)
    }


    try {
      checkpointRecoveryFactory.start()
    } catch {
      case e: Exception =>
        log.error("Could not start the checkpoint recovery service.", e)
        throw new RuntimeException("Could not start the checkpoint recovery service.", e)
    }
  }


  override def postStop(): Unit = {
    log.info(s"Stopping JobManager ${getAddress}.")


    val newFuturesToComplete = cancelAndClearEverything(
      new Exception("The JobManager is shutting down."),
      true)


    implicit val executionContext = context.dispatcher


    val futureToComplete = Future.sequence(
      futuresToComplete.getOrElse(Seq()) ++ newFuturesToComplete)


    Await.ready(futureToComplete, timeout)


    // disconnect the registered task managers
    instanceManager.getAllRegisteredInstances.asScala.foreach {
      _.getActorGateway().tell(
        Disconnect("JobManager is shutting down"),
        new AkkaActorGateway(self, leaderSessionID.orNull))
    }


    try {
      // revoke leadership and stop leader election service
      leaderElectionService.stop()
    } catch {
      case e: Exception => log.error("Could not properly shutdown the leader election service.")
    }


    try {
      submittedJobGraphs.stop()
    } catch {
      case e: Exception => log.error("Could not properly stop the submitted job graphs service.")
    }


    try {
      checkpointRecoveryFactory.stop()
    } catch {
      case e: Exception => log.error("Could not properly stop the checkpoint recovery service.")
    }


    if (archive != ActorRef.noSender) {
      archive ! decorateMessage(PoisonPill)
    }


    instanceManager.shutdown()
    scheduler.shutdown()


    try {
      libraryCacheManager.shutdown()
    } catch {
      case e: IOException => log.error("Could not properly shutdown the library cache manager.", e)
    }


    log.debug(s"Job manager ${self.path} is completely stopped.")
  }


  /**
   * Central work method of the JobManager actor. Receives messages and reacts to them.
   *
   * @return The partial function that handles the JobManager's messages.
   */
  override def handleMessage: Receive = {


    case GrantLeadership(newLeaderSessionID) =>
      log.info(s"JobManager ${getAddress} was granted leadership with leader session ID " +
        s"${newLeaderSessionID}.")


      leaderSessionID = newLeaderSessionID


      // confirming the leader session ID might be blocking, thus do it in a future
      future{
        leaderElectionService.confirmLeaderSessionID(newLeaderSessionID.orNull)


        // TODO (critical next step) This needs to be more flexible and robust (e.g. wait for task
        // managers etc.)
        if (recoveryMode != RecoveryMode.STANDALONE) {
          context.system.scheduler.scheduleOnce(new FiniteDuration(delayBetweenRetries,
            MILLISECONDS), self, decorateMessage(RecoverAllJobs))(context.dispatcher)
        }
      }(context.dispatcher)


    case RevokeLeadership =>
      log.info(s"JobManager ${self.path.toSerializationFormat} was revoked leadership.")


      val newFuturesToComplete = cancelAndClearEverything(
        new Exception("JobManager is no longer the leader."),
        false)


      futuresToComplete = Some(futuresToComplete.getOrElse(Seq()) ++ newFuturesToComplete)


      // disconnect the registered task managers
      instanceManager.getAllRegisteredInstances.asScala.foreach {
        _.getActorGateway().tell(
          Disconnect("JobManager is no longer the leader"),
          new AkkaActorGateway(self, leaderSessionID.orNull))
      }


      instanceManager.unregisterAllTaskManagers()


      leaderSessionID = None


    case RegisterTaskManager(
      connectionInfo,
      hardwareInformation,
      numberOfSlots) =>


      val taskManager = sender()


      if (instanceManager.isRegistered(taskManager)) {
        val instanceID = instanceManager.getRegisteredInstance(taskManager).getId


        // IMPORTANT: Send the response to the "sender", which is not the
        //            TaskManager actor, but the ask future!
        sender() ! decorateMessage(
          AlreadyRegistered(
            instanceID,
            libraryCacheManager.getBlobServerPort)
        )
      }
      else {
        try {
          val instanceID = instanceManager.registerTaskManager(
            taskManager,
            connectionInfo,
            hardwareInformation,
            numberOfSlots,
            leaderSessionID.orNull)


          // IMPORTANT: Send the response to the "sender", which is not the
          //            TaskManager actor, but the ask future!
          sender() ! decorateMessage(
            AcknowledgeRegistration(
              instanceID,
              libraryCacheManager.getBlobServerPort)
          )


          // to be notified when the taskManager is no longer reachable
          context.watch(taskManager)
        }
        catch {
          // registerTaskManager throws an IllegalStateException if it is already shut down
          // let the actor crash and restart itself in this case
          case e: Exception =>
            log.error("Failed to register TaskManager at instance manager", e)


            // IMPORTANT: Send the response to the "sender", which is not the
            //            TaskManager actor, but the ask future!
            sender() ! decorateMessage(
              RefuseRegistration(
                ExceptionUtils.stringifyException(e))
            )
        }
      }


    case RequestNumberRegisteredTaskManager =>
      sender ! decorateMessage(instanceManager.getNumberOfRegisteredTaskManagers)


    case RequestTotalNumberOfSlots =>
      sender ! decorateMessage(instanceManager.getTotalNumberOfSlots)


    case SubmitJob(jobGraph, listeningBehaviour) =>
      val client = sender()


      val jobInfo = new JobInfo(client, listeningBehaviour, System.currentTimeMillis(),
        jobGraph.getSessionTimeout)


      submitJob(jobGraph, jobInfo)


    case RecoverSubmittedJob(submittedJobGraph) =>
      if (!currentJobs.contains(submittedJobGraph.getJobId)) {
        submitJob(
          submittedJobGraph.getJobGraph(),
          submittedJobGraph.getJobInfo(),
          isRecovery = true)
      }


    case RecoverJob(jobId) =>
      future {
        // The ActorRef, which is part of the submitted job graph, can only be deserialized
        // in the scope of an actor system.
        akka.serialization.JavaSerializer.currentSystem.withValue(
          context.system.asInstanceOf[ExtendedActorSystem]) {


          log.info(s"Attempting to recover job $jobId.")


          val submittedJobGraphOption = submittedJobGraphs.recoverJobGraph(jobId)


          submittedJobGraphOption match {
            case Some(submittedJobGraph) =>
              if (!leaderElectionService.hasLeadership()) {
                // we've lost leadership. mission: abort.
                log.warn(s"Lost leadership during recovery. Aborting recovery of $jobId.")
              }
              else {
                self ! decorateMessage(RecoverSubmittedJob(submittedJobGraph))
              }
            case None => log.warn(s"Failed to recover job graph $jobId.")
          }
        }
      }(context.dispatcher)
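      // Note on the idiom above: Akka's JavaSerializer resolves ActorRefs during Java
      // deserialization against the actor system held in the dynamic variable
      // JavaSerializer.currentSystem, so the recovery is wrapped in
      // currentSystem.withValue(...) to make this system available on the
      // deserializing thread.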


    case RecoverAllJobs =>
      future {
        // The ActorRef, which is part of the submitted job graph, can only be deserialized
        // in the scope of an actor system.
        akka.serialization.JavaSerializer.currentSystem.withValue(
          context.system.asInstanceOf[ExtendedActorSystem]) {


          log.info(s"Recovering all jobs.")


          val jobGraphs = submittedJobGraphs.recoverJobGraphs().asScala


          if (!leaderElectionService.hasLeadership()) {
            // we've lost leadership. mission: abort.
            log.warn(s"Lost leadership during recovery. Aborting recovery of ${jobGraphs.size} " +
              s"jobs.")
          }
          else {
            log.debug(s"Attempting to recover ${jobGraphs.size} job graphs.")


            jobGraphs.foreach{
              submittedJobGraph =>
                self ! decorateMessage(RecoverSubmittedJob(submittedJobGraph))
            }
          }
        }
      }(context.dispatcher)


    case CancelJob(jobID) =>
      log.info(s"Trying to cancel job with ID $jobID.")


      currentJobs.get(jobID) match {
        case Some((executionGraph, _)) =>
          // execute the cancellation asynchronously
          Future {
            executionGraph.cancel()
          }(context.dispatcher)


          sender ! decorateMessage(CancellationSuccess(jobID))
        case None =>
          log.info(s"No job found with ID $jobID.")
          sender ! decorateMessage(
            CancellationFailure(
              jobID,
              new IllegalArgumentException(s"No job found with ID $jobID."))
          )
      }


    case UpdateTaskExecutionState(taskExecutionState) =>
      if (taskExecutionState == null) {
        sender ! decorateMessage(false)
      } else {
        currentJobs.get(taskExecutionState.getJobID) match {
          case Some((executionGraph, _)) =>
            val originalSender = sender()


            Future {
              val result = executionGraph.updateState(taskExecutionState)
              originalSender ! decorateMessage(result)
            }(context.dispatcher)


          case None => log.error("Cannot find execution graph for ID " +
            s"${taskExecutionState.getJobID} to change state to " +
            s"${taskExecutionState.getExecutionState}.")
            sender ! decorateMessage(false)
        }
      }


    case RequestNextInputSplit(jobID, vertexID, executionAttempt) =>
      val serializedInputSplit = currentJobs.get(jobID) match {
        case Some((executionGraph,_)) =>
          val execution = executionGraph.getRegisteredExecutions.get(executionAttempt)


          if (execution == null) {
            log.error(s"Can not find Execution for attempt $executionAttempt.")
            null
          } else {
            val slot = execution.getAssignedResource
            val taskId = execution.getVertex.getParallelSubtaskIndex


            val host = if (slot != null) {
              slot.getInstance().getInstanceConnectionInfo.getHostname
            } else {
              null
            }


            executionGraph.getJobVertex(vertexID) match {
              case vertex: ExecutionJobVertex => vertex.getSplitAssigner match {
                case splitAssigner: InputSplitAssigner =>
                  val nextInputSplit = splitAssigner.getNextInputSplit(host, taskId)


                  log.debug(s"Send next input split $nextInputSplit.")


                  try {
                    InstantiationUtil.serializeObject(nextInputSplit)
                  } catch {
                    case ex: Exception =>
                      log.error(s"Could not serialize the next input split of " +
                        s"class ${nextInputSplit.getClass}.", ex)
                      vertex.fail(new RuntimeException("Could not serialize the next input split " +
                        "of class " + nextInputSplit.getClass + ".", ex))
                      null
                  }


                case _ =>
                  log.error(s"No InputSplitAssigner for vertex ID $vertexID.")
                  null
              }
              case _ =>
                log.error(s"Cannot find execution vertex for vertex ID $vertexID.")
                null
          }
        }
        case None =>
          log.error(s"Cannot find execution graph for job ID $jobID.")
          null
      }


      sender ! decorateMessage(NextInputSplit(serializedInputSplit))
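      // On the TaskManager side, the bytes carried by NextInputSplit are turned back
      // into an InputSplit with the user code class loader. A hedged sketch (the
      // variable names are assumptions):
      //
      //   val split = InstantiationUtil
      //     .deserializeObject(serializedInputSplit, userCodeClassLoader)
      //     .asInstanceOf[InputSplit]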


    case checkpointMessage : AbstractCheckpointMessage =>
      handleCheckpointMessage(checkpointMessage)


    case JobStatusChanged(jobID, newJobStatus, timeStamp, error) =>
      currentJobs.get(jobID) match {
        case Some((executionGraph, jobInfo)) =>


          log.info(
            s"Status of job $jobID (${executionGraph.getJobName}) changed to $newJobStatus.",
            error)


          if (newJobStatus.isTerminalState()) {
            jobInfo.end = timeStamp


            future{
              // TODO If removing the JobGraph from the SubmittedJobGraphsStore fails, the job will
              // linger around and potentially be recovered at a later time. There is nothing we
              // can do about that, but it should be communicated to the Client.
              if (jobInfo.sessionAlive) {
                jobInfo.setLastActive()
                val lastActivity = jobInfo.lastActive
                context.system.scheduler.scheduleOnce(jobInfo.sessionTimeout seconds) {
                  // remove only if no activity occurred in the meantime
                  if (lastActivity == jobInfo.lastActive) {
                    self ! decorateMessage(RemoveJob(jobID, true))
                  }
                }(context.dispatcher)
              } else {
                self ! decorateMessage(RemoveJob(jobID, true))
              }


              // is the client waiting for the job result?
              if (jobInfo.listeningBehaviour != ListeningBehaviour.DETACHED) {
                newJobStatus match {
                  case JobStatus.FINISHED =>
                  try {
                    val accumulatorResults = executionGraph.getAccumulatorsSerialized()
                    val result = new SerializedJobExecutionResult(
                      jobID,
                      jobInfo.duration,
                      accumulatorResults)


                    jobInfo.client ! decorateMessage(JobResultSuccess(result))
                  } catch {
                    case e: Exception =>
                      log.error(s"Cannot fetch final accumulators for job $jobID", e)
                      val exception = new JobExecutionException(jobID,
                        "Failed to retrieve accumulator results.", e)


                      jobInfo.client ! decorateMessage(JobResultFailure(
                        new SerializedThrowable(exception)))
                  }


                  case JobStatus.CANCELED =>
                    // the error may be packed as a serialized throwable
                    val unpackedError = SerializedThrowable.get(
                      error, executionGraph.getUserClassLoader())


                    jobInfo.client ! decorateMessage(JobResultFailure(
                      new SerializedThrowable(
                        new JobCancellationException(jobID, "Job was cancelled.", unpackedError))))


                  case JobStatus.FAILED =>
                    val unpackedError = SerializedThrowable.get(
                      error, executionGraph.getUserClassLoader())


                    jobInfo.client ! decorateMessage(JobResultFailure(
                      new SerializedThrowable(
                        new JobExecutionException(jobID, "Job execution failed.", unpackedError))))


                  case x =>
                    val exception = new JobExecutionException(jobID, s"$x is not a terminal state.")
                    jobInfo.client ! decorateMessage(JobResultFailure(
                      new SerializedThrowable(exception)))
                    throw exception
                }
              }
            }(context.dispatcher)
          }
        case None =>
          self ! decorateMessage(RemoveJob(jobID, true))
      }


    case ScheduleOrUpdateConsumers(jobId, partitionId) =>
      currentJobs.get(jobId) match {
        case Some((executionGraph, _)) =>
          sender ! decorateMessage(Acknowledge)
          executionGraph.scheduleOrUpdateConsumers(partitionId)
        case None =>
          log.error(s"Cannot find execution graph for job ID $jobId to schedule or update " +
            s"consumers.")
          sender ! decorateMessage(
            Failure(
              new IllegalStateException("Cannot find execution graph for job ID " +
                s"$jobId to schedule or update consumers.")
            )
          )
      }


    case RequestPartitionState(jobId, partitionId, taskExecutionId, taskResultId) =>
      val state = currentJobs.get(jobId) match {
        case Some((executionGraph, _)) =>
          val execution = executionGraph.getRegisteredExecutions.get(partitionId.getProducerId)


          if (execution != null) execution.getState else null
        case None =>
          // Nothing to do. This is not an error, because the request is received when a sending
          // task fails during a remote partition request.
          log.debug(s"Cannot find execution graph for job $jobId.")


          null
      }


      sender ! decorateMessage(
        PartitionState(
          taskExecutionId,
          taskResultId,
          partitionId.getPartitionId,
          state)
      )


    case RequestJobStatus(jobID) =>
      currentJobs.get(jobID) match {
        case Some((executionGraph,_)) =>
          sender ! decorateMessage(CurrentJobStatus(jobID, executionGraph.getState))
        case None =>
          // check the archive
          archive forward decorateMessage(RequestJobStatus(jobID))
      }


    case RequestRunningJobs =>
      val executionGraphs = currentJobs map {
        case (_, (eg, jobInfo)) => eg
      }


      sender ! decorateMessage(RunningJobs(executionGraphs))


    case RequestRunningJobsStatus =>
      try {
        val jobs = currentJobs map {
          case (_, (eg, _)) =>
            new JobStatusMessage(
              eg.getJobID,
              eg.getJobName,
              eg.getState,
              eg.getStatusTimestamp(JobStatus.CREATED)
            )
        }


        sender ! decorateMessage(RunningJobsStatus(jobs))
      }
      catch {
        case t: Throwable => log.error("Exception while responding to RequestRunningJobsStatus", t)
      }


    case RequestJob(jobID) =>
      currentJobs.get(jobID) match {
        case Some((eg, _)) => sender ! decorateMessage(JobFound(jobID, eg))
        case None =>
          // check the archive
          archive forward decorateMessage(RequestJob(jobID))
      }


    case RequestBlobManagerPort =>
      sender ! decorateMessage(libraryCacheManager.getBlobServerPort)


    case RequestArchive =>
      sender ! decorateMessage(ResponseArchive(archive))


    case RequestRegisteredTaskManagers =>
      import scala.collection.JavaConverters._
      sender ! decorateMessage(
        RegisteredTaskManagers(
          instanceManager.getAllRegisteredInstances.asScala
        )
      )


    case RequestTaskManagerInstance(instanceID) =>
      sender ! decorateMessage(
        TaskManagerInstance(Option(instanceManager.getRegisteredInstanceById(instanceID)))
      )


    case Heartbeat(instanceID, metricsReport, accumulators) =>
      log.debug(s"Received hearbeat message from $instanceID.")


      updateAccumulators(accumulators)


      instanceManager.reportHeartBeat(instanceID, metricsReport)


    case message: AccumulatorMessage => handleAccumulatorMessage(message)


    case message: InfoMessage => handleInfoRequestMessage(message, sender())


    case RequestStackTrace(instanceID) =>
      val gateway = instanceManager.getRegisteredInstanceById(instanceID).getActorGateway
      gateway.forward(SendStackTrace, new AkkaActorGateway(sender, leaderSessionID.orNull))


    case Terminated(taskManager) =>
      if (instanceManager.isRegistered(taskManager)) {
        log.info(s"Task manager ${taskManager.path} terminated.")


        instanceManager.unregisterTaskManager(taskManager, true)
        context.unwatch(taskManager)
      }


    case RequestJobManagerStatus =>
      sender() ! decorateMessage(JobManagerStatusAlive)


    case RemoveJob(jobID, clearPersistedJob) =>
      currentJobs.get(jobID) match {
        case Some((graph, info)) =>
            removeJob(graph.getJobID, clearPersistedJob) match {
              case Some(futureToComplete) =>
                futuresToComplete = Some(futuresToComplete.getOrElse(Seq()) :+ futureToComplete)
              case None =>
            }
        case None =>
      }


    case RemoveCachedJob(jobID) =>
      currentJobs.get(jobID) match {
        case Some((graph, info)) =>
          if (graph.getState.isTerminalState) {
            removeJob(graph.getJobID, true) match {
              case Some(futureToComplete) =>
                futuresToComplete = Some(futuresToComplete.getOrElse(Seq()) :+ futureToComplete)
              case None =>
            }
          } else {
            // triggers removal upon completion of job
            info.sessionAlive = false
          }
        case None =>
      }


    case Disconnect(msg) =>
      val taskManager = sender()


      if (instanceManager.isRegistered(taskManager)) {
        log.info(s"Task manager ${taskManager.path} wants to disconnect, because $msg.")


        instanceManager.unregisterTaskManager(taskManager, false)
        context.unwatch(taskManager)
      }


    case RequestLeaderSessionID =>
      sender() ! ResponseLeaderSessionID(leaderSessionID.orNull)


    case RequestWebMonitorPort =>
      sender() ! ResponseWebMonitorPort(webMonitorPort)
  }
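  // For reference, cancelling a job from the client side uses the same protocol that
  // handleMessage implements above. A minimal sketch (the actor reference and timeout
  // are illustrative assumptions; Flink normally routes such messages through an
  // ActorGateway that attaches the leader session ID):
  //
  //   Await.result((jobManager ? CancelJob(jobId))(askTimeout), askTimeout.duration) match {
  //     case CancellationSuccess(_) => // cancellation was triggered
  //     case CancellationFailure(_, cause) => throw cause
  //   }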


  /**
   * Submits a job to the job manager. The job is registered at the libraryCacheManager which
   * creates the job's class loader. The job graph is appended to the corresponding execution
   * graph and the execution vertices are queued for scheduling.
   *
   * @param jobGraph representing the Flink job
   * @param jobInfo the job info
   * @param isRecovery Flag indicating whether this is a recovery or initial submission
   */
  private def submitJob(jobGraph: JobGraph, jobInfo: JobInfo, isRecovery: Boolean = false): Unit = {
    if (jobGraph == null) {
      jobInfo.client ! decorateMessage(JobResultFailure(
        new SerializedThrowable(
          new JobSubmissionException(null, "JobGraph must not be null.")
        )
      ))
    }
    else {
      val jobId = jobGraph.getJobID
      val jobName = jobGraph.getName
      var executionGraph: ExecutionGraph = null


      log.info(s"Submitting job $jobId ($jobName)" + (if (isRecovery) " (Recovery)" else "") + ".")


      try {
        // Important: We need to make sure that the library registration is the first action,
        // because this makes sure that the uploaded jar files are removed in case of
        // an unsuccessful submission.
        try {
          libraryCacheManager.registerJob(jobGraph.getJobID, jobGraph.getUserJarBlobKeys,
            jobGraph.getClasspaths)
        }
        catch {
          case t: Throwable =>
            throw new JobSubmissionException(jobId,
              "Cannot set up the user code libraries: " + t.getMessage, t)
        }


        val userCodeLoader = libraryCacheManager.getClassLoader(jobGraph.getJobID)
        if (userCodeLoader == null) {
          throw new JobSubmissionException(jobId,
            "The user code class loader could not be initialized.")
        }


        if (jobGraph.getNumberOfVertices == 0) {
          throw new JobSubmissionException(jobId, "The given job is empty")
        }


        // see if there already exists an ExecutionGraph for the corresponding job ID
        executionGraph = currentJobs.get(jobGraph.getJobID) match {
          case Some((graph, currentJobInfo)) =>
            currentJobInfo.setLastActive()
            graph
          case None =>
            val graph = new ExecutionGraph(
              executionContext,
              jobGraph.getJobID,
              jobGraph.getName,
              jobGraph.getJobConfiguration,
              timeout,
              jobGraph.getUserJarBlobKeys,
              jobGraph.getClasspaths,
              userCodeLoader)


            currentJobs.put(jobGraph.getJobID, (graph, jobInfo))
            graph
        }


        // configure the execution graph
        val jobNumberRetries = if (jobGraph.getNumberOfExecutionRetries() >= 0) {
          jobGraph.getNumberOfExecutionRetries()
        } else {
          defaultExecutionRetries
        }


        val executionRetryDelay = if (jobGraph.getExecutionRetryDelay() >= 0) {
          jobGraph.getExecutionRetryDelay()
        }
        else {
          delayBetweenRetries
        }


        executionGraph.setNumberOfRetriesLeft(jobNumberRetries)
        executionGraph.setDelayBeforeRetrying(executionRetryDelay)
        executionGraph.setScheduleMode(jobGraph.getScheduleMode())
        executionGraph.setQueuedSchedulingAllowed(jobGraph.getAllowQueuedScheduling())


        try {
          executionGraph.setJsonPlan(JsonPlanGenerator.generatePlan(jobGraph))
        }
        catch {
          case t: Throwable =>
            log.warn("Cannot create JSON plan for job", t)
            executionGraph.setJsonPlan("{}")
        }


        // initialize the vertices that have a master initialization hook
        // file output formats create directories here, input formats create splits
        if (log.isDebugEnabled) {
          log.debug(s"Running initialization on master for job ${jobId} (${jobName}).")
        }


        val numSlots = scheduler.getTotalNumberOfSlots()


        for (vertex <- jobGraph.getVertices.asScala) {
          val executableClass = vertex.getInvokableClassName
          if (executableClass == null || executableClass.length == 0) {
            throw new JobSubmissionException(jobId,
              s"The vertex ${vertex.getID} (${vertex.getName}) has no invokable class.")
          }


          if (vertex.getParallelism() == ExecutionConfig.PARALLELISM_AUTO_MAX) {
            vertex.setParallelism(numSlots)
          }


          try {
            vertex.initializeOnMaster(userCodeLoader)
          }
          catch {
            case t: Throwable =>
              throw new JobExecutionException(jobId,
                "Cannot initialize task '" + vertex.getName() + "': " + t.getMessage, t)
          }
        }


        // topologically sort the job vertices and attach the graph to the existing one
        val sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources()
        if (log.isDebugEnabled) {
          log.debug(s"Adding ${sortedTopology.size()} vertices from " +
            s"job graph ${jobId} (${jobName}).")
        }
        executionGraph.attachJobGraph(sortedTopology)


        if (log.isDebugEnabled) {
          log.debug("Successfully created execution graph from job " +
            s"graph ${jobId} (${jobName}).")
        }


        // configure the state checkpointing
        val snapshotSettings = jobGraph.getSnapshotSettings
        if (snapshotSettings != null) {
          val jobId = jobGraph.getJobID()


          val idToVertex: JobVertexID => ExecutionJobVertex = id => {
            val vertex = executionGraph.getJobVertex(id)
            if (vertex == null) {
              throw new JobSubmissionException(jobId,
                "The snapshot checkpointing settings refer to non-existent vertex " + id)
            }
            vertex
          }


          val triggerVertices: java.util.List[ExecutionJobVertex] =
            snapshotSettings.getVerticesToTrigger().asScala.map(idToVertex).asJava


          val ackVertices: java.util.List[ExecutionJobVertex] =
            snapshotSettings.getVerticesToAcknowledge().asScala.map(idToVertex).asJava


          val confirmVertices: java.util.List[ExecutionJobVertex] =
            snapshotSettings.getVerticesToConfirm().asScala.map(idToVertex).asJava


          val completedCheckpoints = checkpointRecoveryFactory
            .createCompletedCheckpoints(jobId, userCodeLoader)


          val checkpointIdCounter = checkpointRecoveryFactory.createCheckpointIDCounter(jobId)


          executionGraph.enableSnapshotCheckpointing(
            snapshotSettings.getCheckpointInterval,
            snapshotSettings.getCheckpointTimeout,
            triggerVertices,
            ackVertices,
            confirmVertices,
            context.system,
            leaderSessionID.orNull,
            checkpointIdCounter,
            completedCheckpoints,
            recoveryMode)
        }
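        // The checkpoint coordinator wired up by enableSnapshotCheckpointing above is
        // the coordinator that handleCheckpointMessage() below feeds
        // AcknowledgeCheckpoint messages into.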


        // get notified about job status changes
        executionGraph.registerJobStatusListener(
          new AkkaActorGateway(self, leaderSessionID.orNull))


        if (jobInfo.listeningBehaviour == ListeningBehaviour.EXECUTION_RESULT_AND_STATE_CHANGES) {
          // the sender wants to be notified about state changes
          val gateway = new AkkaActorGateway(jobInfo.client, leaderSessionID.orNull)


          executionGraph.registerExecutionListener(gateway)
          executionGraph.registerJobStatusListener(gateway)
        }
      }
      catch {
        case t: Throwable =>
          log.error(s"Failed to submit job ${jobId} (${jobName})", t)


          libraryCacheManager.unregisterJob(jobId)
          currentJobs.remove(jobId)


          if (executionGraph != null) {
            executionGraph.fail(t)
          }


          val rt: Throwable = if (t.isInstanceOf[JobExecutionException]) {
            t
          } else {
            new JobExecutionException(jobId, s"Failed to submit job ${jobId} (${jobName})", t)
          }


          jobInfo.client ! decorateMessage(JobResultFailure(new SerializedThrowable(rt)))
          return
      }


      // Execute the recovery (or write the jobGraph to the SubmittedJobGraphStore)
      // asynchronously, because it is a blocking operation.
      future {
        try {
          if (isRecovery) {
            executionGraph.restoreLatestCheckpointedState()
          }
          else {
            submittedJobGraphs.putJobGraph(new SubmittedJobGraph(jobGraph, jobInfo))
          }


          jobInfo.client ! decorateMessage(JobSubmitSuccess(jobGraph.getJobID))


          if (leaderElectionService.hasLeadership) {
            // There is a small chance that multiple job managers schedule the same job if
            // they try to recover at the same time. This will eventually be noticed, but
            // cannot be ruled out from the beginning.


            // NOTE: Scheduling the job for execution is a separate action from the job submission.
            // The success of submitting the job must be independent from the success of scheduling
            // the job.
            log.info(s"Scheduling job $jobId ($jobName).")


            executionGraph.scheduleForExecution(scheduler)
          } else {
            // Remove the job graph. Otherwise it will linger around and possibly be removed
            // from ZooKeeper by this JM.
            self ! decorateMessage(RemoveJob(jobId, false))


            log.warn(s"Submitted job $jobId, but not leader. The other leader needs to recover " +
              "this. I am not scheduling the job for execution.")
          }
        } catch {
          case t: Throwable => try {
            executionGraph.fail(t)
          }
          catch {
            case tt: Throwable => {
              log.error("Error while marking ExecutionGraph as failed.", tt)
            }
          }
        }
      }(context.dispatcher)
    }
  }
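  // Submission sketch for context: a client sends the SubmitJob message handled by this
  // method (the client actor reference is an assumption for illustration):
  //
  //   jobManager.tell(
  //     SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), clientActorRef)
  //   // the client then receives JobSubmitSuccess(jobId), followed later by
  //   // JobResultSuccess(...) or JobResultFailure(...)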


  /**
   * Dedicated handler for checkpoint messages.
   *
   * @param actorMessage The checkpoint actor message.
   */
  private def handleCheckpointMessage(actorMessage: AbstractCheckpointMessage): Unit = {
    actorMessage match {
      case ackMessage: AcknowledgeCheckpoint =>
        val jid = ackMessage.getJob()
        currentJobs.get(jid) match {
          case Some((graph, _)) =>
            val coordinator = graph.getCheckpointCoordinator()
            if (coordinator != null) {
              future {
                try {
                  coordinator.receiveAcknowledgeMessage(ackMessage)
                }
                catch {
                  case t: Throwable =>
                    log.error(s"Error in CheckpointCoordinator while processing $ackMessage", t)
                }
              }(context.dispatcher)
            }
            else {
              log.error(
                s"Received AcknowledgeCheckpoint message for job $jid with no " +
                  "CheckpointCoordinator")
            }
            
          case None => log.error(s"Received ConfirmCheckpoint for unavailable job $jid")
        }


      // unknown checkpoint message
      case _ => unhandled(actorMessage)
    }
  }
  
  /**
   * Handle unmatched messages with an exception.
   */
  override def unhandled(message: Any): Unit = {
    // let the actor crash
    throw new RuntimeException("Received unknown message " + message)
  }


  /**
   * Handle messages that request or report accumulators.
   *
   * @param message The accumulator message.
   */
  private def handleAccumulatorMessage(message: AccumulatorMessage): Unit = {
    message match {
      case RequestAccumulatorResults(jobID) =>
        try {
          currentJobs.get(jobID) match {
            case Some((graph, jobInfo)) =>
              val accumulatorValues = graph.getAccumulatorsSerialized()
              sender() ! decorateMessage(AccumulatorResultsFound(jobID, accumulatorValues))
            case None =>
              archive.forward(message)
          }
        } catch {
        case e: Exception =>
          log.error("Cannot serialize accumulator result.", e)
          sender() ! decorateMessage(AccumulatorResultsErroneous(jobID, e))
        }


      case RequestAccumulatorResultsStringified(jobId) =>
        currentJobs.get(jobId) match {
          case Some((graph, jobInfo)) =>
            val stringifiedAccumulators = graph.getAccumulatorResultsStringified()
            sender() ! decorateMessage(
              AccumulatorResultStringsFound(jobId, stringifiedAccumulators)
            )
          case None =>
            archive.forward(message)
        }


      case unknown =>
        log.warn(s"Received unknown AccumulatorMessage: $unknown")
    }
  }


  /**
   * Dedicated handler for monitor info request messages.
   * 
   * Note that this handler does not fail. Errors while responding to info messages are logged,
   * but will not cause the actor to crash.
   *
   * @param actorMessage The info request message.
   */
  private def handleInfoRequestMessage(actorMessage: InfoMessage, theSender: ActorRef): Unit = {
    try {
      actorMessage match {


        case _ : RequestJobsOverview =>
          // get our own overview
          val ourJobs = createJobStatusOverview()


          // get the overview from the archive
          val future = (archive ? RequestJobsOverview.getInstance())(timeout)


          future.onSuccess {
            case archiveOverview: JobsOverview =>
              theSender ! new JobsOverview(ourJobs, archiveOverview)
          }(context.dispatcher)


        case _ : RequestJobsWithIDsOverview =>
          // get our own overview
          val ourJobs = createJobStatusWithIDsOverview()


          // get the overview from the archive
          val future = (archive ? RequestJobsWithIDsOverview.getInstance())(timeout)


          future.onSuccess {
            case archiveOverview: JobsWithIDsOverview =>
              theSender ! new JobsWithIDsOverview(ourJobs, archiveOverview)
          }(context.dispatcher)


        case _ : RequestStatusOverview =>


          val ourJobs = createJobStatusOverview()


          val numTMs = instanceManager.getNumberOfRegisteredTaskManagers()
          val numSlotsTotal = instanceManager.getTotalNumberOfSlots()
          val numSlotsAvailable = instanceManager.getNumberOfAvailableSlots()


          // add to that the jobs from the archive
          val future = (archive ? RequestJobsOverview.getInstance())(timeout)
          future.onSuccess {
            case archiveOverview: JobsOverview =>
              theSender ! new StatusOverview(numTMs, numSlotsTotal, numSlotsAvailable,
                ourJobs, archiveOverview)
          }(context.dispatcher)


        case msg : RequestJobDetails => 
          
          val ourDetails: Array[JobDetails] = if (msg.shouldIncludeRunning()) {
            currentJobs.values.map {
              v => WebMonitorUtils.createDetailsForJob(v._1)
            }.toArray[JobDetails]
          } else {
            null
          }
          
          if (msg.shouldIncludeFinished()) {
            val future = (archive ? msg)(timeout)
            future.onSuccess {
              case archiveDetails: MultipleJobsDetails =>
                theSender ! new MultipleJobsDetails(ourDetails, archiveDetails.getFinishedJobs())
            }(context.dispatcher)
          } else {
            theSender ! new MultipleJobsDetails(ourDetails, null)
          }
          
        case _ => log.error("Unrecognized info message " + actorMessage)
      }
    }
    catch {
      case e: Throwable => log.error(s"Error responding to message $actorMessage", e)
    }
  }


  private def createJobStatusOverview() : JobsOverview = {
    var runningOrPending = 0
    var finished = 0
    var canceled = 0
    var failed = 0


    currentJobs.values.foreach {
      _._1.getState() match {
        case JobStatus.FINISHED => finished += 1
        case JobStatus.CANCELED => canceled += 1
        case JobStatus.FAILED => failed += 1
        case _ => runningOrPending += 1
      }
    }


    new JobsOverview(runningOrPending, finished, canceled, failed)
  }


  private def createJobStatusWithIDsOverview() : JobsWithIDsOverview = {
    val runningOrPending = new java.util.ArrayList[JobID]()
    val finished = new java.util.ArrayList[JobID]()
    val canceled = new java.util.ArrayList[JobID]()
    val failed = new java.util.ArrayList[JobID]()
    
    currentJobs.values.foreach { case (graph, _) =>
      graph.getState() match {
        case JobStatus.FINISHED => finished.add(graph.getJobID)
        case JobStatus.CANCELED => canceled.add(graph.getJobID)
        case JobStatus.FAILED => failed.add(graph.getJobID)
        case _ => runningOrPending.add(graph.getJobID)
      }
    }


    new JobsWithIDsOverview(runningOrPending, finished, canceled, failed)
  }


  /**
   * Removes the job and sends it to the MemoryArchivist.
   *
   * This should be called asynchronously. Removing the job from the [[SubmittedJobGraphStore]]
   * might block. Therefore be careful not to block the actor thread.
   *
   * @param jobID ID of the job to remove and archive
   * @param removeJobFromStateBackend true if the job shall be archived and removed from the state
   *                            backend
   */
  private def removeJob(jobID: JobID, removeJobFromStateBackend: Boolean): Option[Future[Unit]] = {
    // Don't remove the job yet...
    val futureOption = currentJobs.get(jobID) match {
      case Some((eg, _)) =>
        val result = if (removeJobFromStateBackend) {
          val futureOption = Some(future {
            try {
              // ...otherwise, we can have lingering resources when there is a concurrent shutdown
              // and the ZooKeeper client is closed. Not removing the job immediately allows the
              // shutdown to release all resources.
              submittedJobGraphs.removeJobGraph(jobID)
            } catch {
              case t: Throwable => log.error(s"Could not remove submitted job graph $jobID.", t)
            }
          }(context.dispatcher))


          try {
            eg.prepareForArchiving()


            archive ! decorateMessage(ArchiveExecutionGraph(jobID, eg))
          } catch {
            case t: Throwable => log.error(s"Could not prepare the execution graph $eg for " +
              "archiving.", t)
          }


          futureOption
        } else {
          None
        }


        currentJobs.remove(jobID)


        result
      case None => None
    }


    try {
      libraryCacheManager.unregisterJob(jobID)
    } catch {
      case t: Throwable =>
        log.error(s"Could not properly unregister job $jobID form the library cache.", t)
    }


    futureOption
  }


  /** Fails all currently running jobs and empties the list of currently running jobs. If the
    * [[JobClientActor]] waits for a result, then a [[JobExecutionException]] is sent.
    *
    * @param cause Cause for cancelling the jobs.
    */
  private def cancelAndClearEverything(
      cause: Throwable,
      removeJobFromStateBackend: Boolean)
    : Seq[Future[Unit]] = {
    val futures = for ((jobID, (eg, jobInfo)) <- currentJobs) yield {
      future {
        if (removeJobFromStateBackend) {
          try {
            submittedJobGraphs.removeJobGraph(jobID)
          }
          catch {
            case t: Throwable => {
              log.error("Error during submitted job graph clean up.", t)
            }
          }
        }


        eg.fail(cause)


        if (jobInfo.listeningBehaviour != ListeningBehaviour.DETACHED) {
          jobInfo.client ! decorateMessage(
            Failure(new JobExecutionException(jobID, "All jobs are cancelled and cleared.", cause)))
        }
      }(context.dispatcher)
    }


    currentJobs.clear()


    futures.toSeq
  }


  override def grantLeadership(newLeaderSessionID: UUID): Unit = {
    self ! decorateMessage(GrantLeadership(Option(newLeaderSessionID)))
  }


  override def revokeLeadership(): Unit = {
    leaderSessionID = None
    self ! decorateMessage(RevokeLeadership)
  }


  override def onAddedJobGraph(jobId: JobID): Unit = {
    if (leaderSessionID.isDefined && !currentJobs.contains(jobId)) {
      self ! decorateMessage(RecoverJob(jobId))
    }
  }


  override def onRemovedJobGraph(jobId: JobID): Unit = {
    if (leaderSessionID.isDefined) {
      currentJobs.get(jobId).foreach(
        job =>
          future {
            // Fail the execution graph
            job._1.fail(new IllegalStateException("Another JobManager removed the job from " +
              "ZooKeeper."))
          }(context.dispatcher)
      )
    }
  }


  override def getAddress: String = {
    AkkaUtils.getAkkaURL(context.system, self)
  }


  /** Handles errors occurring in the leader election service.
    *
    * @param exception The exception reported by the leader election service.
    */
  override def handleError(exception: Exception): Unit = {
    log.error("Received an error from the LeaderElectionService.", exception)


    // terminate JobManager in case of an error
    self ! decorateMessage(PoisonPill)
  }
  
  /**
   * Updates the accumulators reported from a task manager via the Heartbeat message.
   * @param accumulators list of accumulator snapshots
   */
  private def updateAccumulators(accumulators : Seq[AccumulatorSnapshot]) = {
    accumulators foreach {
      case accumulatorEvent =>
        currentJobs.get(accumulatorEvent.getJobID) match {
          case Some((jobGraph, jobInfo)) =>
            future {
              jobGraph.updateAccumulators(accumulatorEvent)
            }(context.dispatcher)
          case None =>
          // ignore accumulator values for old job
        }
    }
  }
}


/**
 * Job Manager companion object. Contains the entry point (main method) to run the JobManager in a
 * standalone fashion. Also contains various utility methods to start the JobManager and to
 * look up the JobManager actor reference.
 */
object JobManager {


  val LOG = Logger(classOf[JobManager])


  val STARTUP_FAILURE_RETURN_CODE = 1
  val RUNTIME_FAILURE_RETURN_CODE = 2


  /** Name of the JobManager actor */
  val JOB_MANAGER_NAME = "jobmanager"


  /** Name of the archive actor */
  val ARCHIVE_NAME = "archive"




  /**
   * Entry point (main method) to run the JobManager in a standalone fashion.
   *
   * @param args The command line arguments.
   */
  def main(args: Array[String]): Unit = {
    // startup checks and logging
    EnvironmentInformation.logEnvironmentInfo(LOG.logger, "JobManager", args)
    EnvironmentInformation.checkJavaVersion()


    // parsing the command line arguments
    val (configuration: Configuration,
         executionMode: JobManagerMode,
         streamingMode: StreamingMode,
         listeningHost: String, listeningPort: Int) =
    try {
      parseArgs(args)
    }
    catch {
      case t: Throwable => {
        LOG.error(t.getMessage(), t)
        System.exit(STARTUP_FAILURE_RETURN_CODE)
        null
      }
    }


    // we want to check that the JobManager hostname is in the config
    // if it is not in there, the actor system will bind to the loopback interface's
    // address and will not be reachable from anyone remote
    if (listeningHost == null) {
      val message = "Config parameter '" + ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY +
        "' is missing (hostname/address to bind JobManager to)."
      LOG.error(message)
      System.exit(STARTUP_FAILURE_RETURN_CODE)
    }


    if (ZooKeeperUtils.isZooKeeperRecoveryMode(configuration)) {
      // with ZooKeeper recovery, the port must be 0 so that a free port is chosen automatically
      if (listeningPort != 0) {
        val message = "Config parameter '" + ConfigConstants.JOB_MANAGER_IPC_PORT_KEY +
          "' is invalid, it must be equal to 0."
        LOG.error(message)
        System.exit(STARTUP_FAILURE_RETURN_CODE)
      }
    } else {
      // otherwise, the configured port must be a valid, fixed port number
      if (listeningPort <= 0 || listeningPort >= 65536) {
        val message = "Config parameter '" + ConfigConstants.JOB_MANAGER_IPC_PORT_KEY +
          "' is invalid, it must be greater than 0 and less than 65536."
        LOG.error(message)
        System.exit(STARTUP_FAILURE_RETURN_CODE)
      }
    }


    // run the job manager
    try {
      if (SecurityUtils.isSecurityEnabled) {
        LOG.info("Security is enabled. Starting secure JobManager.")
        SecurityUtils.runSecured(new FlinkSecuredRunner[Unit] {
          override def run(): Unit = {
            runJobManager(
              configuration,
              executionMode,
              streamingMode,
              listeningHost,
              listeningPort)
          }
        })
      }
      else {
        LOG.info("Security is not enabled. Starting non-authenticated JobManager.")
        runJobManager(
          configuration,
          executionMode,
          streamingMode,
          listeningHost,
          listeningPort)
      }
    }
    catch {
      case t: Throwable => {
        LOG.error("Failed to run JobManager.", t)
        System.exit(STARTUP_FAILURE_RETURN_CODE)
      }
    }
  }


  /**
   * Starts and runs the JobManager with all its components. First, this method starts a
   * dedicated actor system for the JobManager. Second, it starts all components of the
   * JobManager (including library cache, instance manager, scheduler). Finally, it starts
   * the JobManager actor itself.
   *
   * This method blocks indefinitely (or until the JobManager's actor system is shut down).
   *
   * @param configuration The configuration object for the JobManager.
   * @param executionMode The execution mode in which to run. Execution mode LOCAL will spawn an
   *                      additional TaskManager in the same process.
   * @param streamingMode The streaming mode to run the system in (streaming vs. batch-only)
   * @param listeningAddress The hostname where the JobManager should listen for messages.
   * @param listeningPort The port where the JobManager should listen for messages.
   */
  def runJobManager(
      configuration: Configuration,
      executionMode: JobManagerMode,
      streamingMode: StreamingMode,
      listeningAddress: String,
      listeningPort: Int)
    : Unit = {
    
    val (jobManagerSystem, _, _, _) = startActorSystemAndJobManagerActors(
      configuration,
      executionMode,
      streamingMode,
      listeningAddress,
      listeningPort,
      classOf[JobManager],
      classOf[MemoryArchivist]
    )


    // block until everything is shut down
    jobManagerSystem.awaitTermination()
  }
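  // Embedding sketch: starting a JobManager programmatically could look like the
  // following (the configuration values are assumptions for illustration):
  //
  //   val config = new Configuration()
  //   config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost")
  //   config.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, 6123)
  //   JobManager.runJobManager(
  //     config, JobManagerMode.CLUSTER, StreamingMode.BATCH_ONLY, "localhost", 6123)
  //   // blocks until the JobManager's actor system shuts down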


  /** Starts an ActorSystem, the JobManager and all its components including the WebMonitor.
    *
    * @param configuration The configuration object for the JobManager
    * @param executionMode The execution mode in which to run. Execution mode LOCAL will spawn an
    *                      additional TaskManager in the same process.
    * @param streamingMode The streaming mode to run the system in (streaming vs. batch-only)
    * @param listeningAddress The hostname where the JobManager should listen for messages.
    * @param listeningPort The port where the JobManager should listen for messages.
    * @param jobManagerClass The class of the JobManager to be started
    * @param archiveClass The class of the Archivist to be started
    * @return A tuple containing the started ActorSystem, ActorRefs to the JobManager and the
    *         Archivist and an Option containing a possibly started WebMonitor
    */
  def startActorSystemAndJobManagerActors(
      configuration: Configuration,
      executionMode: JobManagerMode,
      streamingMode: StreamingMode,
      listeningAddress: String,
      listeningPort: Int,
      jobManagerClass: Class[_ <: JobManager],
      archiveClass: Class[_ <: MemoryArchivist])
    : (ActorSystem, ActorRef, ActorRef, Option[WebMonitor]) = {


    LOG.info("Starting JobManager")


    // Bring up the job manager actor system first, bind it to the given address.
    val hostPortUrl = NetUtils.hostAndPortToUrlString(listeningAddress, listeningPort)
    LOG.info(s"Starting JobManager actor system at $hostPortUrl")


    val jobManagerSystem = try {
      val akkaConfig = AkkaUtils.getAkkaConfig(
        configuration,
        Some((listeningAddress, listeningPort))
      )
      if (LOG.isDebugEnabled) {
        LOG.debug("Using akka configuration\n " + akkaConfig)
      }
      AkkaUtils.createActorSystem(akkaConfig)
    }
    catch {
      case t: Throwable => {
        if (t.isInstanceOf[org.jboss.netty.channel.ChannelException]) {
          val cause = t.getCause()
          if (cause != null && cause.isInstanceOf[java.net.BindException]) {
            val address = listeningAddress + ":" + listeningPort
            throw new Exception("Unable to create JobManager at address " + address +
              " - " + cause.getMessage(), t)
          }
        }
        throw new Exception("Could not create JobManager actor system", t)
      }
    }


    val address = AkkaUtils.getAddress(jobManagerSystem)


    configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, address.host.get)
    configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, address.port.get)


    val webMonitor: Option[WebMonitor] =
      if (configuration.getInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, 0) >= 0) {
        LOG.info("Starting JobManger web frontend")
        val leaderRetrievalService = LeaderRetrievalUtils
          .createLeaderRetrievalService(configuration)


        // start the web frontend. we need to load this dynamically
        // because it is not in the same project/dependencies
        val webServer = WebMonitorUtils.startWebRuntimeMonitor(
          configuration,
          leaderRetrievalService,
          jobManagerSystem)


        Option(webServer)
      }
      else {
        None
      }


    // Reset the port (necessary in case of automatic port selection)
    webMonitor.foreach{ monitor => configuration.setInteger(
      ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, monitor.getServerPort) }
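
    // For example (assuming 0 requests automatic port selection), with
    // "jobmanager.web.port: 0" the actually bound port is only known after the monitor
    // has started, so the chosen port is written back into the configuration here.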


    try {
      // bring up the job manager actor
      LOG.info("Starting JobManager actor")
      val (jobManager, archive) = startJobManagerActors(
        configuration,
        jobManagerSystem,
        streamingMode,
        jobManagerClass,
        archiveClass)


      // start a process reaper that watches the JobManager. If the JobManager actor dies,
      // the process reaper will kill the JVM process (to ensure easy failure detection)
      LOG.debug("Starting JobManager process reaper")
      jobManagerSystem.actorOf(
        Props(
          classOf[ProcessReaper],
          jobManager,
          LOG.logger,
          RUNTIME_FAILURE_RETURN_CODE),
        "JobManager_Process_Reaper")


      // bring up a local task manager, if needed
      if (executionMode == JobManagerMode.LOCAL) {
        LOG.info("Starting embedded TaskManager for JobManager's LOCAL execution mode")


        val taskManagerActor = TaskManager.startTaskManagerComponentsAndActor(
          configuration,
          jobManagerSystem,
          listeningAddress,
          Some(TaskManager.TASK_MANAGER_NAME),
          None,
          true,
          streamingMode,
          classOf[TaskManager])


        LOG.debug("Starting TaskManager process reaper")
        jobManagerSystem.actorOf(
          Props(
            classOf[ProcessReaper],
            taskManagerActor,
            LOG.logger,
            RUNTIME_FAILURE_RETURN_CODE),
          "TaskManager_Process_Reaper")
      }


      webMonitor.foreach {
        monitor =>
          val jobManagerAkkaUrl = JobManager.getRemoteJobManagerAkkaURL(configuration)
          monitor.start(jobManagerAkkaUrl)
      }


      (jobManagerSystem, jobManager, archive, webMonitor)
    }
    catch {
      case t: Throwable => {
        LOG.error("Error while starting up JobManager", t)
        try {
          jobManagerSystem.shutdown()
        } catch {
          case tt: Throwable => LOG.warn("Could not cleanly shut down actor system", tt)
        }
        throw t
      }
    }
  }
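
  // Usage sketch: calling startActorSystemAndJobManagerActors directly in order to keep
  // handles on the started actors. The variable names and host/port are illustrative
  // assumptions.
  //
  //   val (system, jobManagerRef, archiverRef, webMonitorOpt) =
  //     JobManager.startActorSystemAndJobManagerActors(
  //       config, JobManagerMode.CLUSTER, StreamingMode.STREAMING, "master", 6123,
  //       classOf[JobManager], classOf[MemoryArchivist])
  //   // ... interact with jobManagerRef ...
  //   system.awaitTermination()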


  /**
   * Loads the configuration, execution mode, streaming mode and the listening host and
   * port from the provided command line arguments.
   *
   * @param args command line arguments
   * @return A 5-tuple of configuration, execution mode, streaming mode, listening host
   *         and listening port
   */
  def parseArgs(args: Array[String]):
                     (Configuration, JobManagerMode, StreamingMode, String, Int) = {
    val parser = new scopt.OptionParser[JobManagerCliOptions]("JobManager") {
      head("Flink JobManager")


      opt[String]("configDir") action { (arg, conf) => 
        conf.setConfigDir(arg)
        conf
      } text {
        "The configuration directory."
      }


      opt[String]("executionMode") action { (arg, conf) =>
        conf.setJobManagerMode(arg)
        conf
      } text {
        "The execution mode of the JobManager (CLUSTER / LOCAL)"
      }


      opt[String]("streamingMode").optional().action { (arg, conf) =>
        conf.setStreamingMode(arg)
        conf
      } text {
        "The streaming mode of the JobManager (STREAMING / BATCH)"
      }


      opt[String]("host").optional().action { (arg, conf) =>
        conf.setHost(arg)
        conf
      } text {
        "Network address for communication with the job manager"
      }


      opt[Int]("webui-port").optional().action { (arg, conf) =>
        conf.setWebUIPort(arg)
        conf
      } text {
        "Port for the UI web server"
      }
    }


    val config = parser.parse(args, new JobManagerCliOptions()).getOrElse {
      throw new Exception(
        s"Invalid command line agruments: ${args.mkString(" ")}. Usage: ${parser.usage}")
    }
    
    val configDir = config.getConfigDir()
    
    if (configDir == null) {
      throw new Exception("Missing parameter '--configDir'")
    }
    if (config.getJobManagerMode() == null) {
      throw new Exception("Missing parameter '--executionMode'")
    }


    LOG.info("Loading configuration from " + configDir)
    GlobalConfiguration.loadConfiguration(configDir)
    val configuration = GlobalConfiguration.getConfiguration()


    if (new File(configDir).isDirectory) {
      configuration.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, configDir + "/..")
    }


    if (config.getWebUIPort() >= 0) {
      configuration.setInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, config.getWebUIPort())
    }


    if (config.getHost() != null) {
      configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, config.getHost())
    }


    val host = configuration.getString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, null)


    // high availability mode
    val port: Int =
      if (ZooKeeperUtils.isZooKeeperRecoveryMode(configuration)) {
        LOG.info("Starting JobManager in High-Availability Mode")


        configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, 0)
        0
      }
      else {
        LOG.info("Staring JobManager without high-availability")
  
        configuration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
            ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT)
      }


    val executionMode = config.getJobManagerMode
    val streamingMode = config.getStreamingMode
    val hostPortUrl = NetUtils.hostAndPortToUrlString(host, port)
    
    LOG.info(s"Starting JobManager on $hostPortUrl with execution mode $executionMode and " +
      s"streaming mode $streamingMode")


    (configuration, executionMode, streamingMode, host, port)
  }
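
  // Usage sketch: a typical command line and the resulting 5-tuple. The paths and host
  // name are illustrative assumptions; --configDir must point at a directory containing
  // flink-conf.yaml.
  //
  //   val (config, execMode, streamMode, host, port) = JobManager.parseArgs(Array(
  //     "--configDir", "/opt/flink/conf",
  //     "--executionMode", "CLUSTER",
  //     "--streamingMode", "STREAMING",
  //     "--host", "master"))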


  /**
   * Creates the components the JobManager is built from: the execution context, the
   * instance manager, the scheduler, the library cache manager, the execution retry
   * settings, the timeout, the archive count, the leader election service, the
   * submitted job graph store and the checkpoint recovery factory.
   *
   * @param configuration The configuration from which to parse the config values.
   * @param leaderElectionServiceOption LeaderElectionService which shall be returned if the option
   *                                    is defined
   * @return The members for a default JobManager.
   */
  def createJobManagerComponents(
      configuration: Configuration,
      leaderElectionServiceOption: Option[LeaderElectionService]) :
    (ExecutionContext,
    InstanceManager,
    FlinkScheduler,
    BlobLibraryCacheManager,
    Int, // execution retries
    Long, // delay between retries
    FiniteDuration, // timeout
    Int, // number of archived jobs
    LeaderElectionService,
    SubmittedJobGraphStore,
    CheckpointRecoveryFactory) = {


    val timeout: FiniteDuration = AkkaUtils.getTimeout(configuration)


    val cleanupInterval = configuration.getLong(
      ConfigConstants.LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL,
      ConfigConstants.DEFAULT_LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL) * 1000


    val executionRetries = configuration.getInteger(
      ConfigConstants.DEFAULT_EXECUTION_RETRIES_KEY,
      ConfigConstants.DEFAULT_EXECUTION_RETRIES)


    val archiveCount = configuration.getInteger(ConfigConstants.JOB_MANAGER_WEB_ARCHIVE_COUNT,
      ConfigConstants.DEFAULT_JOB_MANAGER_WEB_ARCHIVE_COUNT)


    // configure the delay between execution retries.
    // unless explicitly specified, this depends on the heartbeat timeout
    val pauseString = configuration.getString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE,
                                              ConfigConstants.DEFAULT_AKKA_ASK_TIMEOUT)
    val delayString = configuration.getString(ConfigConstants.DEFAULT_EXECUTION_RETRY_DELAY_KEY,
                                              pauseString)


    val delayBetweenRetries: Long = try {
        Duration(delayString).toMillis
      }
      catch {
        case n: NumberFormatException => throw new Exception(
          s"Invalid config value for ${ConfigConstants.DEFAULT_EXECUTION_RETRY_DELAY_KEY}: " +
            s"${pauseString}. Value must be a valid duration (such as 100 milli or 1 min)");
      }
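
    // For reference, scala.concurrent.duration.Duration's string parser accepts values
    // such as:
    //
    //   Duration("100 milli").toMillis   // 100L
    //   Duration("1 min").toMillis      // 60000L
    //
    // and throws NumberFormatException for malformed input, which the catch clause
    // above turns into a configuration error.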


    val executionContext = ExecutionContext.fromExecutor(new ForkJoinPool())


    var blobServer: BlobServer = null
    var instanceManager: InstanceManager = null
    var scheduler: FlinkScheduler = null
    var libraryCacheManager: BlobLibraryCacheManager = null


    try {
      blobServer = new BlobServer(configuration)
      instanceManager = new InstanceManager()
      scheduler = new FlinkScheduler(executionContext)
      libraryCacheManager = new BlobLibraryCacheManager(blobServer, cleanupInterval)


      instanceManager.addInstanceListener(scheduler)
    }
    catch {
      case t: Throwable => {
        if (libraryCacheManager != null) {
          libraryCacheManager.shutdown()
        }
        if (scheduler != null) {
          scheduler.shutdown()
        }
        if (instanceManager != null) {
          instanceManager.shutdown()
        }
        if (blobServer != null) {
          blobServer.shutdown()
        }
        throw t
      }
    }
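
    // Note that the cleanup above runs in reverse creation order, and each component is
    // shut down only if it was successfully constructed before the failure occurred.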


    // Create recovery related components
    val (leaderElectionService, submittedJobGraphs, checkpointRecoveryFactory) =
      RecoveryMode.fromConfig(configuration) match {
        case RecoveryMode.STANDALONE =>
          val leaderElectionService = leaderElectionServiceOption match {
            case Some(les) => les
            case None => new StandaloneLeaderElectionService()
          }


          (leaderElectionService,
            new StandaloneSubmittedJobGraphStore(),
            new StandaloneCheckpointRecoveryFactory())


        case RecoveryMode.ZOOKEEPER =>
          val client = ZooKeeperUtils.startCuratorFramework(configuration)


          val leaderElectionService = leaderElectionServiceOption match {
            case Some(les) => les
            case None => ZooKeeperUtils.createLeaderElectionService(client, configuration)
          }


          (leaderElectionService,
            ZooKeeperUtils.createSubmittedJobGraphs(client, configuration),
            new ZooKeeperCheckpointRecoveryFactory(client, configuration))
      }
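
    // The branch taken here is driven by the recovery mode in the configuration
    // (key name and values are assumptions for this Flink version):
    //
    //   recovery.mode: standalone   # default, selects RecoveryMode.STANDALONE
    //   recovery.mode: zookeeper    # selects RecoveryMode.ZOOKEEPER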


    (executionContext,
      instanceManager,
      scheduler,
      libraryCacheManager,
      executionRetries,
      delayBetweenRetries,
      timeout,
      archiveCount,
      leaderElectionService,
      submittedJobGraphs,
      checkpointRecoveryFactory)
  }


  /**
   * Starts the JobManager and job archiver based on the given configuration, in the
   * given actor system.
   *
   * @param configuration The configuration for the JobManager
   * @param actorSystem The actor system running the JobManager
   * @param streamingMode The streaming mode to run the system in (streaming vs. batch-only)
   * @param jobManagerClass The class of the JobManager to be started
   * @param archiveClass The class of the MemoryArchivist to be started
   *
   * @return A tuple of references (JobManager Ref, Archiver Ref)
   */
  def startJobManagerActors(
      configuration: Configuration,
      actorSystem: ActorSystem,
      streamingMode: StreamingMode,
      jobManagerClass: Class[_ <: JobManager],
      archiveClass: Class[_ <: MemoryArchivist])
    : (ActorRef, ActorRef) = {


    startJobManagerActors(
      configuration,
      actorSystem,
      Some(JOB_MANAGER_NAME),
      Some(ARCHIVE_NAME),
      streamingMode,
      jobManagerClass,
      archiveClass)
  }


  /**
   * Starts the JobManager and job archiver based on the given configuration, in the
   * given actor system.
   *
   * @param configuration The configuration for the JobManager
   * @param actorSystem The actor system running the JobManager
   * @param jobMangerActorName Optionally the name of the JobManager actor. If none is given,
   *                          the actor will have the name generated by the actor system.
   * @param archiveActorName Optionally the name of the archive actor. If none is given,
   *                          the actor will have the name generated by the actor system.
   * @param streamingMode The mode to run the system in (streaming vs. batch-only)
   * @param jobManagerClass The class of the JobManager to be started
   * @param archiveClass The class of the MemoryArchivist to be started
   *
   * @return A tuple of references (JobManager Ref, Archiver Ref)
   */
  def startJobManagerActors(
      configuration: Configuration,
      actorSystem: ActorSystem,
      jobMangerActorName: Option[String],
      archiveActorName: Option[String],
      streamingMode: StreamingMode,
      jobManagerClass: Class[_ <: JobManager],
      archiveClass: Class[_ <: MemoryArchivist])
    : (ActorRef, ActorRef) = {


    val (executionContext,
    instanceManager,
    scheduler,
    libraryCacheManager,
    executionRetries,
    delayBetweenRetries,
    timeout,
    archiveCount,
    leaderElectionService,
    submittedJobGraphs,
    checkpointRecoveryFactory) = createJobManagerComponents(
      configuration,
      None)


    val archiveProps = Props(archiveClass, archiveCount)


    // start the archiver with the given name, or without (avoid name conflicts)
    val archive: ActorRef = archiveActorName match {
      case Some(actorName) => actorSystem.actorOf(archiveProps, actorName)
      case None => actorSystem.actorOf(archiveProps)
    }


    val jobManagerProps = Props(
      jobManagerClass,
      configuration,
      executionContext,
      instanceManager,
      scheduler,
      libraryCacheManager,
      archive,
      executionRetries,
      delayBetweenRetries,
      timeout,
      streamingMode,
      leaderElectionService,
      submittedJobGraphs,
      checkpointRecoveryFactory)


    val jobManager: ActorRef = jobMangerActorName match {
      case Some(actorName) => actorSystem.actorOf(jobManagerProps, actorName)
      case None => actorSystem.actorOf(jobManagerProps)
    }


    (jobManager, archive)
  }
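
  // Usage sketch: talking to the started JobManager actor via the Akka ask pattern,
  // assuming the RequestNumberRegisteredTaskManager message from JobManagerMessages and
  // that the reply is the plain Int count.
  //
  //   implicit val askTimeout: Timeout = Timeout(timeout)
  //   val reply = Await.result(jobManager ? RequestNumberRegisteredTaskManager, timeout)
  //   val count = reply.asInstanceOf[Int]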


  def startActor(props: Props, actorSystem: ActorSystem): ActorRef = {
    actorSystem.actorOf(props, JOB_MANAGER_NAME)
  }


  // --------------------------------------------------------------------------
  //  Resolving the JobManager endpoint
  // --------------------------------------------------------------------------


  /**
   * Builds the akka actor path for the JobManager actor, given the socket address
   * where the JobManager's actor system runs.
   *
   * @param address The address of the JobManager's actor system.
   * @param name Optional name of the JobManager actor; defaults to JOB_MANAGER_NAME.
   * @return The akka URL of the JobManager actor.
   */
  def getRemoteJobManagerAkkaURL(
      address: InetSocketAddress,
      name: Option[String] = None)
    : String = {
    val hostPort = NetUtils.socketAddressToUrlString(address)


    getJobManagerAkkaURLHelper(s"akka.tcp://flink@$hostPort", name)
  }


  /**
   * Returns the JobManager actor's remote Akka URL, given the configured hostname and port.
   *
   * @param config The configuration to parse
   * @return JobManager actor remote Akka URL
   */
  def getRemoteJobManagerAkkaURL(config: Configuration) : String = {
    val (hostname, port) = TaskManager.getAndCheckJobManagerAddress(config)


    var hostPort: InetSocketAddress = null


    try {
      val inetAddress: InetAddress = InetAddress.getByName(hostname)
      hostPort = new InetSocketAddress(inetAddress, port)
    }
    catch {
      case e: UnknownHostException => {
        throw new UnknownHostException(s"Cannot resolve the JobManager hostname '$hostname' " +
          s"specified in the configuration")
      }
    }


    JobManager.getRemoteJobManagerAkkaURL(hostPort, Option.empty)
  }


  /**
   * Builds the akka actor path for the JobManager actor to address the actor within
   * its own actor system.
   *
   * @param name Optional name of the JobManager actor; defaults to JOB_MANAGER_NAME.
   * @return The local akka URL of the JobManager actor.
   */
  def getLocalJobManagerAkkaURL(name: Option[String] = None): String = {
    getJobManagerAkkaURLHelper("akka://flink", name)
  }


  def getJobManagerAkkaURL(system: ActorSystem, name: Option[String] = None): String = {
    getJobManagerAkkaURLHelper(AkkaUtils.getAddress(system).toString, name)
  }


  private def getJobManagerAkkaURLHelper(address: String, name: Option[String]): String = {
    address + "/user/" + name.getOrElse(JOB_MANAGER_NAME)
  }
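
  // For illustration, assuming JOB_MANAGER_NAME resolves to "jobmanager", these helpers
  // yield URLs of the following shape (host and port are example values):
  //
  //   getRemoteJobManagerAkkaURL(new InetSocketAddress("master", 6123))
  //     // "akka.tcp://flink@master:6123/user/jobmanager"
  //   getLocalJobManagerAkkaURL()
  //     // "akka://flink/user/jobmanager"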


  def getJobManagerActorRefFuture(
      address: InetSocketAddress,
      system: ActorSystem,
      timeout: FiniteDuration)
    : Future[ActorRef] = {
    AkkaUtils.getActorRefFuture(getRemoteJobManagerAkkaURL(address), system, timeout)
  }
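
  // Usage sketch: blocking on the returned future, equivalent to the blocking lookups
  // below (`address`, `system` and `timeout` are assumed to be in scope).
  //
  //   val refFuture = JobManager.getJobManagerActorRefFuture(address, system, timeout)
  //   val jobManager: ActorRef = Await.result(refFuture, timeout)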


  /**
   * Resolves the JobManager actor reference in a blocking fashion.
   *
   * @param jobManagerUrl The akka URL of the JobManager.
   * @param system The local actor system that should perform the lookup.
   * @param timeout The maximum time to wait until the lookup fails.
   * @throws java.io.IOException Thrown, if the lookup fails.
   * @return The ActorRef to the JobManager
   */
  @throws(classOf[IOException])
  def getJobManagerActorRef(
      jobManagerUrl: String,
      system: ActorSystem,
      timeout: FiniteDuration)
    : ActorRef = {
    AkkaUtils.getActorRef(jobManagerUrl, system, timeout)
  }


  /**
   * Resolves the JobManager actor reference in a blocking fashion.
   *
   * @param address The socket address of the JobManager's actor system.
   * @param system The local actor system that should perform the lookup.
   * @param timeout The maximum time to wait until the lookup fails.
   * @throws java.io.IOException Thrown, if the lookup fails.
   * @return The ActorRef to the JobManager
   */
  @throws(classOf[IOException])
  def getJobManagerActorRef(
      address: InetSocketAddress,
      system: ActorSystem,
      timeout: FiniteDuration)
    : ActorRef = {


    val jmAddress = getRemoteJobManagerAkkaURL(address)
    getJobManagerActorRef(jmAddress, system, timeout)
  }


  /**
   * Resolves the JobManager actor reference in a blocking fashion.
   *
   * @param address The socket address of the JobManager's actor system.
   * @param system The local actor system that should perform the lookup.
   * @param config The config describing the maximum time to wait until the lookup fails.
   * @throws java.io.IOException Thrown, if the lookup fails.
   * @return The ActorRef to the JobManager
   */
  @throws(classOf[IOException])
  def getJobManagerActorRef(
      address: InetSocketAddress,
      system: ActorSystem,
      config: Configuration)
    : ActorRef = {


    val timeout = AkkaUtils.getLookupTimeout(config)
    getJobManagerActorRef(address, system, timeout)
  }
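
  // Usage sketch: client-side resolution of the JobManager ActorRef purely from a
  // Configuration, combining the helpers above. `clientSystem` is an assumed local
  // ActorSystem created via AkkaUtils.
  //
  //   val url = JobManager.getRemoteJobManagerAkkaURL(config)
  //   val lookupTimeout = AkkaUtils.getLookupTimeout(config)
  //   val jobManager = JobManager.getJobManagerActorRef(url, clientSystem, lookupTimeout)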
}