/**
 * :: DeveloperApi ::
 * Stores information about a stage to pass from the scheduler to SparkListeners.
 *
 * @param stageId identifier of the stage (`Stage.id` when built through `StageInfo.fromStage`)
 * @param attemptId identifier of this attempt of the stage
 * @param name name of the stage (`Stage.name` when built through `StageInfo.fromStage`)
 * @param numTasks number of tasks recorded for the stage
 * @param rddInfos information about the RDDs associated with the stage
 * @param parentIds identifiers of the parent stages
 * @param details detail string of the stage (`Stage.details` when built through
 *                `StageInfo.fromStage`)
 * @param taskMetrics metrics attached to the stage; defaults to `null`, so callers must be
 *                    prepared for a missing value
 * @param taskLocalityPreferences per-task sequences of preferred locations; defaults to empty
 */
@DeveloperApi
class StageInfo(
    val stageId: Int,
    val attemptId: Int,
    val name: String,
    val numTasks: Int,
    val rddInfos: Seq[RDDInfo],
    val parentIds: Seq[Int],
    val details: String,
    val taskMetrics: TaskMetrics = null,
    private[spark] val taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty) {

  /** When this stage was submitted from the DAGScheduler to a TaskScheduler. */
  var submissionTime: Option[Long] = None

  /** Time when all tasks in the stage completed or when the stage was cancelled. */
  var completionTime: Option[Long] = None

  /** If the stage failed, the reason why. */
  var failureReason: Option[String] = None

  /**
   * Terminal values of accumulables updated during this stage, including all the user-defined
   * accumulators.
   */
  val accumulables: HashMap[Long, AccumulableInfo] = HashMap[Long, AccumulableInfo]()

  /**
   * Marks this stage as failed: records `reason` as the failure reason and stamps
   * `completionTime` with the current wall-clock time in milliseconds.
   *
   * @param reason description of why the stage failed
   */
  def stageFailed(reason: String): Unit = {
    // Same assignment order as before: reason first, then the completion timestamp.
    failureReason = Some(reason)
    completionTime = Some(System.currentTimeMillis())
  }

  /**
   * Summarises the stage state as a string.
   *
   * @return "running" while `completionTime` is unset (even if a failure reason is already
   *         present); otherwise "failed" when `failureReason` is set, else "succeeded"
   */
  private[spark] def getStatusString: String =
    if (completionTime.isEmpty) "running"
    else if (failureReason.isDefined) "failed"
    else "succeeded"
}
private[spark] object StageInfo {

  /**
   * Construct a StageInfo from a Stage.
   *
   * Each Stage is associated with one or many RDDs, with the boundary of a Stage marked by
   * shuffle dependencies. Therefore, all ancestor RDDs related to this Stage's RDD through a
   * sequence of narrow dependencies should also be associated with this Stage.
   *
   * @param stage stage to describe; its id, name, parents, details and RDD are read
   * @param attemptId identifier of the stage attempt to record
   * @param numTasks task count to record; when `None`, `stage.numTasks` is used instead
   * @param taskMetrics metrics to attach to the resulting StageInfo; defaults to `null`
   * @param taskLocalityPreferences per-task sequences of preferred locations; defaults to empty
   * @return a StageInfo whose `rddInfos` lists the stage's own RDD first, followed by its
   *         narrow ancestors
   */
  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    // Collect the narrow ancestors first, then put the stage's own RDD at the head.
    val narrowAncestorInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val allRddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ narrowAncestorInfos

    // Named arguments (kept in declaration order) guard against swapping same-typed values.
    new StageInfo(
      stageId = stage.id,
      attemptId = attemptId,
      name = stage.name,
      numTasks = numTasks.getOrElse(stage.numTasks),
      rddInfos = allRddInfos,
      parentIds = stage.parents.map(_.id),
      details = stage.details,
      taskMetrics = taskMetrics,
      taskLocalityPreferences = taskLocalityPreferences)
  }
}