文章目录
NodeManager 启动Executor
Shell启动脚本
NodeManager
default_container_executor.sh
bash -c 'java ..CoarseGrainedExecutorBackend' --> 启动 Executor,接收 task 计算任务
bash -c 'java ..ExecutorLauncher' --> 这里应该是直接奔着启动 ApplicationMaster 去了
/yarn/nm/usercache/hadoop/appcache/application_1557744110775_5172/container_e06_1557744110775_5172_01_000003/launch_container.sh
exec /bin/bash -c "LD_LIBRARY_PATH="$HADOOP_COMMON_HOME/../../../CDH-5.12.0-1.cdh5.12.0.p0.29/lib/hadoop/lib/native:$LD_LIBRARY_PATH" $JAVA_HOME/bin/java -server -XX:OnOutOfMemoryError='kill %p' -Xms4096m -Xmx4096m '-Xdebug' '-Xnoagent' '-Djava.compiler=NONE' '-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=2346' -Djava.io.tmpdir=$PWD/tmp '-Dspark.authenticate.enableSaslEncryption=false' '-Dspark.authenticate=false' '-Dspark.driver.port=39563' '-Dspark.shuffle.service.port=7337' -Dspark.yarn.app.container.log.dir=/yarn/container-logs/application_1557744110775_5172/container_e06_1557744110775_5172_01_000003 org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url spark://CoarseGrainedScheduler@10.59.34.203:39563 --executor-id 2 --hostname host-10-59-34-204 --cores 1 --app-id application_1557744110775_5172 --user-class-path file:$PWD/__app__.jar 1>/yarn/container-logs/application_1557744110775_5172/container_e06_1557744110775_5172_01_000003/stdout 2>/yarn/container-logs/application_1557744110775_5172/container_e06_1557744110775_5172_01_000003/stderr"
CoarseGrainedExecutorBackend 启动 RPC EndPoint
// CoarseGrainedExecutorBackend
// main()
// run() starts the executor RPC endpoint
// A new CoarseGrainedExecutorBackend instance is registered with the RpcEnv
// under the endpoint name "Executor".
env.rpcEnv.setupEndpoint("Executor", new CoarseGrainedExecutorBackend(
env.rpcEnv, driverUrl, executorId, sparkHostPort, cores, userClassPath, env))
// CoarseGrainedExecutorBackend receives task-management calls over RPC.
// Message handler: dispatches on the type of the incoming message.
override def receive: PartialFunction[Any, Unit] = {
// Registration succeeded: create the Executor that will run tasks.
case RegisteredExecutor(hostname) =>
logInfo("Successfully registered with driver")
try {
executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false)
} catch {
// Only non-fatal errors are caught; the backend exits with code 1.
case NonFatal(e) =>
exitExecutor(1, "Unable to create executor due to " + e.getMessage, e)
}
// Registration was rejected: exit with code 1.
case RegisterExecutorFailed(message) =>
exitExecutor(1, "Slave registration failed: " + message)
// A task was assigned to this executor.
case LaunchTask(data) =>
// A task cannot be launched before the Executor has been created.
if (executor == null) {
exitExecutor(1, "Received LaunchTask command but executor was null")
} else {
val taskDesc = ser.deserialize[TaskDescription](data.value)
logInfo("Got assigned task " + taskDesc.taskId)
// Based on the received arguments, a TaskRunner (Runnable) object is instantiated and then
// submitted to a thread pool for execution (author's note; launchTask's body is not shown here).
executor.launchTask(this, taskId = taskDesc.taskId, attemptNumber = taskDesc.attemptNumber,
taskDesc.name, taskDesc.serializedTask)
}
// Request to kill a task; the second field of the message is ignored.
case KillTask(taskId, _, interruptThread) =>
if (executor == null) {
exitExecutor(1, "Received KillTask command but executor was null")
} else {
executor.killTask(taskId, interruptThread)
}
case StopExecutor =>
stopping.set(true)
logInfo("Driver commanded a shutdown")
// Cannot shutdown here because an ack may need to be sent back to the caller. So send
// a message to self to actually do the shutdown.
self.send(Shutdown)
// Self-sent message that performs the actual shutdown.
case Shutdown =>
stopping.set(true)
// NOTE(review): unlike LaunchTask/KillTask, executor is not null-checked here;
// confirm Shutdown cannot arrive before RegisteredExecutor has been handled.
executor.stop()
stop()
rpcEnv.shutdown()
}
在 Executor 中,
- TaskRunner.run(Executor.scala:242)
- Task.run()
- Task.runTask(context) 执行的实现类为 ResultTask
Task的反序列化和执行
// Deserializes the (rdd, func) pair carried by taskBinary and applies func to
// the iterator over this task's partition, returning func's result.
override def runTask(context: TaskContext): U = {
// Deserialize the RDD and the func using the broadcast variables.
val deserializeStartTime = System.currentTimeMillis()
val ser = SparkEnv.get.closureSerializer.newInstance()
// (annotation to be added) The payload decodes to an (RDD[T], (TaskContext, Iterator[T]) => U)
// pair, using the current thread's context class loader.
val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
// Elapsed wall-clock time (ms) spent on the deserialization above.
_executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime
metrics = Some(context.taskMetrics)
// func receives the task context and the iterator for this partition.
func(context, rdd.iterator(partition, context))
}
先看下 taskBinary 的注释说明 : broadcasted version of the serialized RDD and the function to apply on each partition of the given RDD. Once deserialized, the type should be (RDD[T], (TaskContext, Iterator[T]) => U).
看一下 ser.deserialize() 返回的结果 (rdd, func):
- rdd : 比较好理解,就是要计算集合的抽象
- rdd.iterator(partition, context) : 返回当前rdd在当前partition上的迭代器,如果依赖的rdd不存在,需要级联 compute() 调用
- func : 从上面的 `func(context, rdd.iterator(partition, context))` 可以看到,func 接收 TaskContext 和当前 partition 的迭代器作为参数,在 func 内部从迭代器取值并进行计算;具体 func 函数内容应该是 map,groupByKey 这些函数… // TODO
Task Run
// run:89, Task
// Pairs the result of runTask with the value returned by context.collectAccumulators().
(runTask(context), context.collectAccumulators())
// runTask:66, ResultTask
// Deserializes the (rdd, func) pair carried by taskBinary and applies func to
// the iterator over this task's partition, returning func's result.
override def runTask(context: TaskContext): U = {
// Deserialize the RDD and the func using the broadcast variables.
val deserializeStartTime = System.currentTimeMillis()
val ser = SparkEnv.get.closureSerializer.newInstance()
val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
// Elapsed wall-clock time (ms) spent on the deserialization above.
_executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime
metrics = Some(context.taskMetrics)
// rdd.iterator(...) is the next frame in the call stack being traced.
func(context, rdd.iterator(partition, context))
}
// iterator:270, RDD
// Returns the iterator for the given partition: goes through the cache manager
// when a storage level is set, otherwise computes or reads the checkpoint.
final def iterator(split: Partition, context: TaskContext): Iterator[T] = {
if (storageLevel != StorageLevel.NONE) {
// A storage level other than NONE is set: delegate to the cache manager.
SparkEnv.get.cacheManager.getOrCompute(this, split, context, storageLevel)
} else {
computeOrReadCheckpoint(split, context)
}
}
// Returns the partition's iterator either from the first parent RDD (when this
// RDD is checkpointed and materialized) or by calling compute directly.
private[spark] def computeOrReadCheckpoint(split: Partition, context: TaskContext): Iterator[T] =
{
if (isCheckpointedAndMaterialized) {
// Checkpointed: read through the first parent's iterator instead of recomputing.
firstParent[T].iterator(split, context)
} else {
compute(split, context)
}
}
// Similar to the DAG: processing is driven through the compute method of the last RDD. Here the last RDD is a MapPartitionsRDD; the call goes into firstParent's iterator method, repeating the same pattern as the iterator above
// compute:38, MapPartitionsRDD
// Applies f to the task context, the split index and the first parent's iterator for the same split.
override def compute(split: Partition, context: TaskContext): Iterator[U] =
f(context, split.index, firstParent[T].iterator(split, context))
// When compute is entered again, the function f is executed, which leads into RDD's mapPartitions method
/**
* Return a new RDD by applying a function to each partition of this RDD.
*
* `preservesPartitioning` indicates whether the input function preserves the partitioner, which
* should be `false` unless this is a pair RDD and the input function doesn't modify the keys.
*/
def mapPartitions[U: ClassTag](
f: Iterator[T] => Iterator[U],
preservesPartitioning: Boolean = false): RDD[U] = withScope {
// The user function is passed through sc.clean before being captured below.
val cleanedF = sc.clean(f)
new MapPartitionsRDD(
this,
// Adapter to the three-argument form: context and index are ignored, only the iterator is passed on.
(context: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(iter),
preservesPartitioning)
}
cleanedF 函数在迭代器上执行,进入 cleanedF Function (= BroadcastNestedLoopJoin) 的 `override def doExecute(): RDD[InternalRow]` 方法
// TODO Execution lands here probably because cleanedF is the function defined by `BroadcastNestedLoopJoin` in `doExecute()`, which was shipped to the executor node and executed there
// (the lambda body is omitted in this excerpt)
streamed.execute().mapPartitions { streamedIter =>
}
cleanedF = {BroadcastNestedLoopJoin$$anonfun$2@7677} "<function1>"
$outer = {BroadcastNestedLoopJoin@7680} "BroadcastNestedLoopJoin BuildRight, LeftOuter, Some((((open_time_hqb#19 <= day_id#22) || (open_time_dq#20 <= day_id#22)) || (open_time_lhqx#21 <= day_id#22)))\n:- HiveTableScan [customer_id#18,open_time_hqb#19,open_time_dq#20,open_time_lhqx#21], MetastoreRelation i8ji, tmp_dm_cust_daily_aum_open, Some(a)\n+- HiveTableScan [day_id#22], MetastoreRelation i8ji, tmp_dm_cust_daily_aum_pt, Some(t)\n"
numStreamedRows = {LongSQLMetric@7683} "0"
broadcastedRelation = {TorrentBroadcast@7679} "Broadcast(4)"