0. Recap of [Hadoop Series] YARN Source Code Analysis (Part 2)
In the previous part, the NodeManager launched YarnChild.
1. YarnChild Analysis
1. YarnChild runs the MapTask or ReduceTask that was submitted.
2. TaskUmbilicalProtocol is the RPC interface over which the MapTask/ReduceTask reports its progress and results back to the MRAppMaster (a trimmed sketch of the interface follows below).
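Before walking through the source, here is a trimmed-down view of TaskUmbilicalProtocol limited to the calls that matter for this walkthrough. Exact signatures and return types differ between Hadoop versions, so treat this as an approximation rather than the authoritative interface definition.
    // Simplified sketch of the umbilical RPC interface between the child JVM and the MRAppMaster.
    // Signatures are approximate; check your Hadoop version for the exact definitions.
    interface TaskUmbilicalProtocol {
        JvmTask getTask(JvmContext context) throws IOException;            // child asks: which task should this JVM run?
        boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)  // periodic progress/counter report
            throws IOException, InterruptedException;                      // (return type varies across versions)
        void commitPending(TaskAttemptID taskId, TaskStatus taskStatus)    // task output is ready to commit
            throws IOException, InterruptedException;
        boolean canCommit(TaskAttemptID taskId) throws IOException;        // AM grants/denies the commit
        void done(TaskAttemptID taskId) throws IOException;                // task attempt finished successfully
    }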
class YarnChild {
    public static void main(String[] args) throws Throwable {
        final JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE);
        // build the TaskUmbilicalProtocol RPC proxy pointing at the MRAppMaster
        final InetSocketAddress address = NetUtils.createSocketAddrForHost(host, port);
        final TaskUmbilicalProtocol umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TaskUmbilicalProtocol>() {
            @Override
            public TaskUmbilicalProtocol run() throws Exception {
                return (TaskUmbilicalProtocol) RPC.getProxy(TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID, address, job);
            }
        });
        // ask the MRAppMaster which task this JVM should execute
        Task task = null;
        myTask = umbilical.getTask(context);
        task = myTask.getTask();
        YarnChild.taskid = task.getTaskID();
        configureTask(job, task, credentials, jt);
        // run the task (MapTask.run or ReduceTask.run)
        MRApps.setJobClassLoader(job);
        final Task taskFinal = task;
        taskFinal.run(job, umbilical);
    }
}
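In the full YarnChild.main, the getTask call above actually sits in a small polling loop: the child keeps asking the MRAppMaster until a task is assigned to this JVM, or it is told to exit. A rough sketch of that loop (simplified, not line-for-line from the source; the real code scales the back-off interval and logs each retry):

    // Approximate shape of the task-fetch logic in YarnChild.main.
    JvmTask myTask = null;
    long sleepTimeMilliSecs = 500;              // illustrative fixed back-off
    while (myTask == null) {
        Thread.sleep(sleepTimeMilliSecs);       // wait before (re)asking the MRAppMaster
        myTask = umbilical.getTask(context);    // RPC to the MRAppMaster
    }
    if (myTask.shouldDie()) {
        return;                                 // the AM no longer needs this JVM
    }
    Task task = myTask.getTask();               // the MapTask or ReduceTask to run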
public class MapTask extends Task {
    public void run(final JobConf job, final TaskUmbilicalProtocol umbilical) throws IOException, ClassNotFoundException, InterruptedException {
        TaskReporter reporter = startReporter(umbilical);   // background thread that periodically calls umbilical.statusUpdate()
        initialize(job, getJobID(), reporter, useNewApi);
        runNewMapper(job, splitMetaInfo, umbilical, reporter);
        done(umbilical, reporter);
    }
    private <INKEY,INVALUE,OUTKEY,OUTVALUE> void runNewMapper(final JobConf job, final TaskSplitIndex splitIndex, final TaskUmbilicalProtocol umbilical, TaskReporter reporter) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job, getTaskID(), reporter);
        // make a mapper
        org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE> mapper =
            (org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE>) ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
        // make the input format
        org.apache.hadoop.mapreduce.InputFormat<INKEY,INVALUE> inputFormat =
            (org.apache.hadoop.mapreduce.InputFormat<INKEY,INVALUE>) ReflectionUtils.newInstance(taskContext.getInputFormatClass(), job);
        // rebuild the input split
        org.apache.hadoop.mapreduce.InputSplit split = getSplitDetails(new Path(splitIndex.getSplitLocation()), splitIndex.getStartOffset());
        org.apache.hadoop.mapreduce.RecordReader<INKEY,INVALUE> input = new NewTrackingRecordReader<INKEY,INVALUE>(split, inputFormat, reporter, taskContext);
        // NewOutputCollector wires map output into the sorting collector (MapOutputBuffer by default)
        org.apache.hadoop.mapreduce.RecordWriter output = new NewOutputCollector(taskContext, job, umbilical, reporter);
        org.apache.hadoop.mapreduce.MapContext<INKEY, INVALUE, OUTKEY, OUTVALUE> mapContext =
            new MapContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(job, getTaskID(), input, output, committer, reporter, split);
        org.apache.hadoop.mapreduce.Mapper<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context mapperContext =
            new WrappedMapper<INKEY, INVALUE, OUTKEY, OUTVALUE>().getMapContext(mapContext);
        input.initialize(split, mapperContext);
        mapper.run(mapperContext);
        ->setup(context);
        ->while (context.nextKeyValue()) {    --context is backed by the MapContextImpl built above
        ->    map(context.getCurrentKey(), context.getCurrentValue(), context);
        ->        user-defined processing ...
        ->        context.write(new Text(word), new IntWritable(1));    --user code (WordCount-style; see the sketch after this class)
        ->}
        ->cleanup(context);
        mapPhase.complete();
        statusUpdate(umbilical);    // push the final map-phase progress to the MRAppMaster
        input.close();
        output.close(mapperContext);
        closeQuietly(input);
    }
    public void done(TaskUmbilicalProtocol umbilical, TaskReporter reporter) throws IOException, InterruptedException {
        // abridged: the full Task.done() first runs a commit handshake (umbilical.commitPending /
        // umbilical.canCommit) when the output committer requires it, then signals completion
        sendDone(umbilical);
        ->umbilical.done(getTaskID());    --umbilical is the TaskUmbilicalProtocol proxy; this tells the MRAppMaster the attempt finished successfully
    }
}
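To connect the expansion above to user code: the map() called inside mapper.run() is whatever class the job registered via Job.setMapperClass(...). The class below is purely illustrative (a WordCount-style mapper, not part of the YarnChild/MapTask source), showing the setup/map/cleanup hooks that Mapper.run() drives:

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // Illustrative user mapper; the framework's Mapper.run() calls setup(), then map() per record, then cleanup().
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                word.set(token);
                context.write(word, ONE);   // the context.write(...) seen in the mapper.run expansion above
            }
        }
    }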