1. 在LocalJobRunner$Job中的run()方法中
try {
if (numReduceTasks > 0) {
//根据reduceTask的个数,创建对应个数的LocalJobRunner$Job$ReduceTaskRunnable
List<RunnableWithThrowable> reduceRunnables = getReduceTaskRunnables(
jobId, mapOutputFiles);
// 线程池
ExecutorService reduceService = createReduceExecutor();
//将 ReduceTaskRunnable提交给线程池执行
runTasks(reduceRunnables, reduceService, "reduce");
}
1) . 执行 LocalJobRunner$Job$ReduceTaskRunnable 中的run方法
(1) . ReduceTask reduce = new ReduceTask(systemJobFile.toString(),
reduceId, taskId, mapIds.size(), 1);
//创建ReduceTask对象
(2) . reduce.run(localConf, Job.this); // 执行ReduceTask的run方法
<1> . runNewReducer(job, umbilical, reporter, rIter, comparator,
keyClass, valueClass);
[1] . org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = TaskAttemptContextImpl
[2] . org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer = WordCountReducer
[3] . org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> trackedRW = ReduceTask$NewTrackingRecordWriter
[4] . reducer.run(reducerContext);
//执行WordCountReducer的run方法 ,实际执行的是WordCountReducer继承的Reducer类中的run方法.
{1} . reduce(context.getCurrentKey(), context.getValues(), context);
//执行到WordCountReducer中的 reduce方法.
{2} . context.write(k, v); // 将处理完的kv写出.
>>1 . reduceContext.write(key, value);
>>2 . output.write(key, value);
>>3 . real.write(key, value); // 通过RecordWriter将kv写出
>>4 . out.write(NEWLINE); //通过输出流将数据写到结果文件中