ReduceTask执行流程

最新推荐文章于 2024-04-03 08:49:03 发布

weixin_34413357

最新推荐文章于 2024-04-03 08:49:03 发布

阅读量280

点赞数

文章标签：大数据 java python

原文链接：https://my.oschina.net/yulongblog/blog/1505172

版权

2019独角兽企业重金招聘Python工程师标准>>>

ReduceTask的执行过程主要包含Shuffle、Merge、Sort、Reduce、Write五个阶段。

Shuffle阶段：也称为Copy阶段。ReduceTask从各个MapTask上远程拷贝一片数据，并针对某一片数据，如果其大小超过一定阈值，则写到磁盘上，否则直接放到内存中。

Merge阶段：在远程拷贝数据的同时，ReduceTask启动了两个后台线程对内存和磁盘上的文件进行合并，以防止内存使用过多或磁盘上文件过多。

Sort阶段：按照MapReduce语义，用户编写的reduce()函数的输入数据是按key进行聚集的一组数据。为了将key相同的数据聚在一起，Hadoop采用了基于排序的策略。由于各个MapTask已经对自己的处理结果进行了局部排序，因此，ReduceTask只需对所有数据进行一次归并排序即可。

Reduce阶段：在该阶段，ReduceTask将每组数据依次交给用户编写的reduce()函数处理。

Write阶段：reduce()函数将计算结果写到HDFS上。

org.apache.hadoop.mapred.YarnChild的main方法

// Copy the task into a final local so the anonymous class below can use it.
final Task taskFinal = task;
// Execute the task as the child user (childUGI).
childUGI.doAs(new PrivilegedExceptionAction<Object>() {
  @Override
  public Object run() throws Exception {
    // use job-specified working directory
    FileSystem.get(job).setWorkingDirectory(job.getWorkingDirectory());
    // Run the task: this invokes the run method declared on
    // org.apache.hadoop.mapred.Task.
    taskFinal.run(job, umbilical);
    return null;
  }
});

org.apache.hadoop.mapred.Task是一个抽象类，它的子类包括MapTask和ReduceTask。

ReduceTask的run方法分析

/**
 * Runs this reduce task.
 *
 * <p>Registers the copy/sort/reduce progress phases (when this is a map or
 * reduce task), starts the reporter and initializes the task. Job-cleanup,
 * job-setup and task-cleanup tasks are handled first and return early.
 * Otherwise the configured {@code ShuffleConsumerPlugin} is instantiated and
 * run to obtain the reduce input iterator, which is then handed to the
 * new-API or old-API reducer before the plugin is closed and the task is
 * marked done.
 *
 * @param job the job configuration
 * @param umbilical the protocol object used to communicate with the parent
 * @throws IOException propagated from the shuffle or the reducer
 * @throws InterruptedException propagated from the shuffle or the reducer
 * @throws ClassNotFoundException propagated from the called helpers
 */
public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
  throws IOException, InterruptedException, ClassNotFoundException {
  job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());

  if (isMapOrReduce()) {
    copyPhase = getProgress().addPhase("copy");
    sortPhase  = getProgress().addPhase("sort");
    reducePhase = getProgress().addPhase("reduce");
  }
  // start thread that will handle communication with parent
  TaskReporter reporter = startReporter(umbilical);
  
  boolean useNewApi = job.getUseNewReducer();
  initialize(job, getJobID(), reporter, useNewApi);

  // check if it is a cleanupJobTask
  if (jobCleanup) {
    runJobCleanupTask(umbilical, reporter);
    return;
  }
  if (jobSetup) {
    runJobSetupTask(umbilical, reporter);
    return;
  }
  if (taskCleanup) {
    runTaskCleanupTask(umbilical, reporter);
    return;
  }
  
  // Initialize the codec
  codec = initCodec();
  RawKeyValueIterator rIter = null;
  ShuffleConsumerPlugin shuffleConsumerPlugin = null;
  
  // A combine collector is only created when a combiner class is configured.
  Class combinerClass = conf.getCombinerClass();
  CombineOutputCollector combineCollector = 
    (null != combinerClass) ? 
   new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;

  Class<? extends ShuffleConsumerPlugin> clazz =
        job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
        // Shuffle is the default ShuffleConsumerPlugin implementation; a custom
        // implementation of the interface can be configured to replace it.
  shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
  LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);

  ShuffleConsumerPlugin.Context shuffleContext = 
    new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical, 
                super.lDirAlloc, reporter, codec, 
                combinerClass, combineCollector, 
                spilledRecordsCounter, reduceCombineInputCounter,
                shuffledMapsCounter,
                reduceShuffleBytes, failedShuffleCounter,
                mergedMapOutputsCounter,
                taskStatus, copyPhase, sortPhase, this,
                mapOutputFile, localMapFiles);
  shuffleConsumerPlugin.init(shuffleContext);

  // With the default Shuffle plugin, Shuffle.run() starts several Fetcher
  // threads whose run() calls copyFromHost to fetch the map outputs.
  rIter = shuffleConsumerPlugin.run();

  // free up the data structures
  mapOutputFilesOnDisk.clear();
  
  sortPhase.complete();                         // sort is complete
  setPhase(TaskStatus.Phase.REDUCE); 
  statusUpdate(umbilical);
  Class keyClass = job.getMapOutputKeyClass();
  Class valueClass = job.getMapOutputValueClass();
  RawComparator comparator = job.getOutputValueGroupingComparator();

  // Dispatch to the reducer implementation matching the configured API.
  if (useNewApi) {
    runNewReducer(job, umbilical, reporter, rIter, comparator, 
                  keyClass, valueClass);
  } else {
    runOldReducer(job, umbilical, reporter, rIter, comparator, 
                  keyClass, valueClass);
  }

  shuffleConsumerPlugin.close();
  done(umbilical, reporter);
}

runNewReducer方法分析

/**
 * Runs the reduce phase with a new-API
 * {@code org.apache.hadoop.mapreduce.Reducer}.
 *
 * <p>Wraps the raw iterator so that each call to {@code next()} also reports
 * progress, instantiates the configured reducer class by reflection, builds
 * the reducer context and runs the reducer. The tracking record writer is
 * always closed, even when the reducer throws.
 *
 * @param job the job configuration
 * @param umbilical the protocol object used to communicate with the parent
 * @param reporter the reporter that receives progress updates
 * @param rIter the raw key/value iterator over the reduce input
 * @param comparator the comparator passed on to the reduce context
 * @param keyClass the class of the input keys
 * @param valueClass the class of the input values
 */
private <INKEY,INVALUE,OUTKEY,OUTVALUE>
void runNewReducer(JobConf job,
                   final TaskUmbilicalProtocol umbilical,
                   final TaskReporter reporter,
                   RawKeyValueIterator rIter,
                   RawComparator<INKEY> comparator,
                   Class<INKEY> keyClass,
                   Class<INVALUE> valueClass
                   ) throws IOException,InterruptedException, 
                            ClassNotFoundException {
  // wrap value iterator to report progress.
  final RawKeyValueIterator rawIter = rIter;
  rIter = new RawKeyValueIterator() {
    public void close() throws IOException {
      rawIter.close();
    }
    public DataInputBuffer getKey() throws IOException {
      return rawIter.getKey();
    }
    public Progress getProgress() {
      return rawIter.getProgress();
    }
    public DataInputBuffer getValue() throws IOException {
      return rawIter.getValue();
    }
    public boolean next() throws IOException {
      // Advance the underlying iterator, then publish its progress.
      boolean ret = rawIter.next();
      reporter.setProgress(rawIter.getProgress().getProgress());
      return ret;
    }
  };
  // make a task context so we can get the classes
  // Author's note: TaskAttemptContextImpl mainly holds the status and
  // progress information of this MapReduce task (not verifiable from here).
  org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
    new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(job,
        getTaskID(), reporter);
  // make a reducer
  // The user-defined Reducer class is instantiated via reflection.
  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer =
    (org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>)
      ReflectionUtils.newInstance(taskContext.getReducerClass(), job);
  // Author's note: this RecordWriter is the one used by WrappedReducer; output
  // is ultimately written through RecordWriter.write (not verifiable from here).
  org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> trackedRW = 
    new NewTrackingRecordWriter<OUTKEY, OUTVALUE>(this, taskContext);
  job.setBoolean("mapred.skip.on", isSkipping());
  job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
  org.apache.hadoop.mapreduce.Reducer.Context 
       reducerContext = createReduceContext(reducer, job, getTaskID(),
                                             rIter, reduceInputKeyCounter, 
                                             reduceInputValueCounter, 
                                             trackedRW,
                                             committer,
                                             reporter, comparator, keyClass,
                                             valueClass);
  // Author's notes (describe code outside this excerpt -- TODO confirm):
  // createReduceContext is Task.createReduceContext, which returns the result
  // of WrappedReducer.getReducerContext. WrappedReducer's inner class Context
  // is a subclass of Reducer's abstract inner class Context;
  // WrappedReducer.Context.write calls the write method of
  // TaskInputOutputContextImpl (the parent of ReduceContextImpl, which
  // implements ReduceContext), and that ends up in the abstract class
  // RecordWriter's write method. The concrete output subclasses are covered
  // in the OutputFormat part of the programming model.
  try {
    // Run the reduce task. User reducers normally emit output through
    // context.write, i.e. the path described in the notes above.
    reducer.run(reducerContext);
  } finally {
    trackedRW.close(reducerContext);
  }
}

Shuffle.run()方法

 /**
  * Runs the shuffle and returns an iterator over the merged map outputs.
  *
  * <p>Starts the map-completion event fetcher and the map-output fetcher
  * threads (a single {@code LocalFetcher} when {@code localMapFiles} is set,
  * otherwise the configured number of {@code Fetcher}s, default 5), waits
  * for the scheduler to report completion while reporting progress, shuts
  * the threads and the scheduler down, moves the task to the SORT phase and
  * finishes the merges via {@code merger.close()}.
  *
  * @return the iterator produced by {@code merger.close()}
  * @throws IOException declared by the method signature
  * @throws InterruptedException declared by the method signature
  */
 public RawKeyValueIterator run() throws IOException, InterruptedException {
  // Scale the maximum events we fetch per RPC call to mitigate OOM issues
  // on the ApplicationMaster when a thundering herd of reducers fetch events
  // TODO: This should not be necessary after HADOOP-8942
  int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
      MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
  int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);

  // Start the map-completion events fetcher thread
  final EventFetcher<K,V> eventFetcher = 
    new EventFetcher<K,V>(reduceId, umbilical, scheduler, this,
        maxEventsToFetch);
  eventFetcher.start();
  
  // Start the map-output fetcher threads
  boolean isLocal = localMapFiles != null;
  final int numFetchers = isLocal ? 1 :
    jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
  Fetcher<K,V>[] fetchers = new Fetcher[numFetchers];
  if (isLocal) {
    fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
        merger, reporter, metrics, this, reduceTask.getShuffleSecret(),
        localMapFiles);
    fetchers[0].start();
  } else {
    for (int i=0; i < numFetchers; ++i) {
      fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger, 
                                     reporter, metrics, this, 
                                     reduceTask.getShuffleSecret());
      fetchers[i].start(); // Fetcher thread: fetches data from the map outputs
    }
  }
  
  // Wait for shuffle to complete successfully
  while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
    reporter.progress();
    
    // Fail the shuffle if a throwable has been recorded on this object.
    synchronized (this) {
      if (throwable != null) {
        throw new ShuffleError("error in shuffle in " + throwingThreadName,
                               throwable);
      }
    }
  }

  // Stop the event-fetcher thread
  eventFetcher.shutDown();
  
  // Stop the map-output fetcher threads
  for (Fetcher<K,V> fetcher : fetchers) {
    fetcher.shutDown();
  }
  
  // stop the scheduler
  scheduler.close();

  copyPhase.complete(); // copy is already complete
  taskStatus.setPhase(TaskStatus.Phase.SORT);
  reduceTask.statusUpdate(umbilical);

  // Finish the on-going merges...
  RawKeyValueIterator kvIter = null;
  try {
    kvIter = merger.close(); // author's note: this calls MergeManagerImpl.close()
  } catch (Throwable e) {
    throw new ShuffleError("Error while doing final merge " , e);
  }

  // Sanity check
  synchronized (this) {
    if (throwable != null) {
      throw new ShuffleError("error in shuffle in " + throwingThreadName,
                             throwable);
    }
  }
  
  return kvIter;
}

线程Fetcher.run()

/**
 * Fetcher thread body: until the fetcher is stopped or its thread is
 * interrupted, repeatedly waits for the merger, obtains a host from the
 * scheduler and copies the map outputs from that host. An interruption ends
 * the thread silently; any other throwable is handed to the exception
 * reporter.
 */
public void run() {
  try {
    // Keep looping while the fetcher is neither stopped nor interrupted.
    while (!(stopped || Thread.currentThread().isInterrupted())) {
      MapHost assignedHost = null;
      try {
        // Block here while a merge is running.
        merger.waitForResource();

        // Ask the scheduler for the next host to shuffle from.
        assignedHost = scheduler.getHost();
        metrics.threadBusy();

        // Do the actual shuffle for that host.
        copyFromHost(assignedHost);
      } finally {
        // Release the host only if one was actually handed out.
        if (null != assignedHost) {
          scheduler.freeHost(assignedHost);
          metrics.threadFree();
        }
      }
    }
  } catch (InterruptedException interrupted) {
    // Interrupted: nothing left to do, just let the thread finish.
  } catch (Throwable failure) {
    exceptionReporter.reportException(failure);
  }
}

线程Fetcher.copyFromHost()

/**
 * Fetches the map outputs the scheduler lists for {@code host}.
 *
 * <p>Opens a shuffle connection for those maps and calls
 * {@code copyMapOutput} until nothing remains or it reports failed tasks.
 * If {@code copyMapOutput} throws an {@code IOException}, the connection is
 * re-opened for the remaining maps only. Failed tasks are reported to the
 * scheduler, and whatever is still in the remaining set on exit is put back
 * via {@code scheduler.putBackKnownMapOutput}.
 *
 * @param host the host to fetch map outputs from
 * @throws IOException if the loop ends without failures but some expected
 *         map outputs were not returned, or propagated from the called
 *         helpers
 */
protected void copyFromHost(MapHost host) throws IOException {
  // reset retryStartTime for a new host
  retryStartTime = 0;
  // Get completed maps on 'host'
  List<TaskAttemptID> maps = scheduler.getMapsForHost(host);
  
  // Sanity check to catch hosts with only 'OBSOLETE' maps, 
  // especially at the tail of large jobs
  if (maps.size() == 0) {
    return;
  }
  
  if(LOG.isDebugEnabled()) {
    LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: "
      + maps);
  }
  
  // List of maps to be fetched yet
  Set<TaskAttemptID> remaining = new HashSet<TaskAttemptID>(maps);
  
  // Construct the url and connect
  URL url = getMapOutputURL(host, maps);
  DataInputStream input = openShuffleUrl(host, remaining, url);
  if (input == null) {
    return;
  }
  
  try {
    // Loop through available map-outputs and fetch them
    // On any error, faildTasks is not null and we exit
    // after putting back the remaining maps to the 
    // yet_to_be_fetched list and marking the failed tasks.
    TaskAttemptID[] failedTasks = null;
    while (!remaining.isEmpty() && failedTasks == null) {
      try {
        // Copy one map output; a non-null result ends the loop.
        failedTasks = copyMapOutput(host, input, remaining, fetchRetryEnabled);
      } catch (IOException e) {
        //
        // Setup connection again if disconnected by NM
        connection.disconnect();
        // Get map output from remaining tasks only.
        url = getMapOutputURL(host, remaining);
        input = openShuffleUrl(host, remaining, url);
        if (input == null) {
          return;
        }
      }
    }
    
    if(failedTasks != null && failedTasks.length > 0) {
      LOG.warn("copyMapOutput failed for tasks "+Arrays.toString(failedTasks));
      scheduler.hostFailed(host.getHostName());
      for(TaskAttemptID left: failedTasks) {
        scheduler.copyFailed(left, host, true, false);
      }
    }

    // Sanity check
    if (failedTasks == null && !remaining.isEmpty()) {
      throw new IOException("server didn't return all expected map outputs: "
          + remaining.size() + " left.");
    }
    input.close();
    input = null;
  } finally {
    // A non-null input here means the normal close above was not reached.
    if (input != null) {
      IOUtils.cleanup(LOG, input);
      input = null;
    }
    // Hand every map output that was not fetched back to the scheduler.
    for (TaskAttemptID left : remaining) {
      scheduler.putBackKnownMapOutput(host, left);
    }
  }
}

线程Fetcher.copyMapOutput()

/**
 * Reads one shuffle header from {@code input} and copies the corresponding
 * map output into the location reserved by the merger.
 *
 * @param host the host the data is being fetched from
 * @param input the open shuffle stream positioned at a shuffle header
 * @param remaining the map attempts still to be fetched; the copied attempt
 *        is removed from this set on success
 * @param canRetry whether {@code checkTimeoutOrRetry} is consulted when an
 *        {@code IOException} occurs
 * @return {@code null} when the map output was copied successfully;
 *         {@code EMPTY_ATTEMPT_ID_ARRAY} when reserving space failed with an
 *         {@code IOException} or the merger returned no output (WAIT);
 *         otherwise the attempts considered failed
 * @throws IOException declared by the signature; presumably raised by
 *         {@code checkTimeoutOrRetry} from the catch block -- TODO confirm
 */
private TaskAttemptID[] copyMapOutput(MapHost host,
                              DataInputStream input,
                              Set<TaskAttemptID> remaining,
                              boolean canRetry) throws IOException {
  MapOutput<K,V> mapOutput = null;
  TaskAttemptID mapId = null;
  long decompressedLength = -1;
  long compressedLength = -1;
  
  try {
    long startTime = Time.monotonicNow();
    int forReduce = -1;
    //Read the shuffle header
    try {
      ShuffleHeader header = new ShuffleHeader();
      header.readFields(input);
      mapId = TaskAttemptID.forName(header.mapId);
      compressedLength = header.compressedLength;
      decompressedLength = header.uncompressedLength;
      forReduce = header.forReduce;
    } catch (IllegalArgumentException e) {
      badIdErrs.increment(1);
      LOG.warn("Invalid map id ", e);
      //Don't know which one was bad, so consider all of them as bad
      return remaining.toArray(new TaskAttemptID[remaining.size()]);
    }

    InputStream is = input;
    is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength);
    // Subtract the crypto padding from both lengths.
    compressedLength -= CryptoUtils.cryptoPadding(jobConf);
    decompressedLength -= CryptoUtils.cryptoPadding(jobConf);
    
    // Do some basic sanity verification
    if (!verifySanity(compressedLength, decompressedLength, forReduce,
        remaining, mapId)) {
      return new TaskAttemptID[] {mapId};
    }
    
    if(LOG.isDebugEnabled()) {
      LOG.debug("header: " + mapId + ", len: " + compressedLength + 
          ", decomp len: " + decompressedLength);
    }
    
    // Get the location for the map output - either in-memory or on-disk
    try {
      // Author's note: this calls MergeManagerImpl.reserve(), which returns
      // either an OnDiskMapOutput or an InMemoryMapOutput.
      mapOutput = merger.reserve(mapId, decompressedLength, id);
    } catch (IOException ioe) {
      // kill this reduce attempt
      ioErrs.increment(1);
      scheduler.reportLocalError(ioe);
      return EMPTY_ATTEMPT_ID_ARRAY;
    }
    
    // Check if we can shuffle *now* ...
    if (mapOutput == null) {
      LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
      //Not an error but wait to process data.
      return EMPTY_ATTEMPT_ID_ARRAY;
    } 
    
    // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError
    // on decompression failures. Catching and re-throwing as IOException
    // to allow fetch failure logic to be processed
    try {
      // Go!
      LOG.info("fetcher#" + id + " about to shuffle output of map "
          + mapOutput.getMapId() + " decomp: " + decompressedLength
          + " len: " + compressedLength + " to " + mapOutput.getDescription());
      // Author's note: OnDiskMapOutput and InMemoryMapOutput each provide
      // their own shuffle method that transfers the map output data.
      mapOutput.shuffle(host, is, compressedLength, decompressedLength,
          metrics, reporter);
    } catch (java.lang.InternalError e) {
      LOG.warn("Failed to shuffle for fetcher#"+id, e);
      throw new IOException(e);
    }
    
    // Inform the shuffle scheduler
    long endTime = Time.monotonicNow();
    // Reset retryStartTime as map task make progress if retried before.
    retryStartTime = 0;
    
    scheduler.copySucceeded(mapId, host, compressedLength, 
                            startTime, endTime, mapOutput);
    // Note successful shuffle
    remaining.remove(mapId);
    metrics.successFetch();
    return null;
  } catch (IOException ioe) {
    // Discard a partially copied output.
    if (mapOutput != null) {
      mapOutput.abort();
    }

    if (canRetry) {
      checkTimeoutOrRetry(host, ioe);
    } 
    
    ioErrs.increment(1);
    // Failure before an output was reserved: without a map id every
    // remaining attempt is reported, otherwise only the one that was read.
    if (mapId == null || mapOutput == null) {
      LOG.warn("fetcher#" + id + " failed to read map header" + 
               mapId + " decomp: " + 
               decompressedLength + ", " + compressedLength, ioe);
      if(mapId == null) {
        return remaining.toArray(new TaskAttemptID[remaining.size()]);
      } else {
        return new TaskAttemptID[] {mapId};
      }
    }
      
    LOG.warn("Failed to shuffle output of " + mapId + 
             " from " + host.getHostName(), ioe); 

    // Inform the shuffle-scheduler
    metrics.failedFetch();
    return new TaskAttemptID[] {mapId};
  }

}

MergeManagerImpl.close()

/**
 * Closes the mergers, takes over all map outputs still held in memory and on
 * disk, and hands them to {@code finalMerge}.
 *
 * @return the iterator produced by {@code finalMerge}
 * @throws Throwable anything raised while closing the mergers or during the
 *         final merge
 */
public RawKeyValueIterator close() throws Throwable {
  // Wait for on-going merges to complete; the mem-to-mem merger is optional.
  if (null != memToMemMerger) {
    memToMemMerger.close();
  }
  inMemoryMerger.close();
  onDiskMerger.close();

  // Collect the in-memory outputs: the already merged ones first, then the
  // rest, emptying each source collection once it has been copied.
  List<InMemoryMapOutput<K, V>> inMemorySnapshot =
    new ArrayList<InMemoryMapOutput<K, V>>();
  inMemorySnapshot.addAll(inMemoryMergedMapOutputs);
  inMemoryMergedMapOutputs.clear();
  inMemorySnapshot.addAll(inMemoryMapOutputs);
  inMemoryMapOutputs.clear();

  // Same for the on-disk outputs.
  List<CompressAwarePath> onDiskSnapshot = new ArrayList<CompressAwarePath>();
  onDiskSnapshot.addAll(onDiskMapOutputs);
  onDiskMapOutputs.clear();

  // Author's note: this final merge is the sort phase.
  return finalMerge(jobConf, rfs, inMemorySnapshot, onDiskSnapshot);
}

MergeManagerImpl.finalMerge()方法

/**
 * Builds the final iterator over all map outputs, both in memory and on disk.
 *
 * <p>In-memory outputs selected by {@code createInMemorySegments} under the
 * {@code getMaxInMemReduceLimit()} limit are either merged into one file on
 * disk (when there are fewer on-disk outputs than {@code ioSortFactor}) or
 * kept for the on-disk merge. The on-disk files are then sorted by length
 * and merged, and that result is combined with the segments still in memory.
 *
 * @param job the job configuration
 * @param fs the file system holding the on-disk map outputs
 * @param inMemoryMapOutputs the map outputs currently held in memory
 * @param onDiskMapOutputs the map outputs on disk; a merged file may be
 *        added to this list
 * @return the iterator over the merged data
 * @throws IOException if writing the merged file fails (the partial file is
 *         deleted first), or propagated from the merge helpers
 */
private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
                                     List<InMemoryMapOutput<K,V>> inMemoryMapOutputs,
                                     List<CompressAwarePath> onDiskMapOutputs
                                     ) throws IOException {
  LOG.info("finalMerge called with " +
      inMemoryMapOutputs.size() + " in-memory map-outputs and " +
      onDiskMapOutputs.size() + " on-disk map-outputs");
  final long maxInMemReduce = getMaxInMemReduceLimit();
  // merge config params 
  Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
  Class<V> valueClass = (Class<V>)job.getMapOutputValueClass();
  boolean keepInputs = job.getKeepFailedTaskFiles();
  final Path tmpDir = new Path(reduceId.toString());
  final RawComparator<K> comparator =
    (RawComparator<K>)job.getOutputKeyComparator();

  // segments required to vacate memory
  List<Segment<K,V>> memDiskSegments = new ArrayList<Segment<K,V>>();
  long inMemToDiskBytes = 0;
  boolean mergePhaseFinished = false;
  if (inMemoryMapOutputs.size() > 0) {
    TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
    inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, 
                                              memDiskSegments,
                                              maxInMemReduce);
    final int numMemDiskSegments = memDiskSegments.size();
    if (numMemDiskSegments > 0 &&
          ioSortFactor > onDiskMapOutputs.size()) {
      
      // If we reach here, it implies that we have less than io.sort.factor
      // disk segments and this will be incremented by 1 (result of the 
      // memory segments merge). Since this total would still be 
      // <= io.sort.factor, we will not do any more intermediate merges,
      // the merge of all these disk segments would be directly fed to the
      // reduce method
      
      mergePhaseFinished = true;
      // must spill to disk, but can't retain in-mem for intermediate merge
      final Path outputPath = 
        mapOutputFile.getInputFileForWrite(mapId,
                                           inMemToDiskBytes).suffix(
                                               Task.MERGED_OUTPUT_PREFIX);
      final RawKeyValueIterator rIter = Merger.merge(job, fs,
          keyClass, valueClass, memDiskSegments, numMemDiskSegments,
          tmpDir, comparator, reporter, spilledRecordsCounter, null, 
          mergePhase);

      FSDataOutputStream out = CryptoUtils.wrapIfNecessary(job, fs.create(outputPath));
      Writer<K, V> writer = new Writer<K, V>(job, out, keyClass, valueClass,
          codec, null, true);
      try {
        Merger.writeFile(rIter, writer, reporter, job);
        writer.close();
        // The file merged from the in-memory outputs joins the on-disk list.
        onDiskMapOutputs.add(new CompressAwarePath(outputPath,
            writer.getRawLength(), writer.getCompressedLength()));
        // Null the writer so the finally block does not close it again.
        writer = null;
        // add to list of final disk outputs.
      } catch (IOException e) {
        // Best-effort removal of the partial output before rethrowing.
        if (null != outputPath) {
          try {
            fs.delete(outputPath, true);
          } catch (IOException ie) {
            // NOTHING
          }
        }
        throw e;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
      LOG.info("Merged " + numMemDiskSegments + " segments, " +
               inMemToDiskBytes + " bytes to disk to satisfy " +
               "reduce memory limit");
      inMemToDiskBytes = 0;
      memDiskSegments.clear();
    } else if (inMemToDiskBytes != 0) {
      LOG.info("Keeping " + numMemDiskSegments + " segments, " +
               inMemToDiskBytes + " bytes in memory for " +
               "intermediate, on-disk merge");
    }
  }

  // segments on disk
  List<Segment<K,V>> diskSegments = new ArrayList<Segment<K,V>>();
  long onDiskBytes = inMemToDiskBytes;
  long rawBytes = inMemToDiskBytes;
  CompressAwarePath[] onDisk = onDiskMapOutputs.toArray(
      new CompressAwarePath[onDiskMapOutputs.size()]);
  for (CompressAwarePath file : onDisk) {
    long fileLength = fs.getFileStatus(file).getLen();
    onDiskBytes += fileLength;
    // Use the file length when no positive raw data length is recorded.
    rawBytes += (file.getRawDataLength() > 0) ? file.getRawDataLength() : fileLength;

    LOG.debug("Disk file: " + file + " Length is " + fileLength);
    // Files whose name ends with MERGED_OUTPUT_PREFIX get no counter.
    diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
                                       (file.toString().endsWith(
                                           Task.MERGED_OUTPUT_PREFIX) ?
                                        null : mergedMapOutputsCounter), file.getRawDataLength()
                                      ));
  }
  LOG.info("Merging " + onDisk.length + " files, " +
           onDiskBytes + " bytes from disk");
  // Order the disk segments by ascending length.
  Collections.sort(diskSegments, new Comparator<Segment<K,V>>() {
    public int compare(Segment<K, V> o1, Segment<K, V> o2) {
      if (o1.getLength() == o2.getLength()) {
        return 0;
      }
      return o1.getLength() < o2.getLength() ? -1 : 1;
    }
  });

  // build final list of segments from merged backed by disk + in-mem
  List<Segment<K,V>> finalSegments = new ArrayList<Segment<K,V>>();
  long inMemBytes = createInMemorySegments(inMemoryMapOutputs, 
                                           finalSegments, 0);
  LOG.info("Merging " + finalSegments.size() + " segments, " +
           inMemBytes + " bytes from memory into reduce");
  if (0 != onDiskBytes) {
    final int numInMemSegments = memDiskSegments.size();
    diskSegments.addAll(0, memDiskSegments);
    memDiskSegments.clear();
    // Pass mergePhase only if there is a going to be intermediate
    // merges. See comment where mergePhaseFinished is being set
    Progress thisPhase = (mergePhaseFinished) ? null : mergePhase; 
    RawKeyValueIterator diskMerge = Merger.merge(
        job, fs, keyClass, valueClass, codec, diskSegments,
        ioSortFactor, numInMemSegments, tmpDir, comparator,
        reporter, false, spilledRecordsCounter, null, thisPhase);
    diskSegments.clear();
    // Nothing left in memory: the disk merge is the final result.
    if (0 == finalSegments.size()) {
      return diskMerge;
    }
    // Author's note: this is where the sort happens.
    finalSegments.add(new Segment<K,V>(
          new RawKVIteratorReader(diskMerge, onDiskBytes), true, rawBytes));
  }
  return Merger.merge(job, fs, keyClass, valueClass,
               finalSegments, finalSegments.size(), tmpDir,
               comparator, reporter, spilledRecordsCounter, null,
               null);

}

转载于:https://my.oschina.net/yulongblog/blog/1505172