Hadoop 2.7.5 MapReduce Recovery

MapReduce can recover from the last application attempt: map or reduce tasks that succeeded in the previous attempt will not be executed again.
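
Recovery is controlled by configuration. A minimal sketch of the relevant client-side settings, using the real MRJobConfig constants with their default values (recovery also requires mapreduce.am.max-attempts to be greater than 1, otherwise there is no second attempt to recover in):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class RecoveryConfig {
  public static Configuration withRecovery() {
    Configuration conf = new Configuration();
    // yarn.app.mapreduce.am.job.recovery.enable; defaults to true
    conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
    // mapreduce.am.max-attempts; defaults to 2, must be > 1 for a
    // second AM attempt (and hence recovery) to happen at all
    conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 2);
    return conf;
  }
}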

MRAppMaster.serviceInit calls processRecovery to recover from the previous attempt.

 private void processRecovery() throws IOException{
    if (appAttemptID.getAttemptId() == 1) {
      return;  // no need to recover on the first attempt
    }

    boolean recoveryEnabled = getConfig().getBoolean(
        MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE,
        MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE_DEFAULT);

    boolean recoverySupportedByCommitter = isRecoverySupported(); // true for FileOutputCommitter

    // If a shuffle secret was not provided by the job client then this app
    // attempt will generate one.  However that disables recovery if there
    // are reducers as the shuffle secret would be app attempt specific.
    int numReduceTasks = getConfig().getInt(MRJobConfig.NUM_REDUCES, 0);
    boolean shuffleKeyValidForRecovery =
        TokenCache.getShuffleSecretKey(jobCredentials) != null;

    if (recoveryEnabled && recoverySupportedByCommitter
        && (numReduceTasks <= 0 || shuffleKeyValidForRecovery)) {
      LOG.info("Recovery is enabled. "
          + "Will try to recover from previous life on best effort basis.");
      try {
        parsePreviousJobHistory();
      } catch (IOException e) {
        LOG.warn("Unable to parse prior job history, aborting recovery", e);
        // try to get just the AMInfos
        amInfos.addAll(readJustAMInfos());
      }
    } else {
      LOG.info("Will not try to recover. recoveryEnabled: "
            + recoveryEnabled + " recoverySupportedByCommitter: "
            + recoverySupportedByCommitter + " numReduceTasks: "
            + numReduceTasks + " shuffleKeyValidForRecovery: "
            + shuffleKeyValidForRecovery + " ApplicationAttemptID: "
            + appAttemptID.getAttemptId());
      // Get the amInfos anyways whether recovery is enabled or not
      amInfos.addAll(readJustAMInfos());
    }
  }
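
The shuffle-key check above is worth spelling out: if the job client does not supply a shuffle secret, each AM attempt generates its own, so map output committed by the previous attempt cannot be served to reducers, and recovery is disabled whenever the job has reducers. A sketch of how a shuffle secret gets attached to the job credentials before submission; this mirrors the logic in JobSubmitter, but TokenCache is annotated @Private, so the snippet is illustrative rather than a supported API:

import java.security.NoSuchAlgorithmException;
import javax.crypto.KeyGenerator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.security.TokenCache;

public class ShuffleKeyExample {
  // Attach a fixed shuffle secret to the job's credentials so every AM
  // attempt sees the same key and recovery stays possible with reducers.
  static void setShuffleKey(Job job) throws NoSuchAlgorithmException {
    KeyGenerator keyGen = KeyGenerator.getInstance("HmacSHA1");
    keyGen.init(64);
    TokenCache.setShuffleSecretKey(keyGen.generateKey().getEncoded(),
        job.getCredentials());
  }
}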

MRAppMaster.isRecoverySupported

isRecoverySupported is determined by committer.isRecoverySupported(_jobContext), which always returns true when the output committer is a FileOutputCommitter.

private boolean isRecoverySupported() throws IOException {
    boolean isSupported = false;
    Configuration conf = getConfig();
    if (committer != null) {
      final JobContext _jobContext = getJobContextFromConf(conf);
      isSupported = callWithJobClassLoader(conf,
          new ExceptionAction<Boolean>() {
            public Boolean call(Configuration conf) throws IOException {
              return committer.isRecoverySupported(_jobContext);
            }
      });
    }
    return isSupported;
  }
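
FileOutputCommitter gets this for free; a custom committer has to opt in explicitly by overriding isRecoverySupported(JobContext) and, if needed, recoverTask. A minimal sketch (the class name and the empty method bodies are placeholders):

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class RecoverableCommitter extends OutputCommitter {
  @Override public void setupJob(JobContext context) throws IOException { }
  @Override public void setupTask(TaskAttemptContext context) throws IOException { }
  @Override public boolean needsTaskCommit(TaskAttemptContext context) { return false; }
  @Override public void commitTask(TaskAttemptContext context) throws IOException { }
  @Override public void abortTask(TaskAttemptContext context) throws IOException { }

  @Override
  public boolean isRecoverySupported(JobContext context) {
    // Opt in: lets a new AM attempt reuse task output committed by the
    // previous attempt instead of rerunning the tasks.
    return true;
  }

  @Override
  public void recoverTask(TaskAttemptContext context) throws IOException {
    // Relocate or re-register output committed by the previous attempt
    // here; a no-op suffices when commitTask is idempotent.
  }
}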

MRAppMaster.parsePreviousJobHistory

private void parsePreviousJobHistory() throws IOException {
    FSDataInputStream in = getPreviousJobHistoryStream(getConfig(),
        appAttemptID);
    JobHistoryParser parser = new JobHistoryParser(in);
    JobInfo jobInfo = parser.parse();
    Exception parseException = parser.getParseException();
    if (parseException != null) {
      LOG.info("Got an error parsing job-history file" +
          ", ignoring incomplete events.", parseException);
    }
    Map<org.apache.hadoop.mapreduce.TaskID, TaskInfo> taskInfos = jobInfo
        .getAllTasks();
    for (TaskInfo taskInfo : taskInfos.values()) {
      if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) {
        Iterator<Entry<TaskAttemptID, TaskAttemptInfo>> taskAttemptIterator =
            taskInfo.getAllTaskAttempts().entrySet().iterator();
        while (taskAttemptIterator.hasNext()) {
          Map.Entry<TaskAttemptID, TaskAttemptInfo> currentEntry = taskAttemptIterator.next();
          if (!jobInfo.getAllCompletedTaskAttempts().containsKey(currentEntry.getKey())) {
            taskAttemptIterator.remove();
          }
        }
        completedTasksFromPreviousRun
            .put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo);
        LOG.info("Read from history task "
            + TypeConverter.toYarn(taskInfo.getTaskId()));
      }
    }
    LOG.info("Read completed tasks from history "
        + completedTasksFromPreviousRun.size());
    recoveredJobStartTime = jobInfo.getLaunchTime();

    // recover AMInfos
    List<JobHistoryParser.AMInfo> jhAmInfoList = jobInfo.getAMInfos();
    if (jhAmInfoList != null) {
      for (JobHistoryParser.AMInfo jhAmInfo : jhAmInfoList) {
        AMInfo amInfo = MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(),
            jhAmInfo.getStartTime(), jhAmInfo.getContainerId(),
            jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(),
            jhAmInfo.getNodeManagerHttpPort());
        amInfos.add(amInfo);
      }
    }
  }
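
The same parser can be used outside the AM to inspect a .jhist file directly. A minimal standalone sketch (the input path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;

public class JhistDump {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path jhist = new Path(args[0]); // e.g. a job_..._1.jhist file
    JobHistoryParser parser =
        new JobHistoryParser(jhist.getFileSystem(conf), jhist);
    JobInfo jobInfo = parser.parse();
    // Print every task with its final status (SUCCEEDED, FAILED, ...)
    for (TaskInfo taskInfo : jobInfo.getAllTasks().values()) {
      System.out.println(taskInfo.getTaskId() + " -> "
          + taskInfo.getTaskStatus());
    }
  }
}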

MRAppMaster.getPreviousJobHistoryStream

  private static FSDataInputStream getPreviousJobHistoryStream(
      Configuration conf, ApplicationAttemptId appAttemptId)
      throws IOException {
    Path historyFile = JobHistoryUtils.getPreviousJobHistoryPath(conf,
        appAttemptId);
    LOG.info("Previous history file is at " + historyFile);
    //historyFile: hdfs://localhost:8020/tmp/hadoop-yarn/staging/houzhizhen/.staging/job_1523876398612_0014/job_1523876398612_0014_1.jhist
    return historyFile.getFileSystem(conf).open(historyFile);
  }
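
The previous history file is looked up in the job's staging directory; as the path in the comment above shows, the file name is suffixed with the previous attempt number (job_1523876398612_0014_1.jhist when the current attempt is 2).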
