MapReduce job submission: source code analysis

// tracing starts from the job submission call
job.waitForCompletion(true);
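
For context, a minimal WordCount-style driver sketch that ends in the call traced below; the mapper/reducer classes and the input/output paths are hypothetical placeholders:

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountDriver.class);       // placeholder driver class
    job.setMapperClass(WordCountMapper.class);      // placeholder mapper
    job.setReducerClass(WordCountReducer.class);    // placeholder reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/input"));    // placeholder path
    FileOutputFormat.setOutputPath(job, new Path("/output")); // placeholder path
    // everything analyzed below happens inside this call
    System.exit(job.waitForCompletion(true) ? 0 : 1);
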
waitForCompletion analysis:
/**
* Submit the job to the cluster and wait for it to finish.
* @param verbose whether to print progress information while the job runs
* @return true if the job succeeded
* @throws IOException thrown if the communication with the 
*         <code>JobTracker</code> is lost
*/
public boolean waitForCompletion(boolean verbose
                                   ) throws IOException, InterruptedException,
                                            ClassNotFoundException {
    if (state == JobState.DEFINE) {
      // submit the job (see below)
      submit(); 
    }
    if (verbose) {
      monitorAndPrintJob(); // print progress information while the job runs (not traced here)
    } else {
      // polling interval used to check whether the job has completed
      int completionPollIntervalMillis = 
        Job.getCompletionPollInterval(cluster.getConf());
      while (!isComplete()) {
        try {
          // not finished yet, so sleep for the poll interval
          Thread.sleep(completionPollIntervalMillis);
        } catch (InterruptedException ie) {
        }
      }
    }
    return isSuccessful();
  }
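
The poll interval returned by Job.getCompletionPollInterval is read from the client configuration; to the best of my knowledge the key is mapreduce.client.completion.pollinterval (milliseconds, defaulting to a few seconds), so the check frequency can be tuned on the submitting side, for example with the driver's conf from the sketch above:

    // assumed configuration key for the completion poll interval
    conf.setInt("mapreduce.client.completion.pollinterval", 10000); // poll every 10s
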
  
submit analysis:
  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit() 
         throws IOException, InterruptedException, ClassNotFoundException {
    // verify state consistency: throws if the job state differs from the expected
    // one, or if the state is RUNNING but cluster == null
    ensureState(JobState.DEFINE);
    // use the new API; MapReduce has used the new API since Hadoop 2.x
    setUseNewAPI();
    connect(); // obtain the Cluster object (see below)
    // obtain the JobSubmitter, the object responsible for submitting the job, through the cluster
    final JobSubmitter submitter = 
        getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
      public JobStatus run() throws IOException, InterruptedException, 
      ClassNotFoundException {
        // internal submission method that does the real work and returns the job status (see below)
        return submitter.submitJobInternal(Job.this, cluster);
      }
    });
    state = JobState.RUNNING;
    LOG.info("The url to track the job: " + getTrackingURL());
   }
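
Because submit() returns as soon as the job has been handed to the cluster, a caller that does not want to block inside waitForCompletion can poll the Job object itself. A minimal sketch using the public Job API, reusing the job built in the driver sketch at the top:

    // non-blocking submission: submit, then poll progress ourselves
    job.submit();
    while (!job.isComplete()) {
        System.out.printf("map %.0f%%  reduce %.0f%%%n",
            job.mapProgress() * 100, job.reduceProgress() * 100);
        Thread.sleep(5000);
    }
    System.out.println(job.isSuccessful() ? "job succeeded" : "job failed");
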
   
connect analysis:
   /**
   * Obtain the Cluster object.
   */
    private synchronized void connect()
          throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster == null) {
      // PrivilegedExceptionAction is an interface with a single run() method
      // (see the doAs analysis below)
      cluster = ugi.doAs(new PrivilegedExceptionAction<Cluster>() {
        // the value returned by run() becomes the return value of doAs
        // and is assigned to cluster
        public Cluster run()
            throws IOException, InterruptedException,
                   ClassNotFoundException {
          return new Cluster(getConfiguration());
        }
      });
    }
  }
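
Inside the Cluster constructor the concrete ClientProtocol is chosen by iterating over the ClientProtocolProvider implementations registered through Java's ServiceLoader. Conceptually (a simplified sketch, not the literal Hadoop source) the selection looks like this:

    // the first provider whose create(conf) returns a non-null ClientProtocol wins
    for (ClientProtocolProvider provider :
             ServiceLoader.load(ClientProtocolProvider.class)) {
        ClientProtocol clientProtocol = provider.create(conf);
        if (clientProtocol != null) {
            this.client = clientProtocol;   // YARNRunner or LocalJobRunner
            break;
        }
    }
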
 
doAs analysis:
   /**
   * Run the given action as this user.
   * @param <T> the return type of the run method
   * @param action the action to be run
   * @return the value returned by the action's run method
   */
  @InterfaceAudience.Public
  @InterfaceStability.Evolving
  public <T> T doAs(PrivilegedAction<T> action) {
    logPrivilegedAction(subject, action);
    return Subject.doAs(subject, action); // the detailed execution path is not traced further here
  }
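
Note that the overload shown above takes a PrivilegedAction; the calls in submit() and connect() actually resolve to the sibling overload doAs(PrivilegedExceptionAction<T>), which works the same way but also propagates checked exceptions. A minimal usage sketch of the pattern (the file path is a placeholder):

    // run an action under the caller's Hadoop credentials via UGI.doAs
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    FileStatus status = ugi.doAs(new PrivilegedExceptionAction<FileStatus>() {
        public FileStatus run() throws IOException {
            FileSystem fs = FileSystem.get(new Configuration());
            return fs.getFileStatus(new Path("/tmp/example")); // placeholder path
        }
    });
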
 
submitJobInternal analysis:

  /* 
   * Internal method that submits the job to the cluster.
   * @param job the job to be submitted
   * @param cluster the cluster object
   * @throws ClassNotFoundException
   * @throws InterruptedException
   * @throws IOException
   */
  JobStatus submitJobInternal(Job job, Cluster cluster) 
  throws ClassNotFoundException, InterruptedException, IOException {

    // validate the job's output specification; for example, this fails
    // if the output path already exists
    checkSpecs(job);

    Configuration conf = job.getConfiguration();
    // add the MapReduce framework archive to the distributed cache
    // (this part is not fully understood yet and needs further analysis)
    addMRFrameworkToDistributedCache(conf);

    // obtain the staging directory on the shared file system where the client
    // will upload its job files (derived from the cluster configuration)
    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
    //configure the command line options correctly on the submitting dfs
    InetAddress ip = InetAddress.getLocalHost();
    if (ip != null) {
      submitHostAddress = ip.getHostAddress();
      submitHostName = ip.getHostName();
      conf.set(MRJobConfig.JOB_SUBMITHOST,submitHostName);
      conf.set(MRJobConfig.JOB_SUBMITHOSTADDR,submitHostAddress);
    }
    // obtain a new JobID from the cluster (in YARN mode this comes from the ResourceManager via submitClient)
    JobID jobId = submitClient.getNewJobID();
    job.setJobID(jobId);
    // combine the staging path and the jobId into a unique submission directory for this job's files
    Path submitJobDir = new Path(jobStagingArea, jobId.toString());
    JobStatus status = null;
    // set submission-related fields in conf
    try {
      conf.set(MRJobConfig.USER_NAME,
          UserGroupInformation.getCurrentUser().getShortUserName());
      conf.set("hadoop.http.filter.initializers", 
          "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
      conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, submitJobDir.toString());
      LOG.debug("Configuring job " + jobId + " with " + submitJobDir 
          + " as the submit dir");
      // obtain delegation tokens for the NameNodes (the token mechanism is not covered here)
      TokenCache.obtainTokensForNamenodes(job.getCredentials(),
          new Path[] { submitJobDir }, conf);
      
      populateTokenCache(conf, job.getCredentials());

      // generate a secret to authenticate shuffle transfers
      if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
        KeyGenerator keyGen;
        try {
          keyGen = KeyGenerator.getInstance(SHUFFLE_KEYGEN_ALGORITHM);
          keyGen.init(SHUFFLE_KEY_LENGTH);
        } catch (NoSuchAlgorithmException e) {
          throw new IOException("Error generating shuffle secret key", e);
        }
        SecretKey shuffleKey = keyGen.generateKey();
        TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(),
            job.getCredentials());
      }
      if (CryptoUtils.isEncryptedSpillEnabled(conf)) {
        conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 1);
        LOG.warn("Max job attempts set to 1 since encrypted intermediate" +
                "data spill is enabled");
      }

      // upload the job's resources (jar, files, archives) to submitJobDir and record them in conf
      copyAndConfigureFiles(job, submitJobDir);

      // path where the job configuration will be written: submitJobDir/job.xml
      Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
      
      // compute the input splits; one map task is created per split
      LOG.debug("Creating splits at " + jtFs.makeQualified(submitJobDir));
      int maps = writeSplits(job, submitJobDir);
      conf.setInt(MRJobConfig.NUM_MAPS, maps);
      LOG.info("number of splits:" + maps);

      // write "queue admins of the queue to which job is being submitted"
      // to job file.
      String queue = conf.get(MRJobConfig.QUEUE_NAME,
          JobConf.DEFAULT_QUEUE_NAME);
      AccessControlList acl = submitClient.getQueueAdmins(queue);
      conf.set(toFullPropertyName(queue,
          QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());

      // removing jobtoken referrals before copying the jobconf to HDFS
      // as the tasks don't need this setting, actually they may break
      // because of it if present as the referral will point to a
      // different job.
      TokenCache.cleanUpTokenReferral(conf);

      if (conf.getBoolean(
          MRJobConfig.JOB_TOKEN_TRACKING_IDS_ENABLED,
          MRJobConfig.DEFAULT_JOB_TOKEN_TRACKING_IDS_ENABLED)) {
        // Add HDFS tracking ids
        ArrayList<String> trackingIds = new ArrayList<String>();
        for (Token<? extends TokenIdentifier> t :
            job.getCredentials().getAllTokens()) {
          trackingIds.add(t.decodeIdentifier().getTrackingId());
        }
        conf.setStrings(MRJobConfig.JOB_TOKEN_TRACKING_IDS,
            trackingIds.toArray(new String[trackingIds.size()]));
      }

      // Set reservation info if it exists
      ReservationId reservationId = job.getReservationId();
      if (reservationId != null) {
        conf.set(MRJobConfig.RESERVATION_ID, reservationId.toString());
      }

      // write conf out as job.xml under the submission directory
      writeConf(conf, submitJobFile);  
      printTokens(jobId, job.getCredentials());
      // this is the call that actually submits the job; submitClient is a
      // ClientProtocol, an RPC protocol interface with two implementations:
      // YARNRunner (cluster mode) and LocalJobRunner (local mode) -- see below
      status = submitClient.submitJob(
          jobId, submitJobDir.toString(), job.getCredentials());
      if (status != null) {
        return status;
      } else {
        throw new IOException("Could not launch job");
      }
    } finally {
      if (status == null) {
        LOG.info("Cleaning up the staging area " + submitJobDir);
        if (jtFs != null && submitJobDir != null)
          jtFs.delete(submitJobDir, true);
      }
    }
  }
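
Which ClientProtocol implementation submitClient ends up being is controlled by mapreduce.framework.name: setting it to yarn makes the YarnClientProtocolProvider create a YARNRunner, while local (the default) makes the LocalClientProtocolProvider create a LocalJobRunner. A minimal sketch of selecting the backend on the client side:

    // choose the submission backend before creating the Job
    Configuration conf = new Configuration();
    conf.set("mapreduce.framework.name", "yarn");    // YARNRunner (submit to a YARN cluster)
    // conf.set("mapreduce.framework.name", "local"); // LocalJobRunner (run locally)
    Job job = Job.getInstance(conf, "demo");
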
  
YARNRunner.submitJob analysis:
  @Override
  public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
  throws IOException, InterruptedException {
    // add the history server token to the credentials
    addHistoryToken(ts);
    // build the ApplicationSubmissionContext for this submission
    ApplicationSubmissionContext appContext =
      createApplicationSubmissionContext(conf, jobSubmitDir, ts);

    // submit the appContext to the ResourceManager
    try {
      // resMgrDelegate is a client-side proxy for the ResourceManager (see below)
      ApplicationId applicationId = resMgrDelegate.submitApplication(appContext);
 
      // fetch the ApplicationReport through resMgrDelegate to learn the application's state
      ApplicationReport appMaster = resMgrDelegate
          .getApplicationReport(applicationId);
      String diagnostics =
          (appMaster == null ?
              "application report is null" : appMaster.getDiagnostics());
      if (appMaster == null
          || appMaster.getYarnApplicationState() == YarnApplicationState.FAILED
          || appMaster.getYarnApplicationState() == YarnApplicationState.KILLED) {
        throw new IOException("Failed to run job : " +
            diagnostics);
      }
      return clientCache.getClient(jobId).getJobStatus(jobId);
    } catch (YarnException e) {
      throw new IOException(e);
    }
  }
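
resMgrDelegate (a ResourceMgrDelegate) is essentially a thin wrapper around a YarnClient. For comparison, submitting an application directly through the YarnClient API looks roughly like the sketch below; the application name, resource sizes, and the empty container spec are placeholders, and a real submission would also need a proper ApplicationMaster launch command:

    // sketch: submitting an application with YarnClient instead of ResourceMgrDelegate
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new YarnConfiguration());
    yarnClient.start();

    YarnClientApplication app = yarnClient.createApplication();
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("demo-app");                        // placeholder name
    appContext.setAMContainerSpec(Records.newRecord(ContainerLaunchContext.class));
    appContext.setResource(Resource.newInstance(1024, 1));            // 1 GB, 1 vcore

    ApplicationId appId = yarnClient.submitApplication(appContext);
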
  
ResourceMgrDelegate.submitApplication analysis:
  @Override
  public ApplicationId submitApplication(ApplicationSubmissionContext appContext)
      throws YarnException, IOException {
    // client is a YarnClient; this call delegates to YarnClientImpl.submitApplication
    return client.submitApplication(appContext);
  }
 
YarnClientImpl.submitApplication analysis:
  @Override
  public ApplicationId submitApplication(ApplicationSubmissionContext appContext)
      throws YarnException, IOException {
    ApplicationId applicationId = appContext.getApplicationId();
    if (applicationId == null) {
      throw new ApplicationIdNotProvidedException(
          "ApplicationId is not provided in ApplicationSubmissionContext");
    }
    // wrap the appContext in a request object; the request is what actually gets submitted
    SubmitApplicationRequest request =
        Records.newRecord(SubmitApplicationRequest.class);
    request.setApplicationSubmissionContext(appContext);

    // Automatically add the timeline DT into the CLC
    // Only when the security and the timeline service are both enabled
    if (isSecurityEnabled() && timelineServiceEnabled) {
      addTimelineDelegationToken(appContext.getAMContainerSpec());
    }

    // rmClient is an ApplicationClientProtocol, an RPC protocol interface;
    // submitApplication hands the application to ApplicationClientProtocolPBClientImpl,
    // which forwards it to the ResourceManager
    // (ApplicationClientProtocolPBClientImpl.submitApplication is analyzed below)
    rmClient.submitApplication(request);

    int pollCount = 0;
    long startTime = System.currentTimeMillis();
    EnumSet<YarnApplicationState> waitingStates = 
        EnumSet.of(YarnApplicationState.NEW,
            YarnApplicationState.NEW_SAVING,
            YarnApplicationState.SUBMITTED);
    EnumSet<YarnApplicationState> failToSubmitStates = 
        EnumSet.of(YarnApplicationState.FAILED,
            YarnApplicationState.KILLED);

    // the code below polls the application report and re-submits the request
    // if the RM lost the application (e.g. failover before the state was saved)
    while (true) {
      try {
        ApplicationReport appReport = getApplicationReport(applicationId);
        YarnApplicationState state = appReport.getYarnApplicationState();
        if (!waitingStates.contains(state)) {
          if (failToSubmitStates.contains(state)) {
            throw new YarnException("Failed to submit " + applicationId + 
                " to YARN : " + appReport.getDiagnostics());
          }
          LOG.info("Submitted application " + applicationId);
          break;
        }

        long elapsedMillis = System.currentTimeMillis() - startTime;
        if (enforceAsyncAPITimeout() &&
            elapsedMillis >= asyncApiPollTimeoutMillis) {
          throw new YarnException("Timed out while waiting for application " +
              applicationId + " to be submitted successfully");
        }

        // Notify the client through the log every 10 poll, in case the client
        // is blocked here too long.
        if (++pollCount % 10 == 0) {
          LOG.info("Application submission is not finished, " +
              "submitted application " + applicationId +
              " is still in " + state);
        }
        try {
          Thread.sleep(submitPollIntervalMillis);
        } catch (InterruptedException ie) {
          LOG.error("Interrupted while waiting for application "
              + applicationId
              + " to be successfully submitted.");
        }
      } catch (ApplicationNotFoundException ex) {
        // FailOver or RM restart happens before RMStateStore saves
        // ApplicationState
        LOG.info("Re-submit application " + applicationId + "with the " +
            "same ApplicationSubmissionContext");
        rmClient.submitApplication(request);
      }
    }

    return applicationId;
  }
 
ApplicationClientProtocolPBClientImpl.submitApplication analysis:

  @Override
  public SubmitApplicationResponse submitApplication(
      SubmitApplicationRequest request) throws YarnException, IOException {
    SubmitApplicationRequestProto requestProto =
        ((SubmitApplicationRequestPBImpl) request).getProto();
    try {
      // proxy is an ApplicationClientProtocolPB, the protobuf-based RPC interface
      // used for inter-process communication; the client-side submission path
      // ends here -- the request now travels over RPC to the ResourceManager
      return new SubmitApplicationResponsePBImpl(proxy.submitApplication(null,
          requestProto));
    } catch (ServiceException e) {
      RPCUtil.unwrapAndThrowException(e);
      return null;
    }
  }
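
How the proxy itself is wired up is beyond the scope of this trace, but for reference, a client can obtain the same ApplicationClientProtocol RPC interface through ClientRMProxy; a rough sketch (exception handling omitted):

    // sketch: obtaining an ApplicationClientProtocol proxy to the ResourceManager
    Configuration conf = new YarnConfiguration();
    ApplicationClientProtocol rm =
        ClientRMProxy.createRMProxy(conf, ApplicationClientProtocol.class);
    GetNewApplicationResponse newApp =
        rm.getNewApplication(Records.newRecord(GetNewApplicationRequest.class));
    System.out.println("new application id: " + newApp.getApplicationId());
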

