整个作业提交流程从 job.waitForCompletion(true) 开始分析:
waitForCompletion方法如下:
- public boolean waitForCompletion (boolean verbose
- ) throws IOException, InterruptedException,
- ClassNotFoundException {
- if (state == JobState.DEFINE) {
- // 建立了连接 提交作业
- submit();
- }
- if (verbose) {
- jobClient.monitorAndPrintJob( conf, info);
- } else {
- info.waitForCompletion();
- }
- return isSuccessful();
- }
submit函数:
- public void submit () throws IOException, InterruptedException,
- ClassNotFoundException {
- ensureState(JobState.DEFINE);
- setUseNewAPI();
- // Connect to the JobTracker and submit the job
- connect();
- info = jobClient.submitJobInternal( conf);
- super.setJobID( info.getID());
- state = JobState.RUNNING;
- }
connect函数:
- private void connect() throws IOException, InterruptedException {
- ugi.doAs(new PrivilegedExceptionAction<Object>() {
- public Object run() throws IOException {
- // new JobClient连接到了服务端 并且拿到了服务端的代理对象
- jobClient = new JobClient((JobConf) getConfiguration());
- return null;
- }
- });
- }
jobClient函数:
- public JobClient(JobConf conf) throws IOException {
- setConf(conf);//初始化配置文件
- init(conf);//初始化作业
- }
setConf函数:
- public void setConf (Configuration conf) {
- this. conf = conf;
- }
init函数:
- public void init(JobConf conf) throws IOException {
- //读取mapred.job.tracker中指定的jobTracker,如果无法读取,那么使用本地的local的jobTracker
- String tracker = conf.get( "mapred.job.tracker", "local");
- tasklogtimeout = conf.getInt(
- TASKLOG_PULL_TIMEOUT_KEY, DEFAULT_TASKLOG_TIMEOUT );
- this. ugi = UserGroupInformation. getCurrentUser();
- if ( "local".equals(tracker)) {
- conf.setNumMapTasks(1);
- this. jobSubmitClient = new LocalJobRunner(conf);
- } else {
- this. rpcJobSubmitClient =
- createRPCProxy(JobTracker. getAddress(conf), conf);
- this. jobSubmitClient = createProxy( this. rpcJobSubmitClient, conf);
- }
- }
createRPCProxy函数:创建与JobTracker通信的RPC代理对象(拿到服务端对象和方法):
- private static JobSubmissionProtocol createRPCProxy(InetSocketAddress addr,
- Configuration conf) throws IOException {
- //JobSubmissionProtocol :作业提交协议:提交作业的一个接口
- JobSubmissionProtocol rpcJobSubmitClient =
- (JobSubmissionProtocol)RPC. getProxy(
- JobSubmissionProtocol. class,
- JobSubmissionProtocol. versionID, addr,
- UserGroupInformation. getCurrentUser(), conf,
- NetUtils. getSocketFactory(conf, JobSubmissionProtocol.class ),
- 0,
- RetryUtils. getMultipleLinearRandomRetry(
- conf,
- MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_KEY ,
- MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_DEFAULT ,
- MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_KEY ,
- MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_DEFAULT
- )
- );
- return rpcJobSubmitClient;
- }
submitJobInternal函数:作业提交的核心逻辑,准备暂存目录和作业文件后,最终调用submitJob真正提交作业:
- public
- RunningJob submitJobInternal( final JobConf job
- ) throws FileNotFoundException,
- ClassNotFoundException,
- InterruptedException,
- IOException {
- /*
- * configure the command line options correctly on the submitting dfs
- */
- return ugi.doAs( new PrivilegedExceptionAction<RunningJob>() {
- public RunningJob run() throws FileNotFoundException,
- ClassNotFoundException,
- InterruptedException,
- IOException{
- JobConf jobCopy = job;
- Path jobStagingArea = JobSubmissionFiles.getStagingDir(JobClient. this,
- jobCopy);
- JobID jobId = jobSubmitClient.getNewJobId();
- Path submitJobDir = new Path(jobStagingArea, jobId.toString());
- jobCopy.set( "mapreduce.job.dir" , submitJobDir.toString());
- JobStatus status = null;
- try {
- populateTokenCache(jobCopy, jobCopy.getCredentials());
- copyAndConfigureFiles(jobCopy, submitJobDir);
- // get delegation token for the dir
- TokenCache.obtainTokensForNamenodes(jobCopy.getCredentials(),
- new Path [] {submitJobDir},
- jobCopy);
- Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
- int reduces = jobCopy.getNumReduceTasks();
- InetAddress ip = InetAddress. getLocalHost();
- if (ip != null) {
- job.setJobSubmitHostAddress(ip.getHostAddress());
- job.setJobSubmitHostName(ip.getHostName());
- }
- JobContext context = new JobContext(jobCopy, jobId);
- // Check the output specification
- if (reduces == 0 ? jobCopy.getUseNewMapper() :
- jobCopy.getUseNewReducer()) {
- org.apache.hadoop.mapreduce.OutputFormat<?,?> output =
- ReflectionUtils.newInstance(context.getOutputFormatClass(),
- jobCopy);
- output.checkOutputSpecs(context);
- } else {
- jobCopy.getOutputFormat().checkOutputSpecs( fs, jobCopy);
- }
- jobCopy = (JobConf)context.getConfiguration();
- // Create the splits for the job
- FileSystem fs = submitJobDir.getFileSystem(jobCopy);
- LOG.debug( "Creating splits at " + fs.makeQualified(submitJobDir));
- int maps = writeSplits(context, submitJobDir);
- jobCopy.setNumMapTasks(maps);
- // write "queue admins of the queue to which job is being submitted"
- // to job file.
- String queue = jobCopy.getQueueName();
- AccessControlList acl = jobSubmitClient .getQueueAdmins(queue);
- jobCopy.set(QueueManager. toFullPropertyName(queue,
- QueueACL. ADMINISTER_JOBS.getAclName()), acl.getACLString());
- // Write job file to JobTracker's fs
- FSDataOutputStream out =
- FileSystem. create(fs, submitJobFile,
- new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION ));
- try {
- jobCopy.writeXml(out);
- } finally {
- out.close();
- }
- //
- // Now, actually submit the job (using the submit name)
- //
- printTokens(jobId, jobCopy.getCredentials());
- status = jobSubmitClient.submitJob(
- jobId, submitJobDir.toString(), jobCopy.getCredentials());
- JobProfile prof = jobSubmitClient .getJobProfile(jobId);
- if (status != null && prof != null) {
- return new NetworkedJob(status, prof, jobSubmitClient );
- } else {
- throw new IOException( "Could not launch job" );
- }
- } finally {
- if (status == null) {
- LOG.info( "Cleaning up the staging area " + submitJobDir);
- if ( fs != null && submitJobDir != null)
- fs.delete(submitJobDir, true);
- }
- }
- }
- });
- }
submitJob方法定义在JobSubmissionProtocol接口中;JobTracker实现了该接口,并提供了submitJob的具体实现:
public
JobStatus submitJob(JobID jobName, String jobSubmitDir, Credentials ts);