大数据--mapreduce作业提交源码

MapReduce作业提交查看源码

1、判断job的状态

//判断private Job.JobState state的值是否为DEFINE,如果是则执行submit()方法;

public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    if (this.state == Job.JobState.DEFINE) {
        this.submit();
    }

    if (verbose) {
        this.monitorAndPrintJob();
    } else {
        int completionPollIntervalMillis = getCompletionPollInterval(this.cluster.getConf());

        while(!this.isComplete()) {
            try {
                Thread.sleep((long)completionPollIntervalMillis);
            } catch (InterruptedException var4) {
            }
        }
    }

    return this.isSuccessful();
}

 

2、确认job的状态是否是DEFINE

//执行this.ensureState(Job.JobState.DEFINE);代码判断job的状态;

public void submit() throws IOException, InterruptedException, ClassNotFoundException {
    this.ensureState(Job.JobState.DEFINE);
    this.setUseNewAPI();
    this.connect();
    final JobSubmitter submitter = this.getJobSubmitter(this.cluster.getFileSystem(), this.cluster.getClient());
    this.status = (JobStatus)this.ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
        public JobStatus run() throws IOException, InterruptedException, ClassNotFoundException {
            return submitter.submitJobInternal(Job.this, Job.this.cluster);
        }
    });
    this.state = Job.JobState.RUNNING;
    LOG.info("The url to track the job: " + this.getTrackingURL());
}

 

 

3、执行ensureState方法确认job的状态是DEFINE

private void ensureState(Job.JobState state) throws IllegalStateException {
    if (state != this.state) {
        throw new IllegalStateException("Job in state " + this.state + " instead of " + state);
    } else if (state == Job.JobState.RUNNING && this.cluster == null) {
        throw new IllegalStateException("Job in state " + this.state + ", but it isn't attached to any job tracker!");
    }
}

 

4、执行setUseNewAPI方法切换新的API

private void setUseNewAPI() throws IOException {
    int numReduces = this.conf.getNumReduceTasks();
    String oldMapperClass = "mapred.mapper.class";
    String oldReduceClass = "mapred.reducer.class";
    this.conf.setBooleanIfUnset("mapred.mapper.new-api", this.conf.get(oldMapperClass) == null);
    String mode;
    if (this.conf.getUseNewMapper()) {
        mode = "new map API";
        this.ensureNotSet("mapred.input.format.class", mode);
        this.ensureNotSet(oldMapperClass, mode);
        if (numReduces != 0) {
            this.ensureNotSet("mapred.partitioner.class", mode);
        } else {
            this.ensureNotSet("mapred.output.format.class", mode);
        }
    } else {
        mode = "map compatibility";
        this.ensureNotSet("mapreduce.job.inputformat.class", mode);
        this.ensureNotSet("mapreduce.job.map.class", mode);
        if (numReduces != 0) {
            this.ensureNotSet("mapreduce.job.partitioner.class", mode);
        } else {
            this.ensureNotSet("mapreduce.job.outputformat.class", mode);
        }
    }

 

    if (numReduces != 0) {
        this.conf.setBooleanIfUnset("mapred.reducer.new-api", this.conf.get(oldReduceClass) == null);
        if (this.conf.getUseNewReducer()) {
            mode = "new reduce API";
            this.ensureNotSet("mapred.output.format.class", mode);
            this.ensureNotSet(oldReduceClass, mode);
        } else {
            mode = "reduce compatibility";
            this.ensureNotSet("mapreduce.job.outputformat.class", mode);
            this.ensureNotSet("mapreduce.job.reduce.class", mode);
        }
    }

}

 

//执行getNumReduceTasks方法获取ReduceTask的个数

public int getNumReduceTasks() {

    //ReduceTask的数目的默认值是1
    return this.getInt("mapreduce.job.reduces", 1);
}

 

5、执行connect方法

private synchronized void connect() throws IOException, InterruptedException, ClassNotFoundException {
    if (this.cluster == null) {
        this.cluster = (Cluster)this.ugi.doAs(new PrivilegedExceptionAction<Cluster>() {
            public Cluster run() throws IOException, InterruptedException, ClassNotFoundException {
                return new Cluster(Job.this.getConfiguration());
            }
        });
    }

}

 

/**
 * Creates a cluster handle for the given address and configuration.
 *
 * @param jobTrackAddr address handed to {@code initialize}; may be {@code null}, in which
 *                     case {@code initialize} uses the configuration-only provider factory
 * @param conf         configuration stored on this instance and handed to {@code initialize}
 * @throws IOException if resolving the current user or initialization fails
 */
public Cluster(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    // Lazily populated members start out empty.
    this.fs = null;
    this.providerList = null;

    // Directory members are likewise unset at construction time.
    this.sysDir = null;
    this.stagingAreaDir = null;
    this.jobHistoryDir = null;

    this.conf = conf;

    // Capture the current user via UGI (the user submitting the job, per the walkthrough).
    this.ugi = UserGroupInformation.getCurrentUser();

    this.initialize(jobTrackAddr, conf);
}

Cluster构造器里面conf的值:

 

 

 

 

 

private void initialize(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    this.initProviderList();
    Iterator i$ = this.providerList.iterator();

    while(i$.hasNext()) {
        ClientProtocolProvider provider = (ClientProtocolProvider)i$.next();
        LOG.debug("Trying ClientProtocolProvider : " + provider.getClass().getName());
        ClientProtocol clientProtocol = null;

        try {
            if (jobTrackAddr == null) {
                clientProtocol = provider.create(conf);
            } else {
                clientProtocol = provider.create(jobTrackAddr, conf);
            }

            if (clientProtocol != null) {
                this.clientProtocolProvider = provider;
                this.client = clientProtocol;
                LOG.debug("Picked " + provider.getClass().getName() + " as the ClientProtocolProvider");
                break;
            }

            LOG.debug("Cannot pick " + provider.getClass().getName() + " as the ClientProtocolProvider - returned null protocol");
        } catch (Exception var7) {
            LOG.info("Failed to use " + provider.getClass().getName() + " due to error: ", var7);
        }
    }

    if (null == this.clientProtocolProvider || null == this.client) {
        throw new IOException("Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.");
    }
}

通过ClientProtocolProvider可以看到它有两个子类:一个是本地的,一个是集群的:

 

 

 

 

 

6、创建job的提交路径

public synchronized FileSystem getFileSystem() throws IOException, InterruptedException {
    if (this.fs == null) {
        try {
            this.fs = (FileSystem)this.ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
                public FileSystem run() throws IOException, InterruptedException {
                    Path sysDir = new Path(Cluster.this.client.getSystemDir());
                    return sysDir.getFileSystem(Cluster.this.getConf());
                }
            });
        } catch (InterruptedException var2) {
            throw new RuntimeException(var2);
        }
    }

    return this.fs;
}

 

public String getSystemDir() {
    Path sysDir = new Path(this.conf.get("mapreduce.jobtracker.system.dir", "/tmp/hadoop/mapred/system"));
    return this.fs.makeQualified(sysDir).toString();
}

通过getSystemDir方法拿到系统的提交job的路径

 

 

 

//将MR框架添加到分布式缓存,生成jobID并写入job,把切片信息和job.xml写到提交目录,最后提交作业并返回JobStatus;

/**
 * Performs the actual job submission and returns the status reported by the submit client.
 *
 * Visible steps, in order: validate the job via checkSpecs, record the submitting host in
 * the configuration, obtain a new job id, derive the per-job submit directory under the
 * staging area, set up credentials and the shuffle secret, copy job files, write the input
 * splits (their count becomes "mapreduce.job.maps"), record queue ACLs, write the job
 * configuration file, and finally call submitClient.submitJob.
 *
 * @param job     the job being submitted; its configuration and credentials are modified
 * @param cluster the cluster used to locate the staging directory
 * @return the non-null JobStatus returned by submitClient.submitJob
 * @throws IOException if the shuffle key cannot be generated or if submitJob returns null
 *                     ("Could not launch job"), among other I/O failures
 */
JobStatus submitJobInternal(Job job, Cluster cluster) throws ClassNotFoundException, InterruptedException, IOException {
    this.checkSpecs(job);
    Configuration conf = job.getConfiguration();

    // Add the MR framework to the distributed cache (as the helper's name indicates).
    addMRFrameworkToDistributedCache(conf);
    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);

    // Resolve the local host and record its name and address in the job configuration.
    InetAddress ip = InetAddress.getLocalHost();
    if (ip != null) {
        this.submitHostAddress = ip.getHostAddress();
        this.submitHostName = ip.getHostName();
        conf.set("mapreduce.job.submithostname", this.submitHostName);
        conf.set("mapreduce.job.submithostaddress", this.submitHostAddress);
    }
    // Ask the submit client for a new job id.
    JobID jobId = this.submitClient.getNewJobID();

    // Attach the generated job id to the job.
    job.setJobID(jobId);

    // Directory that holds this job's submission files: <staging area>/<job id>.
    Path submitJobDir = new Path(jobStagingArea, jobId.toString());
    JobStatus status = null;

    // Decompiler-introduced holder for the return value assigned inside the try block.
    JobStatus var24;
    try {

        // Record user name, HTTP filter initializer and the job directory in the configuration.
        conf.set("mapreduce.job.user.name", UserGroupInformation.getCurrentUser().getShortUserName());
        conf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
        conf.set("mapreduce.job.dir", submitJobDir.toString());
        LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{submitJobDir}, conf);
        this.populateTokenCache(conf, job.getCredentials());
        // Generate a shuffle secret only when the credentials do not already carry one.
        if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
            KeyGenerator keyGen;
            try {
                keyGen = KeyGenerator.getInstance("HmacSHA1");
                keyGen.init(64);
            } catch (NoSuchAlgorithmException var19) {
                throw new IOException("Error generating shuffle secret key", var19);
            }

            SecretKey shuffleKey = keyGen.generateKey();
            TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(), job.getCredentials());
        }

        // With encrypted spill enabled, the AM is limited to a single attempt.
        if (CryptoUtils.isEncryptedSpillEnabled(conf)) {
            conf.setInt("mapreduce.am.max-attempts", 1);
            LOG.warn("Max job attempts set to 1 since encrypted intermediatedata spill is enabled");
        }

        this.copyAndConfigureFiles(job, submitJobDir);
        Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
        LOG.debug("Creating splits at " + this.jtFs.makeQualified(submitJobDir));

        // The number of map tasks is the value returned by writeSplits (the split count).
        int maps = this.writeSplits(job, submitJobDir);
        conf.setInt("mapreduce.job.maps", maps);

        // In the run traced by this article, maps is 1.
        LOG.info("number of splits:" + maps);
        String queue = conf.get("mapreduce.job.queuename", "default");
        AccessControlList acl = this.submitClient.getQueueAdmins(queue);
        conf.set(QueueManager.toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());
        TokenCache.cleanUpTokenReferral(conf);
        // Optionally publish the tracking ids of all tokens held by the job.
        if (conf.getBoolean("mapreduce.job.token.tracking.ids.enabled", false)) {
            // NOTE(review): raw ArrayList / raw Iterator here look like decompiler output.
            ArrayList<String> trackingIds = new ArrayList();
            Iterator i$ = job.getCredentials().getAllTokens().iterator();

            while(i$.hasNext()) {
                Token<? extends TokenIdentifier> t = (Token)i$.next();
                trackingIds.add(t.decodeIdentifier().getTrackingId());
            }

            conf.setStrings("mapreduce.job.token.tracking.ids", (String[])trackingIds.toArray(new String[trackingIds.size()]));
        }

        ReservationId reservationId = job.getReservationId();
        if (reservationId != null) {
            conf.set("mapreduce.job.reservation.id", reservationId.toString());
        }
        // Write the configuration to the job conf file (job.xml, per the article) in the submit dir.
        this.writeConf(conf, submitJobFile);
        this.printTokens(jobId, job.getCredentials());
        status = this.submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
        if (status == null) {
            throw new IOException("Could not launch job");
        }

        var24 = status;
    } finally {
        // status is still null only when submission did not complete; remove the staged files then.
        if (status == null) {
            LOG.info("Cleaning up the staging area " + submitJobDir);
            if (this.jtFs != null && submitJobDir != null) {
                this.jtFs.delete(submitJobDir, true);
            }
        }

    }

    return var24;
}

 

 

private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = (InputFormat)ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    T[] array = (InputSplit[])((InputSplit[])splits.toArray(new InputSplit[splits.size()]));
    Arrays.sort(array, new JobSubmitter.SplitComparator());
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array);
    return array.length;
}

//待处理的文件是一个小于128M的文件所以切片数是1

 

 

 

生成的切片文件信息提交到相关路径

 

 

生成的job.xml的信息:

 

 

 

 

 

 

7、更新job的状态为RUNNING

public void submit() throws IOException, InterruptedException, ClassNotFoundException {
    this.ensureState(Job.JobState.DEFINE);
    this.setUseNewAPI();
    this.connect();
    final JobSubmitter submitter = this.getJobSubmitter(this.cluster.getFileSystem(), this.cluster.getClient());
    this.status = (JobStatus)this.ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
        public JobStatus run() throws IOException, InterruptedException, ClassNotFoundException {
            return submitter.submitJobInternal(Job.this, Job.this.cluster);
        }
    });
    this.state = Job.JobState.RUNNING;
    LOG.info("The url to track the job: " + this.getTrackingURL());
}

 

state由DEFINE更新为RUNNING

 

 

 

 

public synchronized String getTrackingUrl() {
    return this.trackingUrl;
}

获取到URL

 

 

 

 

 

8、执行mapreduce任务

public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {

//state的值是RUNNING跳过该代码块
    if (this.state == Job.JobState.DEFINE) {
        this.submit();
    }
    //verbose的值是true所以开始执行mapreduce任务

//
    if (verbose) {
        this.monitorAndPrintJob();
    } else {
        int completionPollIntervalMillis = getCompletionPollInterval(this.cluster.getConf());

        while(!this.isComplete()) {
            try {
                Thread.sleep((long)completionPollIntervalMillis);
            } catch (InterruptedException var4) {
            }
        }
    }

    return this.isSuccessful();
}

 

 

 

转载于:https://www.cnblogs.com/jeff190812/p/11435824.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值