大数据--mapreduce作业提交源码

最新推荐文章于 2021-06-25 11:52:27 发布

awys19931001

最新推荐文章于 2021-06-25 11:52:27 发布

阅读量241

点赞数

文章标签：大数据

原文链接：http://www.cnblogs.com/jeff190812/p/11435824.html

版权

MapReduce作业提交查看源码

1、判断job的状态

//判断private Job.JobState state的值是否为DEFINE（初始状态），如果是则执行submit()方法；

/**
 * Submits the job if it is still in the {@code DEFINE} state, then blocks until it completes.
 *
 * <p>With {@code verbose} set, waiting is delegated to {@code monitorAndPrintJob()}. Otherwise the
 * method polls {@code isComplete()}, sleeping between polls for the interval returned by
 * {@code getCompletionPollInterval(...)}.
 *
 * <p>An interrupt that arrives while sleeping does not abort the wait (the loop keeps polling, as
 * before), but it is no longer lost: the thread's interrupt flag is restored just before this
 * method returns so that callers can still observe the interruption.
 *
 * @param verbose whether to monitor the job via {@code monitorAndPrintJob()} instead of polling
 * @return the value of {@code isSuccessful()} once the job has completed
 * @throws IOException propagated from submission or status calls
 * @throws InterruptedException propagated from submission or monitoring
 * @throws ClassNotFoundException propagated from submission
 */
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    if (this.state == Job.JobState.DEFINE) {
        this.submit();
    }

    if (verbose) {
        this.monitorAndPrintJob();
        return this.isSuccessful();
    }

    final long pollIntervalMillis = getCompletionPollInterval(this.cluster.getConf());
    boolean interrupted = false;
    try {
        while (!this.isComplete()) {
            try {
                Thread.sleep(pollIntervalMillis);
            } catch (InterruptedException ignored) {
                // Keep waiting for the job, but remember the interrupt so it can be restored below.
                interrupted = true;
            }
        }
        return this.isSuccessful();
    } finally {
        // Restore the flag only after the last status call so those calls run uninterrupted.
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}

2、确认job的状态是否是DEFINE

//执行this.ensureState(Job.JobState.DEFINE);代码判断job的状态；

/**
 * Submits this job and marks it as running.
 *
 * <p>Steps, in order: verify the job is in the {@code DEFINE} state, run {@code setUseNewAPI()},
 * connect to the cluster, obtain a {@code JobSubmitter}, and call its
 * {@code submitJobInternal} through {@code ugi.doAs}. The returned status is stored in
 * {@code status}, {@code state} becomes {@code RUNNING}, and the tracking URL is logged.
 *
 * @throws IOException propagated from the steps above
 * @throws InterruptedException propagated from the steps above
 * @throws ClassNotFoundException propagated from the steps above
 */
public void submit() throws IOException, InterruptedException, ClassNotFoundException {
    this.ensureState(Job.JobState.DEFINE);
    this.setUseNewAPI();
    this.connect();

    final JobSubmitter submitter = this.getJobSubmitter(this.cluster.getFileSystem(), this.cluster.getClient());
    final PrivilegedExceptionAction<JobStatus> submitAction = new PrivilegedExceptionAction<JobStatus>() {
        @Override
        public JobStatus run() throws IOException, InterruptedException, ClassNotFoundException {
            return submitter.submitJobInternal(Job.this, Job.this.cluster);
        }
    };
    this.status = (JobStatus)this.ugi.doAs(submitAction);

    this.state = Job.JobState.RUNNING;
    LOG.info("The url to track the job: " + this.getTrackingURL());
}

3、执行ensureState方法确认job的状态是DEFINE

/**
 * Verifies that the job is currently in the expected state.
 *
 * @param state the state the job is required to be in
 * @throws IllegalStateException if the current state differs from {@code state}, or if the
 *     expected state is {@code RUNNING} while {@code cluster} is {@code null}
 */
private void ensureState(Job.JobState state) throws IllegalStateException {
    if (this.state != state) {
        throw new IllegalStateException("Job in state " + this.state + " instead of " + state);
    }

    final boolean runningWithoutCluster = Job.JobState.RUNNING == state && null == this.cluster;
    if (runningWithoutCluster) {
        throw new IllegalStateException("Job in state " + this.state + ", but it isn't attached to any job tracker!");
    }
}

4、执行setUseNewAPI方法切换新的API

/**
 * Defaults the "new-api" flags for the mapper and reducer and checks that no configuration key
 * belonging to the other API generation is set.
 *
 * <p>{@code mapred.mapper.new-api} (and, when there are reduce tasks, {@code mapred.reducer.new-api})
 * is set, if not already set, to whether the corresponding old-style class key is absent.
 * Each conflicting key is then passed to {@code ensureNotSet} together with a mode label.
 *
 * @throws IOException propagated from {@code ensureNotSet}
 */
private void setUseNewAPI() throws IOException {
    final boolean hasReducers = this.conf.getNumReduceTasks() != 0;
    final String legacyMapperKey = "mapred.mapper.class";
    final String legacyReducerKey = "mapred.reducer.class";

    // Map side.
    this.conf.setBooleanIfUnset("mapred.mapper.new-api", this.conf.get(legacyMapperKey) == null);
    if (this.conf.getUseNewMapper()) {
        final String mapMode = "new map API";
        this.ensureNotSet("mapred.input.format.class", mapMode);
        this.ensureNotSet(legacyMapperKey, mapMode);
        // With reducers the partitioner key is checked; in a map-only job the output format key is.
        this.ensureNotSet(hasReducers ? "mapred.partitioner.class" : "mapred.output.format.class", mapMode);
    } else {
        final String mapMode = "map compatibility";
        this.ensureNotSet("mapreduce.job.inputformat.class", mapMode);
        this.ensureNotSet("mapreduce.job.map.class", mapMode);
        this.ensureNotSet(hasReducers ? "mapreduce.job.partitioner.class" : "mapreduce.job.outputformat.class", mapMode);
    }

    // Reduce side: nothing to check for a map-only job.
    if (!hasReducers) {
        return;
    }

    this.conf.setBooleanIfUnset("mapred.reducer.new-api", this.conf.get(legacyReducerKey) == null);
    if (this.conf.getUseNewReducer()) {
        final String reduceMode = "new reduce API";
        this.ensureNotSet("mapred.output.format.class", reduceMode);
        this.ensureNotSet(legacyReducerKey, reduceMode);
    } else {
        final String reduceMode = "reduce compatibility";
        this.ensureNotSet("mapreduce.job.outputformat.class", reduceMode);
        this.ensureNotSet("mapreduce.job.reduce.class", reduceMode);
    }
}

//执行getNumReduceTasks方法获取ReduceTask的个数

/**
 * Reads the configured number of reduce tasks.
 *
 * @return the integer stored under {@code mapreduce.job.reduces}, looked up with a default of 1
 */
public int getNumReduceTasks() {
    // The number of reduce tasks defaults to 1.
    final int defaultReduceTasks = 1;
    return this.getInt("mapreduce.job.reduces", defaultReduceTasks);
}

5、执行connect方法

/**
 * Creates the {@code Cluster} for this job on first use.
 *
 * <p>Does nothing when {@code cluster} is already set. Otherwise a new {@code Cluster} is built
 * from the job's configuration inside {@code ugi.doAs} and stored in {@code cluster}.
 *
 * @throws IOException propagated from cluster creation
 * @throws InterruptedException propagated from {@code doAs}
 * @throws ClassNotFoundException propagated from cluster creation
 */
private synchronized void connect() throws IOException, InterruptedException, ClassNotFoundException {
    if (this.cluster != null) {
        return;
    }

    final PrivilegedExceptionAction<Cluster> createCluster = new PrivilegedExceptionAction<Cluster>() {
        @Override
        public Cluster run() throws IOException, InterruptedException, ClassNotFoundException {
            return new Cluster(Job.this.getConfiguration());
        }
    };
    this.cluster = (Cluster)this.ugi.doAs(createCluster);
}

/**
 * Builds a cluster handle for the given configuration.
 *
 * <p>Resets the cached file system and directory fields to {@code null}, stores {@code conf},
 * records the current user, and then calls {@code initialize} with the same arguments.
 *
 * @param jobTrackAddr address passed through to {@code initialize}; that method treats
 *     {@code null} as "no explicit address"
 * @param conf configuration stored on this instance and passed to {@code initialize}
 * @throws IOException propagated from the calls below
 */
public Cluster(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    this.fs = null;
    this.sysDir = null;
    this.stagingAreaDir = null;
    this.jobHistoryDir = null;
    this.providerList = null;
    this.conf = conf;

// Capture the current user via UserGroupInformation (per the article, the user submitting the job).
this.ugi = UserGroupInformation.getCurrentUser();
this.initialize(jobTrackAddr, conf);
}

Cluster构造器里面conf的值：

/**
 * Picks the first {@code ClientProtocolProvider} from {@code providerList} that yields a non-null
 * client protocol and stores both the provider and the protocol on this instance.
 *
 * <p>A provider that returns {@code null} or throws is logged and skipped; the search stops at
 * the first success.
 *
 * @param jobTrackAddr when non-null, passed to {@code provider.create(jobTrackAddr, conf)};
 *     when {@code null}, {@code provider.create(conf)} is used instead
 * @param conf configuration handed to each provider
 * @throws IOException if no provider produced a client protocol
 */
private void initialize(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    this.initProviderList();

    for (Iterator<?> candidates = this.providerList.iterator(); candidates.hasNext(); ) {
        final ClientProtocolProvider provider = (ClientProtocolProvider)candidates.next();
        final String providerName = provider.getClass().getName();
        LOG.debug("Trying ClientProtocolProvider : " + providerName);

        try {
            final ClientProtocol protocol = (jobTrackAddr == null)
                    ? provider.create(conf)
                    : provider.create(jobTrackAddr, conf);
            if (protocol == null) {
                LOG.debug("Cannot pick " + providerName + " as the ClientProtocolProvider - returned null protocol");
                continue;
            }

            this.clientProtocolProvider = provider;
            this.client = protocol;
            LOG.debug("Picked " + providerName + " as the ClientProtocolProvider");
            break;
        } catch (Exception e) {
            // A failing provider is not fatal; log it and try the next one.
            LOG.info("Failed to use " + providerName + " due to error: ", e);
        }
    }

    if (this.clientProtocolProvider == null || this.client == null) {
        throw new IOException("Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.");
    }
}

通过ClientProtocolProvider能看到有两个子类：一个是本地的（LocalClientProtocolProvider），一个是集群的（YarnClientProtocolProvider）：

6、创建job的提交路径

/**
 * Returns the file system that holds the system directory, resolving it on first use.
 *
 * <p>On the first call the system directory reported by {@code client.getSystemDir()} is turned
 * into a {@code Path}, and that path's file system (for this cluster's configuration) is looked
 * up inside {@code ugi.doAs} and cached in {@code fs}. Later calls return the cached value.
 *
 * <p>If the lookup is interrupted, the exception is still rethrown wrapped in a
 * {@link RuntimeException} (as before), but the thread's interrupt flag is now restored first so
 * the interruption is not silently lost.
 *
 * @return the cached or newly resolved file system
 * @throws IOException propagated from the lookup
 * @throws InterruptedException declared for compatibility with existing callers
 */
public synchronized FileSystem getFileSystem() throws IOException, InterruptedException {
    if (this.fs != null) {
        return this.fs;
    }

    final PrivilegedExceptionAction<FileSystem> resolveFileSystem = new PrivilegedExceptionAction<FileSystem>() {
        @Override
        public FileSystem run() throws IOException, InterruptedException {
            Path sysDir = new Path(Cluster.this.client.getSystemDir());
            return sysDir.getFileSystem(Cluster.this.getConf());
        }
    };

    try {
        this.fs = (FileSystem)this.ugi.doAs(resolveFileSystem);
    } catch (InterruptedException e) {
        // Preserve the interrupt for callers that only see the RuntimeException.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }

    return this.fs;
}

/**
 * Returns the system directory as a fully qualified path string.
 *
 * <p>The directory is read from {@code mapreduce.jobtracker.system.dir}, falling back to
 * {@code /tmp/hadoop/mapred/system}, and qualified against {@code fs}.
 *
 * @return the string form of the qualified system directory
 */
public String getSystemDir() {
    final String configuredDir = this.conf.get("mapreduce.jobtracker.system.dir", "/tmp/hadoop/mapred/system");
    return this.fs.makeQualified(new Path(configuredDir)).toString();
}

通过getSystemDir方法拿到系统的提交job的路径

//将MR框架添加到分布式缓存，生成jobID、写入切片和配置信息并提交作业，最后将JobStatus返回出去；

/**
 * Prepares and submits a job, returning the status reported by the submit client.
 *
 * <p>In order, this method: checks the job specs, records the submitting host in the
 * configuration, obtains a new job ID, derives the per-job submit directory under the staging
 * area, sets up tokens and (if absent) a shuffle secret key, copies job files, writes the splits
 * and stores their count as the number of maps, records queue ACLs and optional token tracking
 * IDs / reservation ID, writes the configuration file, and finally calls
 * {@code submitClient.submitJob}.
 *
 * <p>If anything fails before a non-null status is obtained, the submit directory is deleted in
 * the {@code finally} block.
 *
 * @param job the job being submitted; receives the new job ID
 * @param cluster used to locate the staging directory
 * @return the non-null status returned by {@code submitClient.submitJob}
 * @throws IOException if the job could not be launched or a step above fails
 * @throws InterruptedException propagated from the steps above
 * @throws ClassNotFoundException propagated from the steps above
 */
JobStatus submitJobInternal(Job job, Cluster cluster) throws ClassNotFoundException, InterruptedException, IOException {
this.checkSpecs(job);
Configuration conf = job.getConfiguration();

// Add the MR framework to the distributed cache (per the helper's name; its body is not shown here).
addMRFrameworkToDistributedCache(conf);
Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);

// Look up the local host and record its name and address in the configuration.
    InetAddress ip = InetAddress.getLocalHost();
    if (ip != null) {
        this.submitHostAddress = ip.getHostAddress();
        this.submitHostName = ip.getHostName();
        conf.set("mapreduce.job.submithostname", this.submitHostName);
        conf.set("mapreduce.job.submithostaddress", this.submitHostAddress);
    }
    // Obtain a new job ID from the submit client.
    JobID jobId = this.submitClient.getNewJobID();

// Attach the generated job ID to the job.
job.setJobID(jobId);

// Per-job submit directory: <staging area>/<job ID>.
    Path submitJobDir = new Path(jobStagingArea, jobId.toString());
    JobStatus status = null;

    JobStatus var24;
    try {

// Record the user name, HTTP filter initializer and job directory in the configuration.
        conf.set("mapreduce.job.user.name", UserGroupInformation.getCurrentUser().getShortUserName());
        conf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
        conf.set("mapreduce.job.dir", submitJobDir.toString());
        LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{submitJobDir}, conf);
        this.populateTokenCache(conf, job.getCredentials());
        // Generate a shuffle secret key only when the credentials do not already carry one.
        if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
            KeyGenerator keyGen;
            try {
                keyGen = KeyGenerator.getInstance("HmacSHA1");
                keyGen.init(64);
            } catch (NoSuchAlgorithmException var19) {
                throw new IOException("Error generating shuffle secret key", var19);
            }

            SecretKey shuffleKey = keyGen.generateKey();
            TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(), job.getCredentials());
        }

        // With encrypted spill enabled, the AM max-attempts setting is forced to 1.
        if (CryptoUtils.isEncryptedSpillEnabled(conf)) {
            conf.setInt("mapreduce.am.max-attempts", 1);
            LOG.warn("Max job attempts set to 1 since encrypted intermediatedata spill is enabled");
        }

        this.copyAndConfigureFiles(job, submitJobDir);
        Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
        LOG.debug("Creating splits at " + this.jtFs.makeQualified(submitJobDir));

// The value returned by writeSplits is stored as the number of map tasks.
int maps = this.writeSplits(job, submitJobDir);
conf.setInt("mapreduce.job.maps", maps);

// NOTE: in the article's walkthrough run, maps was 1.
        LOG.info("number of splits:" + maps);
        String queue = conf.get("mapreduce.job.queuename", "default");
        AccessControlList acl = this.submitClient.getQueueAdmins(queue);
        conf.set(QueueManager.toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());
        TokenCache.cleanUpTokenReferral(conf);
        // Optionally publish the tracking IDs of all tokens held in the job's credentials.
        if (conf.getBoolean("mapreduce.job.token.tracking.ids.enabled", false)) {
            ArrayList<String> trackingIds = new ArrayList();
            Iterator i$ = job.getCredentials().getAllTokens().iterator();

            while(i$.hasNext()) {
                Token<? extends TokenIdentifier> t = (Token)i$.next();
                trackingIds.add(t.decodeIdentifier().getTrackingId());
            }

            conf.setStrings("mapreduce.job.token.tracking.ids", (String[])trackingIds.toArray(new String[trackingIds.size()]));
        }

        ReservationId reservationId = job.getReservationId();
        if (reservationId != null) {
            conf.set("mapreduce.job.reservation.id", reservationId.toString());
        }
        // Write the configuration to the job conf file (job.xml according to the article).
        this.writeConf(conf, submitJobFile);
        this.printTokens(jobId, job.getCredentials());
        status = this.submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
        if (status == null) {
            throw new IOException("Could not launch job");
        }

        var24 = status;
    } finally {
        // status is assigned only by submitJob above, so null here means submission did not succeed.
        if (status == null) {
            LOG.info("Cleaning up the staging area " + submitJobDir);
            if (this.jtFs != null && submitJobDir != null) {
                this.jtFs.delete(submitJobDir, true);
            }
        }

    }

    return var24;
}

/**
 * Computes the input splits for a job, sorts them, and writes the split files.
 *
 * <p>The input format named by the job is instantiated reflectively, asked for its splits, and
 * the resulting array is sorted with {@code JobSubmitter.SplitComparator} before being handed to
 * {@code JobSplitWriter.createSplitFiles}.
 *
 * @param job the job context supplying the configuration and input format class
 * @param jobSubmitDir directory the split files are written to; its file system is used for writing
 * @param <T> the concrete split type
 * @return the number of splits written
 * @throws IOException propagated from split computation or writing
 * @throws InterruptedException propagated from split computation
 * @throws ClassNotFoundException if the input format class cannot be resolved
 */
private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);

    // The backing array is an InputSplit[]; viewing it as T[] is safe because T is only ever
    // used here as an InputSplit (sorting and writing), never as a more specific type.
    @SuppressWarnings("unchecked")
    T[] array = (T[])splits.toArray(new InputSplit[splits.size()]);

    Arrays.sort(array, new JobSubmitter.SplitComparator());
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array);
    return array.length;
}

//待处理的文件是一个小于128M的文件所以切片数是1：

生成的切片文件信息提交到相关路径

生成的job.xml的信息：

7、更新job的状态为RUNNING

将state从DEFINE更新为RUNNING；

/**
 * Returns the tracking URL currently stored on this object.
 *
 * @return the value of the {@code trackingUrl} field
 */
public synchronized String getTrackingUrl() {
    final String currentUrl = this.trackingUrl;
    return currentUrl;
}

获取到URL：

8、执行mapreduce任务

/**
 * Submits the job if it is still in the {@code DEFINE} state, then blocks until it completes.
 *
 * <p>With {@code verbose} set, waiting is delegated to {@code monitorAndPrintJob()}. Otherwise the
 * method polls {@code isComplete()}, sleeping between polls for the interval returned by
 * {@code getCompletionPollInterval(...)}.
 *
 * <p>An interrupt that arrives while sleeping does not abort the wait (the loop keeps polling, as
 * before), but it is no longer lost: the thread's interrupt flag is restored just before this
 * method returns so that callers can still observe the interruption.
 *
 * @param verbose whether to monitor the job via {@code monitorAndPrintJob()} instead of polling
 * @return the value of {@code isSuccessful()} once the job has completed
 * @throws IOException propagated from submission or status calls
 * @throws InterruptedException propagated from submission or monitoring
 * @throws ClassNotFoundException propagated from submission
 */
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    // At this point in the article's walkthrough the state is already RUNNING, so this block is skipped.
    if (this.state == Job.JobState.DEFINE) {
        this.submit();
    }

    // In the article's walkthrough verbose is true, so the job is followed via monitorAndPrintJob().
    if (verbose) {
        this.monitorAndPrintJob();
        return this.isSuccessful();
    }

    final long pollIntervalMillis = getCompletionPollInterval(this.cluster.getConf());
    boolean interrupted = false;
    try {
        while (!this.isComplete()) {
            try {
                Thread.sleep(pollIntervalMillis);
            } catch (InterruptedException ignored) {
                // Keep waiting for the job, but remember the interrupt so it can be restored below.
                interrupted = true;
            }
        }
        return this.isSuccessful();
    } finally {
        // Restore the flag only after the last status call so those calls run uninterrupted.
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}

转载于:https://www.cnblogs.com/jeff190812/p/11435824.html

awys19931001

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
大数据--mapreduce作业提交源码

MapReduce作业提交查看源码1、判断job的状态//将private Job.JobState state的值修改为DEFINE，然后执行submit（）方法；public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundE...
复制链接

扫一扫