【大数据之Hadoop】十、MapReduce中job提交流程源码分析

阿宁呀

于 2023-04-07 22:45:00 发布

阅读量319

点赞数

分类专栏： hadoop 文章标签： hadoop 大数据 mapreduce

本文链接：https://blog.csdn.net/qq_18625571/article/details/129846522

版权

hadoop 专栏收录该内容

37 篇文章 8 订阅

订阅专栏

源码中：job提交三个信息（jar包，xml，切片信息），在本地不需要提交jar包，只有在集群上才需要提交。
详细流程
1.WordCountDriver类中job提交
boolean result = job.waitForCompletion(true);

/**
 * Driver entry point: creates a Job from a fresh Configuration, submits it and waits for it,
 * then exits the JVM with status 0 if the job succeeded and 1 otherwise.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    // 1. Load the configuration and obtain the Job object
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    ...
    // 7. Submit the job (verbose = true) and wait for the result
    boolean result = job.waitForCompletion(true);
    System.exit(result ? 0 : 1);
}

2.waitForCompletion(boolean verbose)进入submit()提交。

/**
 * Submits the job if it is still in the DEFINE state, then waits until it has completed.
 *
 * @param verbose when true, the wait is delegated to monitorAndPrintJob(); when false,
 *                isComplete() is polled at the interval read from the cluster configuration
 * @return the value of isSuccessful() once waiting is over
 */
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    // DEFINE is the default state (per the original note), i.e. the job has not been submitted yet.
    if (state == Job.JobState.DEFINE) {
        submit();
    }

    if (!verbose) {
        final long pollIntervalMillis = getCompletionPollInterval(cluster.getConf());
        while (!isComplete()) {
            try {
                Thread.sleep(pollIntervalMillis);
            } catch (InterruptedException ignored) {
                // The interrupt is swallowed here; the loop simply keeps polling.
            }
        }
    } else {
        monitorAndPrintJob();
    }

    return isSuccessful();
}

3.submit()

/**
 * Submits the job: checks the state, connects to the cluster, runs the internal submission
 * as the job's user (via ugi.doAs), and finally marks the job as RUNNING.
 */
public void submit() throws IOException, InterruptedException, ClassNotFoundException {
    ensureState(Job.JobState.DEFINE); // verifies the state; per the original note it only throws on mismatch
    setUseNewAPI();                   // reconciles the old and new API settings
    connect();                        // establishes the connection (creates the Cluster if needed)

    final JobSubmitter submitter = getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    final PrivilegedExceptionAction<JobStatus> submitAction = new PrivilegedExceptionAction<JobStatus>() {
        @Override
        public JobStatus run() throws IOException, InterruptedException, ClassNotFoundException {
            return submitter.submitJobInternal(Job.this, Job.this.cluster);
        }
    };
    status = (JobStatus) ugi.doAs(submitAction);

    state = Job.JobState.RUNNING;
    LOG.info("The url to track the job: " + getTrackingURL());
}

4.connect()：建立客户端与集群的连接。如果是集群模式，则通过YARN客户端与集群交互；本地模式则使用本地客户端。

/**
 * Lazily creates the Cluster handle for this job; does nothing when one already exists.
 * The Cluster is constructed inside ugi.doAs(...) from the job's configuration.
 */
synchronized void connect() throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster != null) {
        return;
    }

    final PrivilegedExceptionAction<Cluster> createCluster = new PrivilegedExceptionAction<Cluster>() {
        @Override
        public Cluster run() throws IOException, InterruptedException, ClassNotFoundException {
            // The Cluster instance serves as the proxy through which the job is submitted.
            return new Cluster(Job.this.getConfiguration());
        }
    };
    cluster = (Cluster) ugi.doAs(createCluster);
}

5.Cluster构造器

/**
 * Creates a Cluster from a configuration only, delegating to the two-argument
 * constructor with a null address.
 *
 * @param conf configuration passed on to the two-argument constructor
 * @throws IOException propagated from the delegated constructor
 */
public Cluster(Configuration conf) throws IOException {
    // The cast tells the compiler which overload the null argument is meant for.
    this((InetSocketAddress)null, conf);
}

/**
 * Creates a Cluster: clears the cached fields, stores the configuration and the current
 * user, then calls initialize(...) to select the client.
 *
 * @param jobTrackAddr job tracker address; may be null (the single-argument constructor passes null)
 * @param conf configuration stored on this instance and handed to initialize(...)
 * @throws IOException propagated from the calls made here (e.g. initialize(...))
 */
public Cluster(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    this.fs = null;
    this.sysDir = null;
    this.stagingAreaDir = null;
    this.jobHistoryDir = null;
    this.providerList = null;
    this.conf = conf;
    this.ugi = UserGroupInformation.getCurrentUser();
    // Determines whether this is a local run or a YARN cluster run (per the original note)
    this.initialize(jobTrackAddr, conf);
}

6.Cluster初始化：initialize(jobTrackAddr, conf)判断是yarn的客户端还是本地客户端

/**
 * Walks the provider list and adopts the first provider that yields a non-null client
 * protocol. Per the original note, the candidates are the YARN client and the local client.
 *
 * @param jobTrackAddr optional job tracker address; when null, providers are asked to create
 *                     a protocol from the configuration alone
 * @param conf configuration handed to each provider
 * @throws IOException if no provider produced a client protocol; failures of individual
 *                     providers are attached to it as suppressed exceptions
 */
private void initialize(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    initProviderList();
    // Built up front so that each provider failure can be recorded on it as it happens.
    final IOException initEx = new IOException("Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.");
    if (jobTrackAddr != null) {
        LOG.info("Initializing cluster for Job Tracker=" + jobTrackAddr.toString());
    }

    for (Object candidate : providerList) {
        final ClientProtocolProvider provider = (ClientProtocolProvider) candidate;
        final String providerName = provider.getClass().getName();
        LOG.debug("Trying ClientProtocolProvider : " + providerName);

        try {
            final ClientProtocol clientProtocol = (jobTrackAddr == null)
                    ? provider.create(conf)
                    : provider.create(jobTrackAddr, conf);

            if (clientProtocol == null) {
                LOG.debug("Cannot pick " + providerName + " as the ClientProtocolProvider - returned null protocol");
                continue;
            }

            // First provider that returns a protocol wins; stop searching.
            clientProtocolProvider = provider;
            client = clientProtocol;
            LOG.debug("Picked " + providerName + " as the ClientProtocolProvider");
            break;
        } catch (Exception e) {
            final String errMsg = "Failed to use " + providerName + " due to error: ";
            initEx.addSuppressed(new IOException(errMsg, e));
            LOG.info(errMsg, e);
        }
    }

    if (clientProtocolProvider == null || client == null) {
        throw initEx;
    }
}

7.提交job，submit()中的submitJobInternal(Job.this, Job.this.cluster)

/**
 * Performs the actual submission: validates the job, resolves the staging directory,
 * obtains a job id, uploads the job resources, computes the input splits, writes the job
 * configuration, and finally hands the job to the submit client.
 *
 * If no JobStatus was obtained (an exception was thrown or the client returned null),
 * the per-job submit directory is deleted in the finally block.
 *
 * @param job the job being submitted
 * @param cluster cluster handle used to resolve the staging directory
 * @return the status returned by submitClient.submitJob(...)
 * @throws IOException if the shuffle key generator is unavailable or the job could not be
 *         launched; callees may raise it as well
 * @throws IllegalArgumentException if the number of splits exceeds "mapreduce.job.max.map"
 */
JobStatus submitJobInternal(Job job, Cluster cluster) throws ClassNotFoundException, InterruptedException, IOException {
    this.checkSpecs(job);  // validate the output specification (output path)
    Configuration conf = job.getConfiguration();
    addMRFrameworkToDistributedCache(conf);
    
    // 1) Resolve the staging directory that temporarily holds the data submitted to the cluster
    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
    // Record the submitting host's name and address in the job configuration
    InetAddress ip = InetAddress.getLocalHost();
    if (ip != null) {
        this.submitHostAddress = ip.getHostAddress();
        this.submitHostName = ip.getHostName();
        conf.set("mapreduce.job.submithostname", this.submitHostName);
        conf.set("mapreduce.job.submithostaddress", this.submitHostAddress);
    }

    // 2) Obtain a new job id and derive the job's submit directory
    JobID jobId = this.submitClient.getNewJobID();
    job.setJobID(jobId);
    // Append the job id to the staging path resolved above
    Path submitJobDir = new Path(jobStagingArea, jobId.toString());
    JobStatus status = null;

    // Holder for the return value (presumably a decompiler artifact)
    JobStatus var25;
    try {
        conf.set("mapreduce.job.user.name", UserGroupInformation.getCurrentUser().getShortUserName());
        conf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
        conf.set("mapreduce.job.dir", submitJobDir.toString());
        LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{submitJobDir}, conf);
        this.populateTokenCache(conf, job.getCredentials());
        // Generate a shuffle secret key only when the credentials do not already carry one
        if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
            KeyGenerator keyGen;
            try {
                keyGen = KeyGenerator.getInstance("HmacSHA1");
                keyGen.init(64);
            } catch (NoSuchAlgorithmException var20) {
                throw new IOException("Error generating shuffle secret key", var20);
            }

            SecretKey shuffleKey = keyGen.generateKey();
            TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(), job.getCredentials());
        }

        // With encrypted spill enabled, the job is limited to a single attempt
        if (CryptoUtils.isEncryptedSpillEnabled(conf)) {
            conf.setInt("mapreduce.am.max-attempts", 1);
            LOG.warn("Max job attempts set to 1 since encrypted intermediatedata spill is enabled");
        }
        
        // 3) Copy the job resources (jar etc.) to the cluster
        this.copyAndConfigureFiles(job, submitJobDir);
        // Original author's notes (not verifiable from this method alone):
        // - this creates a temporary, initially empty directory named after the job id;
        // - in cluster mode the client always uploads the job's jar to the YARN cluster;
        // - in local mode the jar is already local, so nothing is uploaded.
        Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
        
        LOG.debug("Creating splits at " + this.jtFs.makeQualified(submitJobDir));

        // 4) Compute the input splits and write the split plan files (four files, per the original note)
        int maps = this.writeSplits(job, submitJobDir);
        // Number of MapTasks = number of splits; the split count is stored as the map count
        conf.setInt("mapreduce.job.maps", maps);
        LOG.info("number of splits:" + maps);
        // A negative limit (the default -1) disables the check
        int maxMaps = conf.getInt("mapreduce.job.max.map", -1);
        if (maxMaps >= 0 && maxMaps < maps) {
            throw new IllegalArgumentException("The number of map tasks " + maps + " exceeded limit " + maxMaps);
        }

        String queue = conf.get("mapreduce.job.queuename", "default");
        AccessControlList acl = this.submitClient.getQueueAdmins(queue);
        conf.set(QueueManager.toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());
        TokenCache.cleanUpTokenReferral(conf);
        if (conf.getBoolean("mapreduce.job.token.tracking.ids.enabled", false)) {
            ArrayList<String> trackingIds = new ArrayList();
            Iterator var15 = job.getCredentials().getAllTokens().iterator();

            while(var15.hasNext()) {
                Token<? extends TokenIdentifier> t = (Token)var15.next();
                trackingIds.add(t.decodeIdentifier().getTrackingId());
            }

            conf.setStrings("mapreduce.job.token.tracking.ids", (String[])trackingIds.toArray(new String[trackingIds.size()]));
        }

        ReservationId reservationId = job.getReservationId();
        if (reservationId != null) {
            conf.set("mapreduce.job.reservation.id", reservationId.toString());
        }
        
        // 5) Write the XML configuration file into the staging (submit) directory.
        // Per the original note, a submission mainly consists of job.xml (all parameters the job
        // needs) and job.split (split information); in YARN cluster mode the jar is submitted too.
        this.writeConf(conf, submitJobFile);
        
        this.printTokens(jobId, job.getCredentials());
        
        // 6) Submit the job through the client; after this method returns, submit() sets the state to RUNNING
        status = this.submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
        if (status == null) {
            throw new IOException("Could not launch job");
        }

        var25 = status;
    } finally {
        // status is still null when the submission did not succeed: remove the submit directory
        if (status == null) {
            LOG.info("Cleaning up the staging area " + submitJobDir);
            if (this.jtFs != null && submitJobDir != null) {
                this.jtFs.delete(submitJobDir, true);
            }
        }

    }

    return var25;
}

在这里插入图片描述

8.验证输出路径，submitJobInternal(Job.this, Job.this.cluster)中的checkSpecs(job)

/**
 * Validates the job's output specification through whichever API (old or new) the job uses.
 *
 * A job with zero reduce tasks is classified by its mapper API flag; any other job by its
 * reducer API flag.
 *
 * @param job the job whose output specification is checked
 */
private void checkSpecs(Job job) throws ClassNotFoundException, InterruptedException, IOException {
    JobConf jConf = (JobConf)job.getConfiguration();
    final boolean usesNewApi = jConf.getNumReduceTasks() == 0
            ? jConf.getUseNewMapper()
            : jConf.getUseNewReducer();

    if (usesNewApi) {
        // New API: instantiate the job's output format reflectively and let it validate the job.
        OutputFormat<?, ?> output = (OutputFormat)ReflectionUtils.newInstance(job.getOutputFormatClass(), job.getConfiguration());
        output.checkOutputSpecs(job);
    } else {
        // Old API: per the original note, checkOutputSpecs verifies the output path
        // (it must be specified and must not already exist).
        jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
    }
}

9.上传作业资源（jar包等）到集群，submitJobInternal(job, cluster)中的copyAndConfigureFiles(job, submitJobDir)

/**
 * Uploads the job's resources into the given submit directory.
 *
 * @param job the job whose resources are uploaded
 * @param jobSubmitDir the job's submit directory
 * @throws IOException propagated from the upload
 */
private void copyAndConfigureFiles(Job job, Path jobSubmitDir) throws IOException {
    final Configuration jobConf = job.getConfiguration();
    // Wildcard handling for libjars is on unless the configuration turns it off.
    final boolean wildcardLibJars = jobConf.getBoolean("mapreduce.client.libjars.wildcard", true);
    new JobResourceUploader(jtFs, wildcardLibJars).uploadResources(job, jobSubmitDir);

    // The return value is unused; the call is kept as in the original
    // (presumably for a side effect on the job's working directory - TODO confirm).
    job.getWorkingDirectory();
}

总结

waitForCompletion()

submit();

// 1建立连接
    connect();  
        // 1）创建提交Job的代理
        new Cluster(getConfiguration());
            //（1）判断是本地运行环境还是yarn集群运行环境
            initialize(jobTrackAddr,conf); 

// 2 提交job
submitter.submitJobInternal(Job.this,cluster)

    // 1）创建Staging路径，提交给集群，用于临时缓存文件
    Path jobStagingArea =JobSubmissionFiles.getStagingDir(cluster, conf);

    // 2）创建jobid ，并创建Job路径
    JobID jobId = submitClient.getNewJobID();

    // 3）如果是集群模式，则拷贝jar包到集群；本地则不需要
copyAndConfigureFiles(job,submitJobDir);  
    rUploader.uploadResources(job, jobSubmitDir);

    // 4）计算切片，生成切片信息文件
writeSplits(job,submitJobDir);
        maps= writeNewSplits(job, jobSubmitDir);
        input.getSplits(job);

    // 5）向Staging路径写XML配置文件
writeConf(conf,submitJobFile);
    conf.writeXml(out);

    // 6）提交Job，返回提交状态，即状态从DEFINE转为RUNNING
status =submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());