1、Job的创建
Job类中重载了多个静态工厂方法getInstance()(下面的源码节选中可见6个,其中常用的是前3个,另有2个标记为@Deprecated、1个标记为@Private),外部代码通过这些方法来创建Job实例
// Build a configuration and set fs.defaultFS to the hdfs:// URI used by this example.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://192.168.116.117:8020/");
// Obtain the Job through the static factory, passing "wordCount" as the job name.
Job job = Job.getInstance(conf,"wordCount");
Job中getInstance()部分的源码
// Excerpt of the Job class showing only its getInstance() static factory overloads;
// the rest of the class is elided ("..." below).
public class Job extends JobContextImpl implements JobContext {
// No-argument factory: delegates to getInstance(Configuration) with a freshly
// constructed Configuration.
public static Job getInstance() throws IOException {
// create with a null Cluster
return getInstance(new Configuration());
}
// Factory taking a configuration: wraps conf in a new JobConf and constructs the Job from it.
public static Job getInstance(Configuration conf) throws IOException {
// create with a null Cluster
JobConf jobConf = new JobConf(conf);
return new Job(jobConf);
}
// Factory taking a configuration and a job name: builds the Job via getInstance(conf),
// then applies jobName with setJobName.
// NOTE(review): the original annotation says the name defaults to the jar name when
// not set — that is not shown in this excerpt; confirm.
public static Job getInstance(Configuration conf, String jobName)
throws IOException {
// create with a null Cluster
Job result = getInstance(conf);
result.setJobName(jobName);
return result;
}
// Deprecated overload: the Cluster argument is ignored and the call is forwarded to
// the no-argument factory.
@Deprecated
public static Job getInstance(Cluster ignored) throws IOException {
return getInstance();
}
// Deprecated overload: the Cluster argument is ignored and the call is forwarded to
// getInstance(conf).
// NOTE(review): the original annotation says a cluster is only created from conf when
// needed — that is not visible in this excerpt; confirm.
@Deprecated
public static Job getInstance(Cluster ignored, Configuration conf)
throws IOException {
return getInstance(conf);
}
// Overload taking a cluster, a job status and a configuration: builds the Job via
// getInstance(status, conf) (an overload not shown in this excerpt), then attaches
// the cluster with setCluster.
@Private
public static Job getInstance(Cluster cluster, JobStatus status,
Configuration conf) throws IOException {
Job job = getInstance(status, conf);
job.setCluster(cluster);
return job;
}
...
}
2、作业的提交
job.waitForCompletion(true)
waitForCompletion()源码
/**
* Submit the job to the cluster and wait for it to finish.
* @param verbose print the progress to the user
* @return true if the job succeeded
* @throws IOException thrown if the communication with the
* <code>JobTracker</code> is lost
*/
public boolean waitForCompletion(boolean verbose
) throws IOException<