Job提交机制
MapReduce程序在最后阶段才通过提交一个作业(Job)来执行前面编写好的代码,这与Scala中的延迟(lazy)求值类似,也类似于Spark中的transformation和action。
Job提交有两种方式,常用的job.waitForCompletion(true); 和 job.submit();
job.waitForCompletion(true);
job.submit();
图解
下面是Job提交的源代码。其实waitForCompletion内部也是调用submit()方法来完成作业提交的:
/**
 * Submits the job if it is still in the {@code DEFINE} state, then blocks until it completes.
 *
 * <p>When {@code verbose} is set, waiting is delegated to {@code monitorAndPrintJob()}.
 * Otherwise the method polls {@code isComplete()}, sleeping between checks for the interval
 * returned by {@code getCompletionPollInterval(...)}.
 *
 * <p>An interrupt received while sleeping does not abort the wait (same as before), but the
 * thread's interrupt status is now restored before returning instead of being silently lost,
 * so callers can still observe that an interrupt happened.
 *
 * @param verbose the flag passed by the caller, e.g. {@code waitForCompletion(true)};
 *                {@code true} selects the monitoring path, {@code false} the silent polling path
 * @return the value of {@code isSuccessful()} once the job has completed, i.e. whether the
 *         job succeeded (not merely whether it was submitted)
 * @throws IOException            declared; may come from the submission or status calls
 * @throws InterruptedException   declared; may come from the submission or monitoring calls
 * @throws ClassNotFoundException declared; may come from the submission call
 */
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException {
    // Only a job that has not been submitted yet (state DEFINE) is submitted here.
    if (this.state == Job.JobState.DEFINE) {
        this.submit();
    }
    if (verbose) {
        // Monitor the job and print its information while it runs.
        this.monitorAndPrintJob();
        return this.isSuccessful();
    }

    int completionPollIntervalMillis = getCompletionPollInterval(this.cluster.getConf());
    boolean interrupted = false;
    try {
        while (!this.isComplete()) {
            try {
                Thread.sleep((long) completionPollIntervalMillis);
            } catch (InterruptedException ignored) {
                // Keep waiting for completion, but remember the interrupt. Re-interrupting
                // right here would make every following sleep() fail immediately.
                interrupted = true;
            }
        }
        return this.isSuccessful();
    } finally {
        if (interrupted) {
            // Restore the interrupt status only after the last status call has been made.
            Thread.currentThread().interrupt();
        }
    }
}
在submit的源码中,
1.再次确认提交状态
2.使用新API,避免兼容问题
/**
 * Submits this job and returns without waiting for it to finish.
 *
 * <p>The steps run in a fixed order: verify the job is still in the {@code DEFINE} state,
 * switch to the new API, connect to the cluster, then run
 * {@code JobSubmitter.submitJobInternal} as the job's user via {@code ugi.doAs}. Afterwards
 * the job state becomes {@code RUNNING} and the tracking URL is logged.
 *
 * @throws IOException            declared; may come from connecting or submitting
 * @throws InterruptedException   declared; may come from connecting or submitting
 * @throws ClassNotFoundException declared; may come from connecting or submitting
 */
public void submit() throws IOException, InterruptedException, ClassNotFoundException {
    // Step 1: the job must not have been submitted before.
    ensureState(Job.JobState.DEFINE);
    // Step 2: use the new API.
    setUseNewAPI();
    // Step 3: make sure this.cluster (and with it the client protocol) is available.
    connect();

    // The submitter works with the cluster's file system and client.
    final JobSubmitter submitter = getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    PrivilegedExceptionAction<JobStatus> submission = new PrivilegedExceptionAction<JobStatus>() {
        @Override
        public JobStatus run() throws IOException, InterruptedException, ClassNotFoundException {
            // Core of the submission.
            return submitter.submitJobInternal(Job.this, Job.this.cluster);
        }
    };
    status = (JobStatus) ugi.doAs(submission);

    // From here on the job counts as running.
    state = Job.JobState.RUNNING;
    LOG.info("The url to track the job: " + getTrackingURL());
}
3.初始化工作的connect():可以看出,当cluster为空时,该方法会创建一个Cluster实例并赋值给this.cluster(方法本身没有返回值)
/**
 * Creates the {@code Cluster} for this job on first use and stores it in {@code this.cluster}.
 * Does nothing when a cluster is already present. The cluster is constructed from the job's
 * configuration inside {@code ugi.doAs}.
 *
 * @throws IOException            declared; may come from constructing the cluster
 * @throws InterruptedException   declared; may come from {@code doAs}
 * @throws ClassNotFoundException declared by the privileged action
 */
private synchronized void connect() throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster != null) {
        // Already connected.
        return;
    }
    PrivilegedExceptionAction<Cluster> clusterFactory = new PrivilegedExceptionAction<Cluster>() {
        @Override
        public Cluster run() throws IOException, InterruptedException, ClassNotFoundException {
            // Step 4: the Cluster constructor performs the configuration-based initialization.
            return new Cluster(Job.this.getConfiguration());
        }
    };
    cluster = (Cluster) ugi.doAs(clusterFactory);
}
4.Cluster的初始化和创建
/**
 * Creates a cluster handle from a configuration only; delegates to the two-argument
 * constructor with a {@code null} job tracker address.
 *
 * @param conf configuration used to initialize the cluster
 * @throws IOException if initialization fails
 */
public Cluster(Configuration conf) throws IOException {
    this((InetSocketAddress) null, conf);
}

/**
 * Creates a cluster handle: resets the path/provider fields, stores the configuration,
 * captures the current user and then runs {@code initialize}.
 *
 * @param jobTrackAddr job tracker address, may be {@code null}
 * @param conf         configuration used to initialize the cluster
 * @throws IOException if the current user cannot be determined or initialization fails
 */
public Cluster(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    // File system, system/staging/history directories and the provider list all start unset.
    fs = null;
    sysDir = null;
    stagingAreaDir = null;
    jobHistoryDir = null;
    providerList = null;

    this.conf = conf;
    // User and group information of the caller.
    ugi = UserGroupInformation.getCurrentUser();
    // Step 5: choose the client protocol provider and client.
    initialize(jobTrackAddr, conf);
}
5.使用initialize进行初始化
/**
 * Selects the {@code ClientProtocolProvider} and {@code ClientProtocol} for this cluster.
 *
 * <p>Each provider in {@code providerList} is asked in turn to create a client protocol; the
 * first one that returns a non-null protocol is used. Providers that throw are recorded as
 * suppressed exceptions on the {@code IOException} that is thrown when no provider succeeds.
 *
 * @param jobTrackAddr job tracker address; when {@code null} the single-argument
 *                     {@code create(conf)} is used
 * @param conf         configuration passed to each provider
 * @throws IOException if no provider produced a client protocol
 */
private void initialize(InetSocketAddress jobTrackAddr, Configuration conf) throws IOException {
    initProviderList();
    // Created up front so that every provider failure can be attached to it.
    final IOException initEx = new IOException("Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.");
    if (jobTrackAddr != null) {
        LOG.info("Initializing cluster for Job Tracker=" + jobTrackAddr.toString());
    }

    // The notes around this code name two provider implementations: a local one and a YARN
    // one. Whichever returns a non-null protocol first wins; the match is driven by the
    // configuration.
    for (ClientProtocolProvider provider : providerList) {
        final String providerName = provider.getClass().getName();
        LOG.debug("Trying ClientProtocolProvider : " + providerName);
        try {
            ClientProtocol candidate = (jobTrackAddr == null)
                    ? provider.create(conf)
                    : provider.create(jobTrackAddr, conf);
            if (candidate == null) {
                LOG.debug("Cannot pick " + providerName + " as the ClientProtocolProvider - returned null protocol");
            } else {
                clientProtocolProvider = provider;
                client = candidate;
                LOG.debug("Picked " + providerName + " as the ClientProtocolProvider");
                break;
            }
        } catch (Exception failure) {
            String errMsg = "Failed to use " + providerName + " due to error: ";
            initEx.addSuppressed(new IOException(errMsg, failure));
            LOG.info(errMsg, failure);
        }
    }

    if (clientProtocolProvider == null || client == null) {
        throw initEx;
    }
}
6.LocalClientProtocolProvider(本地运行)由单参数直接返回LocalJobRunner(conf);
一般在本地开发时会在IDEA中直接运行,程序会找到本地搭建的Hadoop空壳;由于其中没有任何配置,mapreduce.framework.name取默认值local,因此默认以local方式提交
/**
 * Returns a {@code LocalJobRunner} when {@code mapreduce.framework.name} is {@code "local"}
 * (also the default when the key is unset); otherwise returns {@code null}.
 * On the local path {@code mapreduce.job.maps} is set to 1 in {@code conf}.
 *
 * @param conf job configuration; modified on the local path
 * @return a local runner, or {@code null} if another framework is configured
 * @throws IOException declared; may come from creating the runner
 */
public ClientProtocol create(Configuration conf) throws IOException {
    String framework = conf.get("mapreduce.framework.name", "local");
    if ("local".equals(framework)) {
        conf.setInt("mapreduce.job.maps", 1);
        return new LocalJobRunner(conf);
    }
    return null;
}

/**
 * Address-based creation is not supported by the local provider.
 *
 * @return always {@code null}
 */
public ClientProtocol create(InetSocketAddress addr, Configuration conf) {
    return null;
}
7.YarnClientProtocolProvider(Yarn上运行),双参调用单参,由一个三元运算符来判断返回的是new YARNRunner(conf) 还是null
一般在集群中以hadoop jar 或yarn jar 的方式提交会寻找到集群中的配置文件来识别出这是以yarn的方式进行提交的
/**
 * Returns a {@code YARNRunner} when {@code mapreduce.framework.name} is {@code "yarn"};
 * otherwise returns {@code null}.
 *
 * @param conf job configuration
 * @return a YARN runner, or {@code null} if YARN is not the configured framework
 * @throws IOException declared by the provider contract
 */
public ClientProtocol create(Configuration conf) throws IOException {
    // Step 8: a job configured for YARN gets a YARNRunner.
    if ("yarn".equals(conf.get("mapreduce.framework.name"))) {
        return new YARNRunner(conf);
    }
    return null;
}

/**
 * Ignores {@code addr} and delegates to {@link #create(Configuration)}.
 */
public ClientProtocol create(InetSocketAddress addr, Configuration conf) throws IOException {
    return create(conf);
}
8.YARNRunner:它有单参、双参、三参三个构造方法,单参调用双参,双参调用三参,最终都会进入三参构造方法
/**
 * Creates a runner whose {@code ResourceMgrDelegate} is built from a
 * {@code YarnConfiguration} wrapping {@code conf}; delegates to the two-argument constructor.
 */
public YARNRunner(Configuration conf) {
    this(conf, new ResourceMgrDelegate(new YarnConfiguration(conf)));
}

/**
 * Creates a runner with the given delegate and a {@code ClientCache} built from
 * {@code conf} and that delegate; delegates to the three-argument constructor.
 */
public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate) {
    this(conf, resMgrDelegate, new ClientCache(conf, resMgrDelegate));
}

/**
 * Stores the configuration, delegate and client cache, and obtains the default
 * {@code FileContext} for the configuration.
 *
 * @throws RuntimeException wrapping {@code UnsupportedFileSystemException} when the
 *                          file context cannot be obtained
 */
public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate, ClientCache clientCache) {
    this.conf = conf;
    this.resMgrDelegate = resMgrDelegate;
    this.clientCache = clientCache;
    try {
        // Only this lookup can raise UnsupportedFileSystemException.
        this.defaultFileContext = FileContext.getFileContext(conf);
    } catch (UnsupportedFileSystemException cause) {
        throw new RuntimeException("Error in instantiating YarnClient", cause);
    }
}
到这,job初始化结束,
9.JobSubmitter类的核心方法submitJobInternal,也就是submit()中调用并返回作业状态的方法,作业的提交工作从这里真正开始
// Fields of the submitter that submitJobInternal (below) works with.
// File system handle; submitJobInternal uses it to qualify the submit directory for logging
// and to delete that directory when the submission fails.
private FileSystem jtFs;
// Client protocol used to talk to the cluster: it hands out new job IDs, returns the queue
// administrators and performs the actual submitJob call.
private ClientProtocol submitClient;
// Host name of the submitting machine, filled from InetAddress.getLocalHost().
private String submitHostName;
// Host address of the submitting machine, filled from InetAddress.getLocalHost().
private String submitHostAddress;
/**
 * Prepares {@code job} for execution and submits it through {@code submitClient}.
 *
 * <p>Visible steps, in order: validate the job ({@code checkSpecs}), resolve the staging
 * directory, record the submitting host, obtain a new job ID, build the per-job submit
 * directory, set up tokens and the shuffle secret key, copy the job files, write the input
 * splits (their count becomes {@code mapreduce.job.maps}), store queue ACL and optional
 * token tracking / reservation IDs, write the configuration file, and finally call
 * {@code submitClient.submitJob}. If no status was obtained (an exception was thrown or the
 * client returned {@code null}), the submit directory is deleted in the {@code finally} block.
 *
 * @param job     the job being submitted; receives the new job ID
 * @param cluster used to resolve the staging directory
 * @return the non-null status returned by {@code submitClient.submitJob}
 * @throws IOException              e.g. when the shuffle key cannot be generated or the
 *                                  client returns no status ("Could not launch job")
 * @throws IllegalArgumentException when the number of splits exceeds a configured
 *                                  {@code mapreduce.job.max.map}
 * @throws ClassNotFoundException   declared; may come from the helper calls
 * @throws InterruptedException     declared; may come from the helper calls
 */
JobStatus submitJobInternal(Job job, Cluster cluster) throws ClassNotFoundException, InterruptedException, IOException {
// Validate the job specification.
// NOTE(review): presumably verifies that an output location is configured and does not exist yet - confirm in checkSpecs.
this.checkSpecs(job);
// Job configuration; everything set below is written into it.
Configuration conf = job.getConfiguration();
// NOTE(review): judging by its name this registers the MapReduce framework (not the configuration itself) in the distributed cache - confirm.
addMRFrameworkToDistributedCache(conf);
// Staging area under which the per-job submit directory is created.
Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
// Information about the local (submitting) host.
InetAddress ip = InetAddress.getLocalHost();
// Remember the submitting host's address and name on this submitter.
if (ip != null) {
this.submitHostAddress = ip.getHostAddress();
this.submitHostName = ip.getHostName();
// Publish both values in the job configuration.
conf.set("mapreduce.job.submithostname", this.submitHostName);
conf.set("mapreduce.job.submithostaddress", this.submitHostAddress);
}
// Ask the cluster client for a new job ID.
JobID jobId = this.submitClient.getNewJobID();
// Attach the ID to the job.
job.setJobID(jobId);
// Per-job submit directory: <staging area>/<job id>.
Path submitJobDir = new Path(jobStagingArea, jobId.toString());
JobStatus status = null;
JobStatus var25;
try {
// Basic job parameters: submitting user, HTTP filter initializer and submit directory.
conf.set("mapreduce.job.user.name", UserGroupInformation.getCurrentUser().getShortUserName());
conf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
conf.set("mapreduce.job.dir", submitJobDir.toString());
LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir");
// Obtain tokens covering the submit directory and populate the token cache from the configuration.
TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{submitJobDir}, conf);
this.populateTokenCache(conf, job.getCredentials());
// If the credentials carry no shuffle secret key yet, generate one (HmacSHA1, init(64)) and store it in the job credentials.
if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
KeyGenerator keyGen;
try {
keyGen = KeyGenerator.getInstance("HmacSHA1");
keyGen.init(64);
} catch (NoSuchAlgorithmException var20) {
throw new IOException("Error generating shuffle secret key", var20);
}
SecretKey shuffleKey = keyGen.generateKey();
TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(), job.getCredentials());
}
// With encrypted intermediate spill enabled, the application master is limited to a single attempt.
if (CryptoUtils.isEncryptedSpillEnabled(conf)) {
conf.setInt("mapreduce.am.max-attempts", 1);
LOG.warn("Max job attempts set to 1 since encrypted intermediatedata spill is enabled");
}
// Copy the job's files into the submit directory and configure them.
this.copyAndConfigureFiles(job, submitJobDir);
// Path of the job configuration file inside the submit directory.
Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
LOG.debug("Creating splits at " + this.jtFs.makeQualified(submitJobDir));
// writeSplits computes the input splits; its return value is stored as the number of map tasks.
int maps = this.writeSplits(job, submitJobDir);
conf.setInt("mapreduce.job.maps", maps);
LOG.info("number of splits:" + maps);
int maxMaps = conf.getInt("mapreduce.job.max.map", -1);
// Fail when a non-negative map limit is configured and the split count exceeds it.
if (maxMaps >= 0 && maxMaps < maps) {
throw new IllegalArgumentException("The number of map tasks " + maps + " exceeded limit " + maxMaps);
}
// Name of the queue the job is submitted to ("default" when unset).
String queue = conf.get("mapreduce.job.queuename", "default");
// Fetch the queue's administrator ACL and store it in the configuration.
AccessControlList acl = this.submitClient.getQueueAdmins(queue);
conf.set(QueueManager.toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());
// NOTE(review): presumably removes token-referral settings from the configuration before it is written out - confirm.
TokenCache.cleanUpTokenReferral(conf);
// Token tracking IDs are recorded only when the corresponding switch is enabled.
if (conf.getBoolean("mapreduce.job.token.tracking.ids.enabled", false)) {
// Collect the tracking ID of every token held in the job credentials.
ArrayList<String> trackingIds = new ArrayList();
Iterator var15 = job.getCredentials().getAllTokens().iterator();
while(var15.hasNext()) {
Token<? extends TokenIdentifier> t = (Token)var15.next();
trackingIds.add(t.decodeIdentifier().getTrackingId());
}
conf.setStrings("mapreduce.job.token.tracking.ids", (String[])trackingIds.toArray(new String[trackingIds.size()]));
}
// Propagate the reservation ID when the job has one.
ReservationId reservationId = job.getReservationId();
if (reservationId != null) {
conf.set("mapreduce.job.reservation.id", reservationId.toString());
}
// Write the final configuration to the job configuration file and print the tokens.
this.writeConf(conf, submitJobFile);
this.printTokens(jobId, job.getCredentials());
// Hand the job over to the cluster client: job ID, submit directory and credentials go in, a status comes back.
// Per the surrounding notes, submitClient is the LocalJobRunner or YARNRunner chosen during cluster initialization.
status = this.submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials());
if (status == null) {
throw new IOException("Could not launch job");
}
var25 = status;
} finally {
// status is still null when anything above failed, including a null status from the client.
if (status == null) {
LOG.info("Cleaning up the staging area " + submitJobDir);
if (this.jtFs != null && submitJobDir != null) {
// Remove the job's submit directory and everything in it.
this.jtFs.delete(submitJobDir, true);
}
}
}
return var25;
}