2021SC@SDUSC
概述
本次分析的是Pig(一种用于操作hadoop的轻量级脚本语言)中hadoop的executionengine包下MapReduceLauncher类的代码
MapReduceLauncher类继承自launcher类
该类是以MapReduce方式启动Pig的主类
getError方法
获取导致某个存储位置(store location)在后端执行失败的异常(如果有的话)
/**
 * Looks up the exception stored in {@code failureMap} for the given store location.
 *
 * @param spec the store location used as the lookup key
 * @return whatever {@code failureMap.get(spec)} yields for that key
 */
public Exception getError(FileSpec spec) {
    final Exception recorded = failureMap.get(spec);
    return recorded;
}
launchPig方法
方法开头创建的statsJobClient是一个通用的JobClient,用于检查各个Job的进度
/**
 * Entry point that takes a physical plan, a group name and the Pig context.
 * Only the first statements of the body are reproduced here; the rest of the
 * method (including its return statement) is elided, and exe, conf, mrp and
 * defaultProperties are declared outside the visible lines.
 *
 * @param php the physical plan handed in by the caller
 * @param grpName group name handed in by the caller; not referenced in the visible lines
 * @param pc the Pig context; forwarded to the JobControlCompiler below
 * @return a PigStats object (how it is produced is not visible in this excerpt)
 * @throws PlanException declared by the signature
 * @throws VisitorException declared by the signature
 * @throws IOException declared by the signature
 * @throws ExecException declared by the signature
 * @throws JobCreationException declared by the signature
 * @throws Exception declared by the signature; as the broadest type it subsumes the others
 */
public PigStats launchPig(PhysicalPlan php,
String grpName,
PigContext pc) throws PlanException,
VisitorException,
IOException,
ExecException,
JobCreationException,
Exception {
// Job client built from the configuration returned by exe.getJobConf().
JobClient statsJobClient = new JobClient(exe.getJobConf());
// Compiler constructed from the Pig context, conf, and a Configuration derived from defaultProperties.
JobControlCompiler jcc = new JobControlCompiler(pc, conf, ConfigurationUtil.toConfiguration(defaultProperties));
// Passes mrp and conf to the script state; presumably records the plan's adjacency info in conf (TODO confirm).
MRScriptState.get().addWorkflowAdjacenciesToConf(mrp, conf);
}
用来开始收集统计信息
// Start PigStats with the stats object instantiated by the context's execution engine.
PigStats.start(pc.getExecutionEngine().instantiatePigStats());
// Begin collection, handing over the context, the stats job client, the compiler and mrp.
MRPigStatsUtil.startCollection(pc, statsJobClient, jcc, mrp);
查找所有中间数据存储。由于该计划(plan)会在编译/执行期间被销毁,因此必须提前完成这一步。
// Build an intermediate-data visitor over mrp and run it.
MRIntermediateDataVisitor intermediateVisitor = new MRIntermediateDataVisitor(mrp);
intermediateVisitor.visit();
// Bookkeeping lists for job outcomes; all start empty.
List<Job> failedJobs = new LinkedList<Job>();
List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
List<Job> succJobs = new LinkedList<Job>();
// Size of mrp, used as the total job count.
int totalMRJobs = mrp.size();
// Completed-job counter, starts at zero.
int numMRJobsCompl = 0;
// Last progress value; -1 presumably means "nothing reported yet" (TODO confirm).
double lastProg = -1;
// Wall-clock time captured once here for the whole script.
long scriptSubmittedTimestamp = System.currentTimeMillis();
为作业控制线程创建异常处理程序,并给作业控制线程注册处理程序
// Handler object for the job-control thread (per its class name); attaching it to the
// thread is not part of this excerpt.
JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
// "stop.on.failure" falls back to "false" when absent. parseBoolean yields the primitive
// directly, avoiding the box-then-unbox that Boolean.valueOf(String) incurs.
boolean stop_on_failure =
        Boolean.parseBoolean(pc.getProperties().getProperty("stop.on.failure", "false"));
// Initial value; assigned later from the result of stopJobsOnFailure.
boolean stoppedOnFailure = false;
标记作业的提交时间,以便将其反映在作业历史记录中
for (Job job : jc.getWaitingJobs()) {
JobConf jobConfCopy = job.getJobConf();
jobConfCopy.set("pig.script.submitted.timestamp",
Long.toString(scriptSubmittedTimestamp));
jobConfCopy.set("pig.job.submitted.timestamp",
Long.toString(System.currentTimeMillis()));
job.setJobConf(jobConfCopy);
}
通过频繁轮询已完成的作业来收集作业统计信息
// Hand jc to MRPigStatsUtil so it can accumulate statistics from it.
MRPigStatsUtil.accumulateStats(jc);
如果启用了stop_on_failure,我们需要在任何作业失败时立即停止
// True only when at least one job has failed and stop_on_failure is set.
stoppedOnFailure = stopJobsOnFailure(stop_on_failure);
stopJobsOnFailure方法
如果启用了stop_on_failure,并且有任一作业已经失败,则会将其余处于就绪和运行状态的作业全部置为失败。
/**
 * Fails all ready and running jobs once any job has failed, provided
 * stop_on_failure is enabled.
 *
 * @param stop_on_failure whether the stop-on-failure behaviour is switched on
 * @return {@code true} if at least one job has failed and stop_on_failure is
 *         enabled; {@code false} otherwise
 * @throws IOException declared by the signature; presumably raised by the job-control calls
 * @throws InterruptedException declared by the signature; presumably raised by the job-control calls
 */
private boolean stopJobsOnFailure(boolean stop_on_failure) throws IOException, InterruptedException {
    // Nothing to stop unless a job has failed and the option is on.
    if (jc.getFailedJobs().isEmpty() || !stop_on_failure) {
        return false;
    }

    List<ControlledJob> pending = jc.getReadyJobsList();
    List<ControlledJob> active = jc.getRunningJobList();

    boolean anythingToFail = !pending.isEmpty() || !active.isEmpty();
    if (anythingToFail) {
        log.info("Some job(s) failed. Failing other ready and running jobs as -stop_on_failure is on");
        for (ControlledJob readyJob : pending) {
            readyJob.failJob("Failing ready job for -stop_on_failure: " + readyJob.getMapredJobId());
        }
        for (ControlledJob runningJob : active) {
            runningJob.failJob("Failing running job for -stop_on_failure: " + runningJob.getMapredJobId());
        }
    }
    return true;
}