## JobControl的使用
#### 1. JobControl配置
// 1. Declare the JobControl that groups the controlled jobs.
JobControl jobControl = new JobControl("groupName");
// 2. Build the configuration.
Configuration conf = new Configuration();
conf.set("name","value");
// ... remaining configuration omitted ...
// Typically the output path is checked here and deleted if it already exists.
FileSystem fileSystem = FileSystem.get(new Configuration());
if (fileSystem.exists(new Path(outputPath))) {
    LOG.warn("output: " + outputPath + " already exists! DELETE");
    fileSystem.delete(new Path(outputPath), true);
}
// 3. Configure the job. All settings go on the single `job` instance declared here.
Job job = Job.getInstance(conf);
job.setJarByClass(xxx.class);
job.setJobName("jobName");
job.setInputFormatClass(OrcNewInputFormat.class);
for (String date : dateList) {
    // It is best to also check here that the input path exists.
    // NOTE(review): inputPath is presumably derived from `date` in the real code;
    // as written the same path is added on every iteration — confirm.
    // NOTE(review): MultipleInputs registers its own per-path input format; verify
    // that the setInputFormatClass call above is still the intended one.
    MultipleInputs.addInputPath(job, new Path(inputPath), RCFileMapReduceInputFormat.class, TrackDataMapper.class);
}
job.setMapperClass(TrackDataMapper.class);
job.setReducerClass(TrackDataReducer.class);
job.setMapOutputKeyClass(TextTuple.class);
job.setMapOutputValueClass(TextTuple.class);
job.setNumReduceTasks(5000);
job.setOutputKeyClass(TextTuple.class);
job.setOutputValueClass(TextTuple.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
// 4. The job is configured: wrap it in a ControlledJob.
ControlledJob cj = new ControlledJob(conf);
cj.setJob(job);
// 5. Declare dependencies, if any. cj is only submitted once every job it depends on
//    has completed; call addDependingJob once per dependency. Omit these calls when
//    the job does not need to wait for any other job.
cj.addDependingJob(other_cj1);
cj.addDependingJob(other_cj2);
// 6. Register the ControlledJob with the JobControl.
jobControl.addJob(cj);
以上完成JobControl配置
下面运行
#### 2. JobControl运行
JobControl 本身应该有可以直接运行的方式(它实现了 Runnable,可以交给一个线程执行)。下面是自己写的运行方法,运行过程中可以输出各作业的状态和进度。
直接贴代码了,不多说。
调用方法:
// Pass the JobControl (not an individual Job); `true` enables verbose state/progress logging.
boolean res = RunTool.runJobControll(jobControl, true);
package xxx;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Runs a {@link JobControl} on a background thread and polls it until every
 * controlled job has finished or one of them has failed.
 */
public class RunTool {
    private static final Logger LOG = Logger.getLogger(RunTool.class);

    /** Pause between two polls of the job control state, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 10 * 1000;

    /**
     * Starts the given job control and blocks until it has finished.
     *
     * <p>As soon as a failed job is observed, the job control is stopped and all
     * jobs that are still running are killed. The {@code verbose} flag only
     * controls how much is logged while waiting; it does not affect failure
     * handling. The job control is always asked to stop before this method
     * returns or throws, so its runner thread does not outlive the call.
     *
     * @param jobControl the job control holding the controlled jobs to run
     * @param verbose    whether to log state changes and per-job progress
     * @return {@code true} if no controlled job ended up in the failed list
     * @throws InterruptedException if the polling thread is interrupted while waiting
     * @throws IOException          if querying or killing a job fails
     */
    public static boolean runJobControll(JobControl jobControl, boolean verbose)
            throws InterruptedException, IOException {
        LOG.info("start to run job control with model" + (verbose ? " verbose" : " clean"));
        Thread runningThread = new Thread(jobControl);
        runningThread.start();

        Map<String, Integer> jobProgress = new HashMap<String, Integer>();
        // -1 guarantees the first observed value of each counter is logged.
        int numWait = -1;
        int numRunning = -1;
        int numSuccess = -1;
        int numFailed = -1;

        try {
            while (!jobControl.allFinished()) {
                Thread.sleep(POLL_INTERVAL_MS);
                int failed = jobControl.getFailedJobList().size();

                if (verbose) {
                    int wait = jobControl.getWaitingJobList().size();
                    int running = jobControl.getRunningJobList().size();
                    int success = jobControl.getSuccessfulJobList().size();
                    if (numWait != wait) {
                        numWait = wait;
                        LOG.info("job control state alert -- waiting jobs: " + numWait);
                    }
                    if (numRunning != running) {
                        numRunning = running;
                        LOG.info("job control state alert -- running jobs: " + numRunning);
                    }
                    if (numSuccess != success) {
                        numSuccess = success;
                        LOG.info("job control state alert -- successful jobs: " + numSuccess);
                    }
                    if (numFailed != failed) {
                        numFailed = failed;
                        LOG.info("job control state alert -- failed jobs: " + numFailed);
                    }
                }

                // Failure handling must not depend on the logging mode.
                if (failed > 0) {
                    jobControl.stop();
                    LOG.info("some controlled job failed! stop the job control");
                    killRunningJobs(jobControl);
                    break;
                }

                if (verbose) {
                    logProgress(jobControl, jobProgress);
                }
            }
        } finally {
            // Without this the JobControl runner thread is never told to stop
            // after a normal completion or an interrupt of the polling thread.
            jobControl.stop();
        }

        List<ControlledJob> failedList = jobControl.getFailedJobList();
        for (ControlledJob fcj : failedList) {
            LOG.error("job: " + fcj.getJobName() + " failed!");
        }
        return failedList.isEmpty();
    }

    /** Kills every job the job control currently reports as running. */
    private static void killRunningJobs(JobControl jobControl)
            throws IOException, InterruptedException {
        LOG.info("stop all running jobs");
        for (ControlledJob wcj : jobControl.getRunningJobList()) {
            LOG.info("killing job: " + wcj.getJobName());
            wcj.killJob();
        }
    }

    /** Logs the progress of each running job whose percentage changed since the last poll. */
    private static void logProgress(JobControl jobControl, Map<String, Integer> jobProgress)
            throws IOException {
        for (ControlledJob cj : jobControl.getRunningJobList()) {
            String jobId = cj.getJobID();
            Job job = cj.getJob();
            // Map and reduce phases each contribute half of the overall percentage.
            int currentJobProgress = (int) (100 * (0.5 * job.mapProgress() + 0.5 * job.reduceProgress()));
            Integer previous = jobProgress.get(jobId);
            if (previous == null || previous.intValue() != currentJobProgress) {
                LOG.info("Controlled Job Alert -- job: " + job.getJobName() +
                        ", progress: " + currentJobProgress + "%" +
                        ", track url: " + job.getTrackingURL());
                jobProgress.put(jobId, currentJobProgress);
            }
        }
    }
}
#### 3. JobControl成功或失败判断
if (res) {
    // Success: read the counters of every successfully finished job.
    List<ControlledJob> finishList = jobControl.getSuccessfulJobList();
    for (ControlledJob controlledJob : finishList) {
        Counters counters = controlledJob.getJob().getCounters();
        // Debug output: dump every counter of every group, whatever it is.
        Iterable<String> gcList = counters.getGroupNames(); // names of all counter groups
        for (String counter_name : gcList) {
            CounterGroup gc = counters.getGroup(counter_name);
            cnt.add( counter_name +"# start ~~~~~");
            for (Counter counter : gc) {
                cnt.add( counter.getName() +"#"+counter.getValue());
                LOG.info( counter.getName() + "\t" + counter.getValue() );
            }
            cnt.add( counter_name +"# end ~~~~~");
        }
        /* Alternative for the final result: report only the user-defined group,
           i.e. the per-stage output counts and the final output count.
        CounterGroup gc = counters.getGroup(Consts.COUNTER_NAME);
        for (Counter counter : gc) {
            cnt.add( counter.getName() +"#"+counter.getValue());
            LOG.info( counter.getName() + "\t" + counter.getValue() );
        }
        */
    }
} else {
    // Failure: collect the names of the failed jobs.
    List<ControlledJob> failedList = jobControl.getFailedJobList();
    for (ControlledJob fail : failedList) {
        failed.add( fail.getJob().getJobName() );
        LOG.info("### Failed:\t" + fail.getJob().getJobName());
    }
}
如果是多线程,那么需要一个锁。
// Record the outcome and mark completion while holding this object's monitor.
// NOTE(review): readers of isSucc/isFinished presumably synchronize on the same
// object — confirm against the code that polls these fields.
synchronized (this) {
isSucc = res;
isFinished = true;
}
#### 4. JobControl获取计数器值
代码在3中都有,既可以获取指定计数器的值,也可以获取所有mapreduce 计数器的值
Iterable<String> gcList = counters.getGroupNames(); // names of all counter groups; this covers every MapReduce counter, including the built-in ones
for (String counter_name : gcList) {
// Note in particular: this iterates over ALL counter groups,
// not only the user-defined ones but also built-in counters such as job input/output statistics.
}