Hadoop JobControl: Design and Usage

1. JobControl design principles:

JobControl is built from two classes: Job and JobControl (in the new API, ControlledJob and JobControl).

The Job class wraps a MapReduce job together with its dependencies. It monitors the state of the jobs it depends on and updates its own state accordingly.

A job starts in the WAITING state. Once it has no dependencies, or all of its depending jobs have completed successfully, it moves to READY. A READY job can be submitted to the Hadoop cluster, at which point it enters RUNNING; from RUNNING it finishes in either SUCCESS or FAILED, depending on how the run goes.

Note that if any job it depends on fails, a job fails as well (DEPENDENT_FAILED in the new API), which triggers a domino effect: every downstream job in the chain fails too.
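
The state machine described above can be watched from client code. Below is a minimal sketch (not from the original post) using the new-API class names; it assumes a JobControl instance and the list of ControlledJob objects registered with it already exist, and simply polls each job's state while a separate thread drives the controller:

import java.util.List;

import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class JobStateWatcher {
    // Prints each job's current state until the whole group reaches a final state.
    public static void watch(JobControl jc, List<ControlledJob> cjobs) throws InterruptedException {
        new Thread(jc).start();                  // JobControl implements Runnable
        while (!jc.allFinished()) {
            for (ControlledJob cj : cjobs) {
                // Typical progression: WAITING -> READY -> RUNNING -> SUCCESS/FAILED;
                // a job whose dependency failed is marked DEPENDENT_FAILED.
                System.out.println(cj.getJobName() + " : " + cj.getJobState());
            }
            Thread.sleep(1000);
        }
        jc.stop();                               // let the controller thread exit
    }
}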

  



Old API (org.apache.hadoop.mapred.jobcontrol):
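
In the old API, the wrapper class is org.apache.hadoop.mapred.jobcontrol.Job and the controller is org.apache.hadoop.mapred.jobcontrol.JobControl. The following is a minimal sketch, not taken from the original post; it assumes two already-configured JobConf objects, confA and confB, and the class and variable names are illustrative only:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;

public class OldApiJobControlSketch {
    public static void runChain(JobConf confA, JobConf confB) throws Exception {
        Job jobA = new Job(confA);        // wraps an already-configured JobConf
        Job jobB = new Job(confB);
        jobB.addDependingJob(jobA);       // jobB stays WAITING until jobA succeeds

        JobControl jc = new JobControl("old-api-group");
        jc.addJob(jobA);
        jc.addJob(jobB);

        Thread t = new Thread(jc);        // JobControl implements Runnable
        t.start();
        while (!jc.allFinished()) {       // poll until every job reaches a final state
            Thread.sleep(500);
        }
        jc.stop();
    }
}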



New API (org.apache.hadoop.mapreduce.lib.jobcontrol):
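
In the new API, ControlledJob wraps an org.apache.hadoop.mapreduce.Job and JobControl drives the group. A minimal sketch follows; it is not from the original post, job1 and job2 are assumed to be fully configured Job instances, and the names are illustrative only:

import java.util.Arrays;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class NewApiJobControlSketch {
    public static void runChain(Job job1, Job job2) throws Exception {
        ControlledJob cjob1 = new ControlledJob(job1, null);                 // no dependencies
        ControlledJob cjob2 = new ControlledJob(job2, Arrays.asList(cjob1)); // runs after cjob1

        JobControl jc = new JobControl("new-api-group");
        jc.addJob(cjob1);
        jc.addJob(cjob2);

        Thread t = new Thread(jc);
        t.start();
        while (!jc.allFinished()) {
            Thread.sleep(500);
        }
        jc.stop();

        System.out.println("succeeded: " + jc.getSuccessfulJobList());
        System.out.println("failed:    " + jc.getFailedJobList());
    }
}

The full demo below uses the equivalent pattern of constructing ControlledJob(conf) and attaching the job afterwards with setJob().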



2. JobControl code example:

Job1 and Job2 each clean one of the two input files (re-emitting tab-separated key/value pairs), and Job3, which depends on both, reads the two intermediate outputs with KeyValueTextInputFormat and writes "OK" for every key that collects at least two distinct values, i.e. every key present in both inputs.

import java.io.File;
import java.io.IOException;
import java.util.HashSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class JobControlDemo {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherargs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherargs.length != 3) {
            System.err.println("Usage: JobControlDemo <InputPath1> <InputPath2> <OutPath>");
            System.exit(2);
        }

        // Create the three underlying MapReduce jobs
        Job job1 = Job.getInstance(conf, JobControlDemo.class.getSimpleName() + "1");
        Job job2 = Job.getInstance(conf, JobControlDemo.class.getSimpleName() + "2");
        Job job3 = Job.getInstance(conf, JobControlDemo.class.getSimpleName() + "3");

        // Job1: clean the first input file and write it to <OutPath>/mid1
        job1.setJarByClass(JobControlDemo.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(Text.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        job1.setMapperClass(MyMapper1.class);
        job1.setReducerClass(MyReducer1.class);
        job1.setInputFormatClass(TextInputFormat.class);
        job1.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job1, new Path(otherargs[0]));
        FileOutputFormat.setOutputPath(job1, new Path(otherargs[2] + File.separator + "mid1"));

        // Job2: clean the second input file and write it to <OutPath>/mid2
        job2.setJarByClass(JobControlDemo.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setMapperClass(MyMapper2.class);
        job2.setReducerClass(MyReducer2.class);
        job2.setInputFormatClass(TextInputFormat.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(otherargs[1]));
        FileOutputFormat.setOutputPath(job2, new Path(otherargs[2] + File.separator + "mid2"));

        // Job3: join the two intermediate outputs and write the result to <OutPath>/result
        job3.setJarByClass(JobControlDemo.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setMapperClass(MyMapper3.class);
        job3.setReducerClass(MyReducer3.class);
        job3.setInputFormatClass(KeyValueTextInputFormat.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job3, new Path(otherargs[2] + File.separator + "mid1"));
        FileInputFormat.addInputPath(job3, new Path(otherargs[2] + File.separator + "mid2"));
        FileOutputFormat.setOutputPath(job3, new Path(otherargs[2] + File.separator + "result"));

        // Create the controlled jobs
        ControlledJob cjob1 = new ControlledJob(conf);
        ControlledJob cjob2 = new ControlledJob(conf);
        ControlledJob cjob3 = new ControlledJob(conf);

        // Wrap the plain jobs into controlled jobs
        cjob1.setJob(job1);
        cjob2.setJob(job2);
        cjob3.setJob(job3);

        // Declare the dependencies: job3 may only run after job1 and job2 have succeeded
        //cjob2.addDependingJob(cjob1);
        cjob3.addDependingJob(cjob1);
        cjob3.addDependingJob(cjob2);

        // Create the job controller
        JobControl jc = new JobControl("My control job");

        // Register the controlled jobs with the controller
        jc.addJob(cjob1);
        jc.addJob(cjob2);
        jc.addJob(cjob3);

        /**
         * JobControl implements the Runnable interface, so it has to be started in its
         * own thread. Calling jc.run() directly on the main thread would never return,
         * because the run loop only exits after stop() has been called.
         */
        //jc.run();

        Thread jcThread = new Thread(jc);
        jcThread.start();
        while (true) {
            if (jc.allFinished()) {
                System.out.println(jc.getSuccessfulJobList());
                jc.stop();
                System.exit(0);
            }
            if (jc.getFailedJobList().size() > 0) {
                System.out.println(jc.getFailedJobList());
                jc.stop();
                System.exit(1);
            }
            Thread.sleep(500); // avoid busy-waiting while polling
        }
    }

    /**
     * First job: re-emit each tab-separated line of the first input as (key, value)
     */
    public static class MyMapper1 extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] spl1 = value.toString().split("\t");
            if (spl1.length == 2) {
                context.write(new Text(spl1[0].trim()), new Text(spl1[1].trim()));
            }
        }
    }

    public static class MyReducer1 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text k2, Iterable<Text> v2s, Context context)
                throws IOException, InterruptedException {
            for (Text v2 : v2s) {
                context.write(k2, v2);
            }
        }
    }

    /**
     * Second job: the same cleanup for the second input
     */
    public static class MyMapper2 extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] spl2 = value.toString().split("\t");
            if (spl2.length == 2) {
                context.write(new Text(spl2[0].trim()), new Text(spl2[1].trim()));
            }
        }
    }

    public static class MyReducer2 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text k3, Iterable<Text> v3s, Context context)
                throws IOException, InterruptedException {
            for (Text v3 : v3s) {
                context.write(k3, v3);
            }
        }
    }

    /**
     * Third job: a key that collects at least two distinct values (i.e. appears in
     * both intermediate outputs) is written out with "OK"
     */
    public static class MyMapper3 extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    public static class MyReducer3 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text k4, Iterable<Text> v4s, Context context)
                throws IOException, InterruptedException {
            HashSet<String> hashSet = new HashSet<String>();
            for (Text v4 : v4s) {
                hashSet.add(v4.toString().trim());
            }
            if (hashSet.size() >= 2) {
                context.write(k4, new Text("OK"));
            }
        }
    }
}

Test input:

hdfs dfs -text /user/jiuqian/libin/input/inputpath1.txt
hadoop  a
spark   a
hive    a
hbase   a
tachyon a
storm   a
redis   a

hdfs dfs -text /user/jiuqian/libin/input/inputpath2.txt
hadoop  b
spark   b
kafka   b
tachyon b
oozie   b
flume   b
sqoop   b
solr    b

Test output. Only hadoop, spark and tachyon appear in both input files, so only these keys collect two distinct values in MyReducer3 and are written out with "OK":

hdfs dfs -text /user/jiuqian/libin/input/inputpathmerge2.txt/result/*
hadoop  OK
spark   OK
tachyon OK
Run output:

[sshexec] cmd : bash -c 'source  /home/jiuqian/.bashrc; /home/hduser/hadoop/bin/hadoop jar  /home/jiuqian/blb/JobControlDemo.jar -D mapreduce.map.java.opts=-Xmx2048m -D mapreduce.input.fileinputformat.split.minsize=1 -Dmapreduce.input.fileinputformat.split.maxsize=512000000 -D mapred.linerecordreader.maxlength=32768 /user/jiuqian/libin/input/inputpath1.txt /user/jiuqian/libin/input/inputpath2.txt /user/jiuqian/libin/input/inputpathmerge2.txt'
16/02/27 12:37:45 INFO client.RMProxy: Connecting to ResourceManager at sh-rslog1/192.168.1.2:8032
16/02/27 12:37:46 INFO input.FileInputFormat: Total input paths to process : 1
16/02/27 12:37:46 INFO mapreduce.JobSubmitter: number of splits:1
16/02/27 12:37:46 INFO Configuration.deprecation: mapred.linerecordreader.maxlength is deprecated. Instead, use mapreduce.input.linerecordreader.line.maxlength
16/02/27 12:37:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1446086163035_17037
16/02/27 12:37:47 INFO impl.YarnClientImpl: Submitted application application_1446086163035_17037
16/02/27 12:37:47 INFO mapreduce.Job: The url to track the job: http://sh-rslog1:8088/proxy/application_1446086163035_17037/
16/02/27 12:37:47 INFO client.RMProxy: Connecting to ResourceManager at sh-rslog1/27.115.29.102:8032
16/02/27 12:37:47 INFO input.FileInputFormat: Total input paths to process : 1
16/02/27 12:37:47 INFO mapreduce.JobSubmitter: number of splits:1
16/02/27 12:37:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1446086163035_17038
16/02/27 12:37:47 INFO impl.YarnClientImpl: Submitted application application_1446086163035_17038
16/02/27 12:37:47 INFO mapreduce.Job: The url to track the job: http://sh-rslog1:8088/proxy/application_1446086163035_17038/
16/02/27 12:38:13 INFO client.RMProxy: Connecting to ResourceManager at sh-rslog1/27.115.29.102:8032
16/02/27 12:38:13 INFO input.FileInputFormat: Total input paths to process : 2
16/02/27 12:38:13 INFO mapreduce.JobSubmitter: number of splits:2
16/02/27 12:38:13 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1446086163035_17039
16/02/27 12:38:13 INFO impl.YarnClientImpl: Submitted application application_1446086163035_17039
16/02/27 12:38:13 INFO mapreduce.Job: The url to track the job: http://sh-rslog1:8088/proxy/application_1446086163035_17039/
[job name:  JobControlDemo1
job id: My control job0
job state:  SUCCESS
job mapred id:  job_1446086163035_17037
job message:    just initialized
job has no depending job:
, job name: JobControlDemo2
job id: My control job1
job state:  SUCCESS
job mapred id:  job_1446086163035_17038
job message:    just initialized
job has no depending job:
, job name: JobControlDemo3
job id: My control job2
job state:  SUCCESS
job mapred id:  job_1446086163035_17039
job message:    just initialized
job has 2 dependeng jobs:
     depending job 0:   JobControlDemo1
     depending job 1:   JobControlDemo2
]
[INFO] Executed tasks
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------


Execution of the three jobs:

At the start, the first two jobs (JobControlDemo1 and JobControlDemo2) are scheduled concurrently.

After the first two jobs finish, the third job (JobControlDemo3) starts running.
Copyright notice: the joy of sharing. https://blog.csdn.net/baolibin528/article/details/50754753