前言
当一个任务包含多个MapReduce作业(job),并且这些job之间存在相互依赖关系时,
就需要把多个job串联起来,按依赖顺序执行。
操作步骤:
// Create the group that holds all jobs to be run together (the group name is arbitrary)
JobControl jc=new JobControl("job_group");
// Wrap each ordinary Job in a ControlledJob, built from that job's Configuration
// (original note: what job.getConfiguration()/getLong(...) return comes from job.xml)
ControlledJob cjob1=new ControlledJob(job1.getConfiguration());
ControlledJob cjob2=new ControlledJob(job2.getConfiguration());
// Declare the dependency between jobs: cjob2 depends on cjob1
cjob2.addDependingJob(cjob1);
jc.addJob(cjob1);
jc.addJob(cjob2);
// Submit the group: JobControl is run on its own thread
Thread t=new Thread(jc);
t.start();
// Poll until every job is finished (true = all finished, false = still running)
while(!jc.allFinished()){
Thread.sleep(100);
}
// Everything is finished, so stop the JobControl thread
jc.stop();
多job串联案例
求共同好友
数据如下:user_friends.data
用户:好友
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J,K
数据解释:
A:B,C,D,F,E,O
表示:B,C,D,E,F,O是A用户的好友。
需求:求出所有两两用户之间的共同好友。
分析:
正着看,左边是用户,右边是用户的所有好友
这样去找关系好像不太好找。
我们再仔细分析题目,找每两两用户的共同好友,我们反过来想:
- 第一步:先以好友为key、用户为value,求出拥有该好友的所有用户(例如 O -> A,F,H,I,J)
- 第二步:再把这些用户两两组合作为key(例如 A-F),该好友作为value;同一个key下的所有value就是这两个用户的共同好友
CommonFriends.java
/**
 * Finds the common friends of every pair of users with two chained MapReduce jobs.
 *
 * <p>Job 1 inverts each {@code user:friend1,friend2,...} line into
 * {@code friend -> users that list this friend}. Job 2 reads that output, pairs the
 * users up and collects, for each pair, every friend the two users share.
 */
public class CommonFriends {

    /**
     * Joins the string form of every value with commas, in iteration order.
     *
     * @param values the values handed to a reducer for one key
     * @return the comma-separated values; an empty string when there are none
     */
    private static String joinValues(Iterable<Text> values) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (Text value : values) {
            if (!first) {
                joined.append(',');
            }
            joined.append(value.toString());
            first = false;
        }
        return joined.toString();
    }

    /**
     * Job 1 mapper: turns {@code O:A,H,I,J,K} into the records
     * {@code (A,O) (H,O) (I,O) (J,O) (K,O)}, i.e. (friend, user).
     */
    static class CommonFriendsMapper1 extends Mapper<LongWritable, Text, Text, Text> {
        // Reused output objects, refilled for every record that is written.
        private final Text mk = new Text();
        private final Text mv = new Text();

        /**
         * Emits one (friend, user) record per friend listed on the line.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one input line in the form {@code user:friend1,friend2,...}
         * @param context sink for the (friend, user) records
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // O:A,H,I,J,K
            String[] userFriends = value.toString().split(":");
            // Blank or malformed lines (no user or no friend list) are skipped
            // instead of failing the task with ArrayIndexOutOfBoundsException.
            if (userFriends.length < 2 || userFriends[0].isEmpty()) {
                return;
            }
            // The user is the same for every friend on the line, so set it once.
            mv.set(userFriends[0]);
            // Split out the individual friends.
            for (String friend : userFriends[1].split(",")) {
                if (friend.isEmpty()) {
                    continue;
                }
                mk.set(friend);
                context.write(mk, mv);
            }
        }
    }

    /**
     * Job 1 reducer: gathers all users that list the same friend, e.g. {@code O -> A,B,C}.
     */
    static class CommonFriendsReducer1 extends Reducer<Text, Text, Text, Text> {
        private final Text rv = new Text();

        /**
         * Writes the friend together with the comma-separated users that list it.
         *
         * @param key     the friend
         * @param values  every user that lists this friend
         * @param context sink for the (friend, users) record
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            rv.set(joinValues(values));
            // O A,B,C
            context.write(key, rv);
        }
    }

    /**
     * Job 2 mapper: turns {@code O<TAB>A,B,C} into one (userPair, friend) record per
     * pair of distinct users, e.g. {@code (A-B,O) (A-C,O) (B-C,O)}.
     */
    static class CommonFriendsMapper2 extends Mapper<LongWritable, Text, Text, Text> {
        private final Text mk = new Text();
        private final Text mv = new Text();

        /**
         * Emits (smallerUser-largerUser, friend) for every pair of users on the line.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of job 1 output: {@code friend<TAB>user1,user2,...}
         * @param context sink for the (userPair, friend) records
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // O A,B,C
            String[] friendUsers = value.toString().split("\t");
            // Skip lines that do not carry both a friend and a user list.
            if (friendUsers.length < 2) {
                return;
            }
            String[] users = friendUsers[1].split(",");
            // The shared friend is the same for every pair built from this line.
            mv.set(friendUsers[0]);
            // Each unordered pair is visited exactly once; the two names are put in
            // ascending order so that "B-C" and "C-B" always end up under the key "B-C".
            for (int i = 0; i < users.length; i++) {
                for (int j = i + 1; j < users.length; j++) {
                    int order = users[i].compareTo(users[j]);
                    if (order == 0) {
                        // A user is never paired with itself.
                        continue;
                    }
                    String pair = order < 0
                            ? users[i] + "-" + users[j]
                            : users[j] + "-" + users[i];
                    mk.set(pair);
                    // B-C O
                    context.write(mk, mv);
                }
            }
        }
    }

    /**
     * Job 2 reducer: collects all common friends of one user pair.
     */
    static class CommonFriendsReducer2 extends Reducer<Text, Text, Text, Text> {
        private final Text rv = new Text();

        /**
         * Writes the user pair together with its comma-separated common friends.
         *
         * @param key     the user pair, e.g. {@code B-C}
         * @param values  every friend the two users share
         * @param context sink for the (userPair, friends) record
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            rv.set(joinValues(values));
            context.write(key, rv);
        }
    }

    /**
     * Configures both jobs, chains them with a JobControl (job 2 depends on job 1),
     * waits until all of them are finished and exits with status 1 if any job is
     * reported as failed.
     *
     * @param args command-line arguments (not used)
     * @throws IOException          if job setup or file-system access fails
     * @throws InterruptedException if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME","hdp01");
        Configuration conf = new Configuration();
        // Optional local mode: conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","hdfs://10.211.55.20:9000");

        System.out.println("============job1==================");
        Job job1 = Job.getInstance(conf);
        job1.setJarByClass(CommonFriends.class);
        job1.setMapperClass(CommonFriendsMapper1.class);
        job1.setReducerClass(CommonFriendsReducer1.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job1,new Path("/tmpin/commonFriends/user_friends.data"));
        // The output directory must not exist when the job starts (original note:
        // otherwise an error is raised), so an existing one is deleted first.
        FileSystem fs = FileSystem.get(conf);
        Path outpath = new Path("/tmpout/commonFriends/out1");
        if (fs.exists(outpath)) {
            fs.delete(outpath,true);
        }
        FileOutputFormat.setOutputPath(job1, outpath);

        System.out.println("=============job2=================");
        Job job2 = Job.getInstance(conf);
        job2.setJarByClass(CommonFriends.class);
        job2.setMapperClass(CommonFriendsMapper2.class);
        job2.setReducerClass(CommonFriendsReducer2.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        // Job 2 consumes exactly what job 1 produced, so the same Path object is reused.
        FileInputFormat.setInputPaths(job2, outpath);
        // Same rule as above: remove a stale output directory before running.
        Path outpath2 = new Path("/tmpout/commonFriends/out2");
        if (fs.exists(outpath2)) {
            fs.delete(outpath2,true);
        }
        FileOutputFormat.setOutputPath(job2, outpath2);

        System.out.println("============提交jobs==================");
        // JobControl groups the jobs that are to be run together.
        JobControl jc = new JobControl("common_friend");
        // Only ControlledJob instances can be added to the group, so each ordinary
        // job is wrapped in one, built from that job's Configuration.
        ControlledJob cjob1 = new ControlledJob(job1.getConfiguration());
        ControlledJob cjob2 = new ControlledJob(job2.getConfiguration());
        // Dependency: cjob2 depends on cjob1.
        cjob2.addDependingJob(cjob1);
        jc.addJob(cjob1);
        jc.addJob(cjob2);

        // Submit the group: JobControl is run on its own thread.
        Thread t = new Thread(jc);
        t.start();
        try {
            // Poll until every job is finished (true = finished, false = not yet).
            while (!jc.allFinished()) {
                Thread.sleep(100);
            }
        } finally {
            // Always stop the JobControl thread, even when the wait is interrupted.
            jc.stop();
        }

        // Report failures and signal them through the exit status.
        if (!jc.getFailedJobList().isEmpty()) {
            for (ControlledJob failed : jc.getFailedJobList()) {
                System.err.println("Job failed: " + failed.getJobName() + " - " + failed.getMessage());
            }
            System.exit(1);
        }
    }
}
第一个job(job1)的输出:
[hdp01@hdp01 ~]$ hdfs dfs -cat /tmpout/commonFriends/out1/part-r-00000
A F,I,O,K,G,D,C,H,B
B E,J,F,A
C B,E,K,A,H,G,F
D H,C,G,F,E,A,K,L
E A,B,L,G,M,F,D,H
F C,M,L,A,D,G
G M
H O
I O,C
J O
K O,B
L D,E
M E,F
O A,H,I,J,F
最终输出结果:
[hdp01@hdp01 tmpfiles]$ hdfs dfs -cat /tmpout/commonFriends/out2/part-r-00000
A-B E,C
A-C D,F
A-D F,E
A-E C,D,B
A-F O,B,E,D,C
A-G E,F,D,C
A-H O,E,D,C
A-I O
A-J B,O
A-K D,C
A-L D,F,E
A-M E,F
B-C A
B-D E,A
B-E C
B-F A,E,C
B-G C,A,E
B-H A,E,C
B-I A
B-K C,A
B-L E
B-M E
B-O A,K
C-D A,F
C-E D
C-F D,A
C-G F,A,D
C-H D,A
C-I A
C-K A,D
C-L F,D
C-M F
C-O I,A
D-E L
D-F E,A
D-G A,F,E
D-H E,A
D-I A
D-K A
D-L F,E
D-M F,E
D-O A
E-F C,B,M,D
E-G C,D
E-H C,D
E-J B
E-K D,C
E-L D
F-G A,D,C,E
F-H A,E,C,D,O
F-I O,A
F-J O,B
F-K C,A,D
F-L E,D
F-M E
F-O A
G-H A,C,D,E
G-I A
G-K C,A,D
G-L D,E,F
G-M F,E
G-O A
H-I O,A
H-J O
H-K A,D,C
H-L E,D
H-M E
H-O A
I-J O
I-K A
I-O A
K-L D
K-O A
L-M F,E