多job串联
当程序中有多个 job,并且这些 job 之间存在依赖关系时(例如有 job1、job2、job3,其中 job3 需要依赖 job1 和 job2 的执行结果),就需要使用多 job 串联。示例代码如下:
// Submit several jobs as one group; the argument is the group name and may be any string.
// (The string literal must use plain ASCII double quotes, not typographic quotes.)
JobControl jc = new JobControl("common_friend");
// Wrap the plain jobs as controllable jobs.
ControlledJob ajob = new ControlledJob(job.getConfiguration());
ControlledJob bjob = new ControlledJob(job2.getConfiguration());
// Declare the dependency: bjob depends on ajob.
bjob.addDependingJob(ajob);
// Add the jobs to the group.
jc.addJob(ajob);
jc.addJob(bjob);
// Run the JobControl on its own thread.
Thread t = new Thread(jc);
t.start();
// Poll until every job in the group has finished.
while (!jc.allFinished()) {
    // sleep is a static method that pauses the *current* thread, so call it on Thread.
    Thread.sleep(500);
}
// Ask the JobControl to stop instead of using the deprecated, unsafe Thread.stop().
jc.stop();
背景:处理找共同好友问题。
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J,K
以上是数据,每行的格式为“用户:好友列表”,好友之间用逗号分隔。例如:
A:B,C,D,F,E,O
表示 B、C、D、F、E、O 都是用户 A 的好友。
1、求所有两两用户之间的共同好友:
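解决思路:使用两个串联的 MapReduce。第一个 job:map 端把每行数据按“:”拆分成左侧的用户和右侧的好友列表,再把每个好友与该用户一一配对,输出 <好友, 用户>;reduce 端收到的同一组数据(一个迭代器)就是拥有这个好友的所有用户,把迭代器中的用户取出并用逗号拼接,输出 <好友, 用户列表>。第二个 job:map 端读取第一个 job 的结果,把右侧的用户列表两两拼接成“用户1-用户2”作为 key,把左侧的好友作为 value 输出;reduce 端把同一个用户对的所有 value 拼接起来,就是这两个用户的全部共同好友。因此此题中第二个 job 依赖第一个 job 的输出。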
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Finds the common friends of every pair of users with two chained MapReduce jobs.
 *
 * <p>Input lines have the form {@code user:friend1,friend2,...}. Job 1 inverts the
 * relation into {@code friend -> users having that friend}; job 2 turns every such
 * user list into user pairs and collects, per pair, all friends they share.
 */
public class CommonFriend {

    /** HDFS directory holding the raw {@code user:friends} data read by job 1. */
    private static final String INPUT_PATH = "/commonfriend";

    /**
     * HDFS directory that links the two jobs: job 1 writes it and job 2 reads it.
     * Kept in a single constant so the two job configurations cannot drift apart.
     */
    private static final String STEP01_OUTPUT_PATH = "/friend_step01_0001";

    /** HDFS directory receiving the final {@code userA-userB -> common friends} result. */
    private static final String STEP02_OUTPUT_PATH = "/friend_step02_002";

    /** Interval, in milliseconds, between two checks of the job group's state. */
    private static final long POLL_INTERVAL_MS = 500L;

    /**
     * Job 1 mapper: for an input line {@code A:B,C,D} emits {@code (B, A)},
     * {@code (C, A)} and {@code (D, A)}, i.e. key = friend, value = user.
     */
    static class MyMapper_Step01 extends Mapper<LongWritable, Text, Text, Text> {
        // Output objects reused across calls to map().
        private final Text friendKey = new Text();
        private final Text userValue = new Text();

        /**
         * Called once per input line.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   one line of the form {@code user:friend1,friend2,...}
         * @param context sink for the emitted {@code (friend, user)} pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Two elements are expected: the user and the comma-separated friend list.
            String[] userAndFriends = value.toString().split(":");
            if (userAndFriends.length < 2) {
                // Blank or malformed line (no friend list): nothing to emit.
                return;
            }
            userValue.set(userAndFriends[0]);
            for (String friend : userAndFriends[1].split(",")) {
                friendKey.set(friend);
                context.write(friendKey, userValue);
            }
        }
    }

    /**
     * Job 1 reducer: receives one friend together with all users that list this
     * friend and emits {@code (friend, "user1,user2,...")}.
     */
    static class MyReducer_Step01 extends Reducer<Text, Text, Text, Text> {
        private final Text joinedUsers = new Text();

        /**
         * @param key     a friend
         * @param values  the users emitted for that friend by the job 1 mapper
         * @param context sink for the {@code (friend, comma-joined users)} record
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            boolean first = true;
            for (Text user : values) {
                if (!first) {
                    sb.append(',');
                }
                sb.append(user.toString());
                first = false;
            }
            joinedUsers.set(sb.toString());
            context.write(key, joinedUsers);
        }
    }

    /**
     * Job 2 mapper: reads lines of the form {@code friend<TAB>user1,user2,...}
     * (the records written by job 1) and emits {@code ("userA-userB", friend)}
     * for every pair of distinct users in the list.
     */
    static class MyMapper_Step02 extends Mapper<LongWritable, Text, Text, Text> {
        private final Text keyout = new Text();
        private final Text valueout = new Text();

        /**
         * @param key     input key supplied by the framework; not used
         * @param value   one line {@code friend<TAB>user1,user2,...}
         * @param context sink for the emitted {@code (user pair, friend)} records
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] friendAndUsers = value.toString().split("\t");
            if (friendAndUsers.length < 2) {
                // Blank or malformed line (no user list): nothing to emit.
                return;
            }
            String[] users = friendAndUsers[1].split(",");
            // The friend is the same for every pair of this line, so set it once.
            valueout.set(friendAndUsers[0]);
            for (String u1 : users) {
                for (String u2 : users) {
                    // Emit each unordered pair once, smaller name first, so that
                    // "A-B" and "B-A" end up under the same key.
                    if (u1.compareTo(u2) < 0) {
                        keyout.set(u1 + "-" + u2);
                        context.write(keyout, valueout);
                    }
                }
            }
        }
    }

    /**
     * Job 2 reducer: receives one user pair together with every friend emitted
     * for it and writes {@code ("userA-userB", "friend1,friend2,...")}.
     */
    static class MyReducer_Step02 extends Reducer<Text, Text, Text, Text> {
        private final Text valueout = new Text();

        /**
         * @param key     a user pair such as {@code F-I}
         * @param values  all friends shared by the two users of the pair
         * @param context sink for the {@code (pair, comma-joined friends)} record
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            boolean first = true;
            for (Text friend : values) {
                if (!first) {
                    sb.append(',');
                }
                sb.append(friend.toString());
                first = false;
            }
            valueout.set(sb.toString());
            context.write(key, valueout);
        }
    }

    /**
     * Configures both jobs, chains them through a {@link JobControl} so that job 2
     * starts only after job 1, and waits for the whole group to finish. Exits with
     * status 1 when the group reports at least one failed job.
     *
     * @param args command-line arguments; not used
     * @throws IOException          if a job or controlled job cannot be created
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop01:9000");

        // Job 1: user -> friends  ==>  friend -> users.
        Job job = Job.getInstance(conf);
        job.setJarByClass(CommonFriend.class);
        job.setMapperClass(MyMapper_Step01.class);
        job.setReducerClass(MyReducer_Step01.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(STEP01_OUTPUT_PATH));

        // Job 2: friend -> users  ==>  user pair -> common friends.
        Job job2 = Job.getInstance(conf);
        job2.setJarByClass(CommonFriend.class);
        job2.setMapperClass(MyMapper_Step02.class);
        job2.setReducerClass(MyReducer_Step02.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        // Job 2 must read exactly the directory job 1 writes.
        FileInputFormat.addInputPath(job2, new Path(STEP01_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job2, new Path(STEP02_OUTPUT_PATH));

        // Submit both jobs as one group; the argument is an arbitrary group name.
        JobControl jc = new JobControl("common_friend");
        // Wrap the plain jobs as controllable jobs.
        ControlledJob ajob = new ControlledJob(job.getConfiguration());
        ControlledJob bjob = new ControlledJob(job2.getConfiguration());
        // Declare the dependency: bjob depends on ajob.
        bjob.addDependingJob(ajob);
        jc.addJob(ajob);
        jc.addJob(bjob);

        // Run the JobControl on its own thread and poll it from this one.
        Thread t = new Thread(jc);
        // Daemon, so an exception thrown below cannot leave the JVM hanging.
        t.setDaemon(true);
        t.start();
        try {
            while (!jc.allFinished()) {
                // sleep is static and pauses the current thread, so call it on Thread.
                Thread.sleep(POLL_INTERVAL_MS);
            }
        } finally {
            // Ask the JobControl to stop; Thread.stop() is deprecated and unsafe.
            jc.stop();
        }

        if (!jc.getFailedJobList().isEmpty()) {
            System.err.println("Failed jobs: " + jc.getFailedJobList());
            System.exit(1);
        }
    }
}