hadoop之多job串联（共同好友问题）

最新推荐文章于 2023-05-09 20:39:51 发布

优雅程序员

最新推荐文章于 2023-05-09 20:39:51 发布

阅读量237

点赞数

文章标签：大数据

本文链接：https://blog.csdn.net/weixin_45433525/article/details/105173292

版权

多job串联

当你的程序中有多个job，并且这些job之间存在依赖关系时（例如有job1、job2、job3，其中job3需要依赖job1和job2的执行结果），就需要使用多job串联。
// Submit several jobs as one group; the argument is an arbitrary group name.
JobControl jc=new JobControl("common_friend");
// Wrap the plain jobs so that JobControl can manage them.
ControlledJob ajob=new ControlledJob(job.getConfiguration());
ControlledJob bjob=new ControlledJob(job2.getConfiguration());
// Declare the dependency: bjob runs only after ajob has succeeded.
bjob.addDependingJob(ajob);
// Add both jobs to the group.
jc.addJob(ajob);
jc.addJob(bjob);
// JobControl is a Runnable; run its scheduling loop on its own thread.
Thread t=new Thread(jc);
t.start();
// Poll until every job in the group has finished (successfully or not).
while(!jc.allFinished()){
    // sleep is a static method: it pauses the current (main) thread.
    Thread.sleep(500);
}
// Ask the JobControl loop to exit; Thread.stop() is deprecated and unsafe.
jc.stop();
背景：处理找共同好友问题。
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J,K

以上是数据：
A:B,C,D,F,E,O
表示：B,C,D,E,F,O是A用户的好友。

1、求所有两两用户之间的共同好友：
解决这个问题的方法：使用两个串联的MapReduce作业。第一个job：map端拆分每行数据右侧的好友列表，以好友为key、左侧的用户为value逐一输出；reduce端将拥有同一个好友的所有用户从迭代器中取出并拼接成一行。第二个job：map端将第一个job结果右侧的用户两两拼接作为key，以对应的好友作为value输出；reduce端把同一对用户的所有共同好友拼接起来。因此第二个job依赖第一个job的输出结果。

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Finds the common friends of every pair of users with two chained MapReduce jobs.
 *
 * <p>Input lines have the form {@code A:B,C,D}, meaning B, C and D are friends of user A.
 * Step 1 inverts the relation (friend -> users that list this friend). Step 2 pairs up the
 * users of each friend and collects, per user pair, all friends they have in common.
 * Step 2 reads the output of step 1, so the two jobs are run through a {@link JobControl}
 * with an explicit dependency.
 */
public class CommonFriend {
	/** Output directory of step 1; it is also the input directory of step 2. */
	private static final String STEP01_OUTPUT = "/friend_step01_0001";

	/**
	 * Step 1 mapper: for a line {@code user:f1,f2,...} emits one {@code (friend, user)}
	 * record per listed friend.
	 */
	static class MyMapper_Step01 extends Mapper<LongWritable, Text, Text, Text>{
		Text k=new Text();
		Text v=new Text();

		/**
		 * Called once per input line.
		 *
		 * @param key     the input key supplied by the framework (unused)
		 * @param value   one input line, expected as {@code user:friend1,friend2,...}
		 * @param context sink for the {@code (friend, user)} records
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// Two elements expected: the user and the comma separated friend list.
			String[] user_friends = value.toString().split(":");
			if(user_friends.length<2){
				// Blank or malformed line: nothing to emit (previously crashed the task).
				return;
			}
			// The user is the same for every record written from this line.
			v.set(user_friends[0]);
			for(String f:user_friends[1].split(",")){
				// key: friend, value: the user who lists that friend
				k.set(f);
				context.write(k, v);
			}
		}

	}

	/**
	 * Step 1 reducer: joins all users received for one friend into a single comma
	 * separated value, e.g. {@code C -> A,B,H}.
	 */
	static class MyReducer_Step01 extends Reducer<Text, Text, Text, Text>{
		Text t=new Text();

		/**
		 * @param key     a friend
		 * @param values  every user that lists {@code key} as a friend
		 * @param context sink for the {@code (friend, "u1,u2,...")} record
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			StringBuilder sb=new StringBuilder();
			for(Text v:values){
				if(sb.length()>0){
					sb.append(",");
				}
				sb.append(v.toString());
			}
			t.set(sb.toString());
			context.write(key, t);
		}
	}

	/**
	 * Step 2 mapper: reads step 1 output lines ({@code friend<TAB>u1,u2,...}) and emits
	 * {@code ("ux-uy", friend)} for every pair of users where {@code ux} sorts before
	 * {@code uy}, so each pair always gets the same key regardless of input order.
	 */
	static class MyMapper_Step02 extends Mapper<LongWritable, Text, Text, Text>{
		Text keyout=new Text();
		Text valueout=new Text();

		/**
		 * @param key     the input key supplied by the framework (unused)
		 * @param value   one line of step 1 output
		 * @param context sink for the {@code (userPair, friend)} records
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String[] friend_user = value.toString().split("\t");
			if(friend_user.length<2){
				// Blank or malformed line: nothing to emit (previously crashed the task).
				return;
			}
			String[] users = friend_user[1].split(",");
			// The friend is loop invariant, so set the output value once.
			valueout.set(friend_user[0]);
			for(String u1:users){
				for(String u2:users){
					// Strict ordering emits each pair once and never pairs a user with itself.
					if(u1.compareTo(u2)<0){
						keyout.set(u1+"-"+u2);
						context.write(keyout, valueout);
					}
				}
			}
		}

	}

	/**
	 * Step 2 reducer: joins all friends received for one user pair into a single comma
	 * separated value, e.g. {@code F-I -> A,O}.
	 */
	static class MyReducer_Step02 extends Reducer<Text, Text, Text, Text>{
		Text valueout=new Text();

		/**
		 * @param key     a user pair of the form {@code "ux-uy"}
		 * @param values  every friend the two users have in common
		 * @param context sink for the {@code (userPair, "f1,f2,...")} record
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			StringBuilder sb=new StringBuilder();
			for(Text v:values){
				if(sb.length()>0){
					sb.append(",");
				}
				sb.append(v.toString());
			}
			valueout.set(sb.toString());
			context.write(key, valueout);
		}

	}


	/**
	 * Configures both jobs, chains them with a {@link JobControl} and waits for completion.
	 * Exits with status 1 when any job of the group failed.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
		Configuration conf=new Configuration();
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		conf.set("fs.defaultFS", "hdfs://hadoop01:9000");

		// Configuration of job 1
		Job job=Job.getInstance(conf);
		job.setJarByClass(CommonFriend.class);

		job.setMapperClass(MyMapper_Step01.class);
		job.setReducerClass(MyReducer_Step01.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.addInputPath(job, new Path("/commonfriend"));
		FileOutputFormat.setOutputPath(job, new Path(STEP01_OUTPUT));

		// Configuration of job 2
		Job job2=Job.getInstance(conf);
		job2.setJarByClass(CommonFriend.class);

		job2.setMapperClass(MyMapper_Step02.class);
		job2.setReducerClass(MyReducer_Step02.class);

		job2.setMapOutputKeyClass(Text.class);
		job2.setMapOutputValueClass(Text.class);

		job2.setOutputKeyClass(Text.class);
		job2.setOutputValueClass(Text.class);

		// Job 2 must read exactly what job 1 wrote; the two paths used to differ by a typo.
		FileInputFormat.addInputPath(job2, new Path(STEP01_OUTPUT));
		FileOutputFormat.setOutputPath(job2, new Path("/friend_step02_002"));

		// Submit the jobs as one group; the argument is an arbitrary group name.
		JobControl jc=new JobControl("common_friend");
		// Wrap the plain jobs so that JobControl can manage them.
		ControlledJob ajob=new ControlledJob(job.getConfiguration());
		ControlledJob bjob=new ControlledJob(job2.getConfiguration());
		// Declare the dependency: bjob runs only after ajob has succeeded.
		bjob.addDependingJob(ajob);
		// Add both jobs to the group.
		jc.addJob(ajob);
		jc.addJob(bjob);

		// JobControl is a Runnable; run its scheduling loop on its own thread.
		Thread t=new Thread(jc);
		t.start();
		try{
			// Poll until every job in the group has finished (successfully or not).
			while(!jc.allFinished()){
				Thread.sleep(500);
			}
		}finally{
			// Let the JobControl loop exit on its own; Thread.stop() is deprecated and unsafe.
			jc.stop();
		}

		if(!jc.getFailedJobList().isEmpty()){
			System.err.println("Failed jobs: "+jc.getFailedJobList());
			System.exit(1);
		}
	}

}

优雅程序员

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hadoop之多job串联（共同好友问题）

多job串联当你的程序中有多个job，并且多个job之间需要相互依赖job1 job2 job3 假设job3需要依赖job1 job2的执行结果的时候我们就需要使用多job串联//会将多个job当做一个组中的job提交参数指的是组名随意JobControl jc=new JobControl(“common_friend”);//将原生的job...
复制链接

扫一扫