package bd1805day09;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ManyJob {
//Chained multi-job MapReduce: compute the common friends of every pair of users.
/**
 * Input lines, e.g.:
 *   A:B,C,D,F,E,O
 *   B:A,C,E,K
 *   C:F,A,D,I
 * Step 1: for each friend, collect all users who list that friend.
 * Step 2: for every pair of those users, emit the friends they share.
 * Two chained MapReduce jobs accomplish this cleanly.
 */
//First MapReduce: invert "user:friends" lines into (friend, user) pairs.
static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
	/**
	 * Parses one input line of the form "A:B,C,D,F,E,O" and emits one
	 * (friend, user) pair per friend, so the reducer can group, per friend,
	 * every user that lists it.
	 */
	@Override
	protected void map(LongWritable key,
			Text value,
			Mapper<LongWritable, Text, Text, Text>.Context context)
			throws IOException, InterruptedException {
		//Line format: user:friend1,friend2,...  e.g. "A:B,C,D,F,E,O"
		String line = value.toString().trim();
		String[] userFriends = line.split(":");
		//Skip blank or malformed lines (no ':' or empty friend list) instead
		//of crashing the task with an ArrayIndexOutOfBoundsException.
		if (userFriends.length < 2 || userFriends[1].isEmpty()) {
			return;
		}
		//Emit (friend, user) for every friend of this user.
		for (String friend : userFriends[1].split(",")) {
			context.write(new Text(friend), new Text(userFriends[0]));
		}
	}
}
static class MyReducer extends Reducer<Text, Text, Text, Text>{
	/**
	 * Receives one friend as key and all users listing that friend as values,
	 * and emits "friend \t user1,user2,...", e.g. "A \t F,I,O,K,G,D,C,H,B".
	 */
	@Override
	protected void reduce(Text key, Iterable<Text> values,
			Context context)
			throws IOException, InterruptedException {
		//StringBuilder: no synchronization needed inside a single reduce call.
		StringBuilder sb = new StringBuilder();
		for (Text v : values) {
			//Prepend the separator so no trailing comma needs trimming; this
			//also avoids StringIndexOutOfBoundsException on an empty iterable.
			if (sb.length() > 0) {
				sb.append(',');
			}
			sb.append(v.toString());
		}
		context.write(key, new Text(sb.toString()));
	}
}
第二个MapReduce
static class MyMapper2 extends Mapper<LongWritable, Text, Text, Text>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] friend_users = line.split("\t");
String[] users = friend_users[1].split(",");
//A F,I,O,K,G,D,C,H,B
//循环遍历用户 进行两两拼接 拼接顺序问题 a-b b-a
for(String ul:users){//拼左侧的
for(String ur:users){//拼右侧的
if(ul.charAt(0)<ur.charAt(0)){
String uu=ul+"-"+ur;
System.out.println(uu);
context.write(new Text(uu), new Text(friend_users[0]));//A-E C
}
}
}
}
}
static class MyReducer2 extends Reducer<Text, Text, Text, Text>{
	/**
	 * Groups by user pair; the values are all friends both users share.
	 * Emits "u1-u2 \t friend1,friend2,...", e.g. "A-E \t C,D,B".
	 */
	@Override
	protected void reduce(Text key, Iterable<Text> values,
			Reducer<Text, Text, Text, Text>.Context context)
			throws IOException, InterruptedException {
		//StringBuilder: no synchronization needed inside a single reduce call.
		StringBuilder joined = new StringBuilder();
		for (Text friend : values) {
			//Separator-first join avoids a trailing comma and an
			//StringIndexOutOfBoundsException on an empty iterable.
			if (joined.length() > 0) {
				joined.append(',');
			}
			joined.append(friend.toString());
		}
		context.write(key, new Text(joined.toString()));
	}
}
//Driver: configures both jobs and runs them as a dependent chain via JobControl.
public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException, InterruptedException {
	System.setProperty("HADOOP_USER_NAME", "hadoop");
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", "hdfs://hadoop02:9000");

	//Job 1: friend -> comma-joined list of users that list that friend.
	Job job1 = Job.getInstance(conf);
	job1.setJarByClass(bd1805day09.ManyJob.class);
	job1.setMapperClass(MyMapper.class);
	job1.setReducerClass(MyReducer.class);
	job1.setOutputKeyClass(Text.class);
	job1.setOutputValueClass(Text.class);
	FileInputFormat.addInputPath(job1, new Path("hdfs://hadoop02:9000/friendin"));
	//HDFS handle used to clear stale output directories before each job runs.
	FileSystem fs = FileSystem.get(new URI("hdfs://hadoop02:9000"), conf);
	Path out1 = new Path("/friendout_01");
	if (fs.exists(out1)) {
		fs.delete(out1, true);
	}
	//Reuse the same Path object so input of job2 and output of job1 cannot drift apart.
	FileOutputFormat.setOutputPath(job1, out1);

	//Job 2: user pair -> common friends, consuming job1's output.
	Job job2 = Job.getInstance(conf);
	job2.setJarByClass(bd1805day09.ManyJob.class);
	job2.setMapperClass(MyMapper2.class);
	job2.setReducerClass(MyReducer2.class);
	job2.setOutputKeyClass(Text.class);
	job2.setOutputValueClass(Text.class);
	FileInputFormat.addInputPath(job2, out1);
	Path out2 = new Path("/friendout_03");
	if (fs.exists(out2)) {
		fs.delete(out2, true);
	}
	FileOutputFormat.setOutputPath(job2, out2);

	//Group the jobs and declare the dependency: job2 must wait for job1.
	JobControl jc = new JobControl("wc_sort");
	ControlledJob ajob = new ControlledJob(job1.getConfiguration());
	ControlledJob bjob = new ControlledJob(job2.getConfiguration());
	bjob.addDependingJob(ajob);
	jc.addJob(ajob);
	jc.addJob(bjob);

	//JobControl implements Runnable; run it on a background thread and poll
	//until all jobs finish, then stop the controller thread.
	Thread runner = new Thread(jc);
	runner.setDaemon(true); //do not keep the JVM alive if main exits abnormally
	runner.start();
	while (!jc.allFinished()) {
		Thread.sleep(500);
	}
	jc.stop();
}
}