MRjob串联之共同好友
数据
描述:A的好友有B,C,D,F,R
A:B,C,D,F,R
B:B,D,R
C:A,G,U,X,D,F,R
D:B,F,D,G,R
E:A,D,F,R
F:B,C,D,T,Y,F,R
G:A,C,Z,K,R
H:B,C,F,G,L,R
I:B,L,D,R,E
J:B,D,G,F,R
K:S,B,C,A
要求:找出A,B的共同好友,找出A,C的共同好友…(所有人两两间的共同好友)
逻辑分析
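以A、B的共同好友为例:job1先把"本人:好友列表"反转为"好友 → 拥有该好友的所有人"(例如好友B对应的人是 A,K,J,I,H,F,D,B);job2再把每个好友对应的人排序后两两配对,输出"本人1-本人2 → 该好友",最后按人对聚合,得到 A-B 的共同好友为 R,D,B。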
Map1->FriendMap1.java
package day4_jobs_input.jobs.friends;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * First-stage mapper of the common-friends job chain.
 *
 * <p>Each input line is expected to look like {@code person:friend1,friend2,...}.
 * The mapper inverts the relation and emits one {@code (friend, person)} pair per
 * listed friend, so that the reducer can gather, for every friend, all persons
 * that list that friend.
 *
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendMap1 extends Mapper<LongWritable, Text, Text, Text> {
    // Reused output objects; overwritten before every write.
    Text k = new Text();
    Text v = new Text();

    /**
     * Splits a line into the person and the friend list, then emits the inverted
     * pairs {@code friend -> person}.
     *
     * <p>Lines that do not contain both a person and a non-empty friend list
     * (for example blank lines) are skipped instead of failing the task.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   one line of text in the form {@code person:friend1,friend2,...}
     * @param context task context used to emit {@code (friend, person)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // splits[0] -> the person, splits[1] -> comma-separated friends
        String[] splits = line.split(":");
        if (splits.length < 2) {
            // Malformed or blank record: nothing to invert.
            return;
        }
        String person = splits[0];
        v.set(person);
        String[] friends = splits[1].split(",");
        for (String friend : friends) {
            if (friend.isEmpty()) {
                // Ignore empty tokens produced by consecutive commas.
                continue;
            }
            // Inverted output: key = a single friend, value = the person
            k.set(friend);
            context.write(k, v);
        }
    }
}
Reduce1->FriendReduce1.java
package day4_jobs_input.jobs.friends;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * First-stage reducer of the common-friends job chain.
 *
 * <p>For every friend (the key) it concatenates all persons received as values
 * into one comma-separated string and writes {@code (friend, "p1,p2,...,")}.
 *
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendReduce1 extends Reducer<Text, Text, Text, Text> {
    // Reused output value; overwritten before every write.
    Text v = new Text();

    /**
     * Aggregates all persons that share the friend given as {@code key}.
     *
     * @param key     the friend
     * @param values  the persons emitted for this friend by the mapper
     * @param context task context used to emit {@code (friend, persons)}
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key,
                          Iterable<Text> values,
                          Context context) throws IOException, InterruptedException {
        // Local, single-threaded accumulation: StringBuilder avoids the
        // unnecessary synchronization of StringBuffer.
        StringBuilder persons = new StringBuilder();
        for (Text person : values) {
            // Every entry, including the last, is followed by a comma; the
            // output format is intentionally left unchanged.
            persons.append(person.toString()).append(",");
        }
        v.set(persons.toString());
        context.write(key, v);
    }
}
Map2->FriendMap2.java
package day4_jobs_input.jobs.friends;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
/**
 * Second-stage mapper of the common-friends job chain.
 *
 * <p>Each input line is expected to look like {@code friend \t person1,person2,...}.
 * For every unordered pair of persons on the line it emits
 * {@code ("personA-personB", friend)}.
 *
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendMap2 extends Mapper<LongWritable, Text, Text, Text> {
    // Reused output objects; overwritten before every write.
    Text TwoMan = new Text();
    Text v = new Text();

    /**
     * Emits the friend once for every pair of persons listed on the line.
     *
     * <p>Lines without a tab-separated person list are skipped instead of
     * failing the task.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   one line in the form {@code friend \t person1,person2,...}
     * @param context task context used to emit {@code ("personA-personB", friend)}
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        // splits[0] -> the friend, splits[1] -> comma-separated persons
        String[] splits = value.toString().split("\t");
        if (splits.length < 2) {
            // Malformed or blank record: no persons to pair up.
            return;
        }
        String[] persons = splits[1].split(",");
        // Sort so a pair always appears in the same order ("A-B", never "B-A");
        // otherwise the same two persons would land under two different keys.
        Arrays.sort(persons);
        int num = persons.length;
        v.set(splits[0]);
        // Emit: person1-person2 -> friend, person1-person3 -> friend, ...
        for (int i = 0; i < num; i++) {
            for (int j = i + 1; j < num; j++) {
                TwoMan.set(persons[i] + "-" + persons[j]);
                context.write(TwoMan, v);
            }
        }
    }
}
Reduce2->FriendReduce2.java
package day4_jobs_input.jobs.friends;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Second-stage reducer of the common-friends job chain.
 *
 * <p>For every person pair (the key, e.g. {@code "A-B"}) it concatenates all
 * friends received as values into one comma-separated string and writes
 * {@code ("A-B", "f1,f2,...,")}.
 *
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendReduce2 extends Reducer<Text, Text, Text, Text> {
    // Reused output value; overwritten before every write.
    Text v = new Text();

    /**
     * Aggregates the common friends of the person pair given as {@code key}.
     *
     * @param key     the person pair in the form {@code personA-personB}
     * @param values  the friends emitted for this pair by the mapper
     * @param context task context used to emit {@code (pair, friends)}
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key,
                          Iterable<Text> values,
                          Context context) throws IOException, InterruptedException {
        // Local, single-threaded accumulation: StringBuilder avoids the
        // unnecessary synchronization of StringBuffer.
        StringBuilder friends = new StringBuilder();
        for (Text value : values) {
            // Every entry, including the last, is followed by a comma; the
            // output format is intentionally left unchanged.
            friends.append(value.toString()).append(",");
        }
        v.set(friends.toString());
        context.write(key, v);
    }
}
Driver->FriendDriveAll.java
package day4_jobs_input.jobs.friends;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
/**
 * Driver that chains the two common-friends jobs with {@link JobControl}.
 *
 * <p>Job 1 (FriendMap1 / FriendReduce1) writes to an intermediate directory that
 * job 2 (FriendMap2 / FriendReduce2) reads as its input.
 *
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendDriveAll {
    /**
     * Configures both jobs, runs them in dependency order and exits when they
     * are finished.
     *
     * @param args {@code args[0]} input path, {@code args[1]} job 1 output /
     *             job 2 input path, {@code args[2]} job 2 output path; when fewer
     *             than three paths are given, the built-in local sample paths
     *             are used
     * @throws Exception if job setup fails or the wait is interrupted
     */
    public static void main(String[] args) throws Exception {
        // Only fall back to the local sample paths when the caller did not
        // supply all three paths, instead of always discarding the arguments.
        if (args == null || args.length < 3) {
            args = new String[]{"E:\\桌面\\大数据\\test\\friends\\friends.txt",
                    "E:\\桌面\\大数据\\test\\friends\\friendOut1",
                    "E:\\桌面\\大数据\\test\\friends\\friendOut2"};
        }
        Configuration conf = new Configuration();

        // Job 1: invert person -> friends into friend -> persons
        Job job1 = Job.getInstance(conf);
        // Locate the jar through this driver class rather than a class that is
        // not part of this file.
        job1.setJarByClass(FriendDriveAll.class);
        job1.setMapperClass(FriendMap1.class);
        job1.setReducerClass(FriendReduce1.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(Text.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, new Path(args[1]));

        // Job 2: the output of job 1 is the input of job 2
        Job job2 = Job.getInstance(conf);
        job2.setJarByClass(FriendDriveAll.class);
        job2.setMapperClass(FriendMap2.class);
        job2.setReducerClass(FriendReduce2.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job2, new Path(args[1]));
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));

        // Chain job 1 and job 2
        JobControl control = new JobControl("friends");
        ControlledJob ajob = new ControlledJob(job1.getConfiguration());
        ControlledJob bjob = new ControlledJob(job2.getConfiguration());
        // job 2 depends on job 1: it may only start after job 1 has completed
        bjob.addDependingJob(ajob);
        control.addJob(ajob);
        control.addJob(bjob);

        Thread thread = new Thread(control);
        thread.start();

        // Poll until every job has reached a final state. A single check would
        // return from main while the JobControl thread keeps running forever.
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        boolean failed = !control.getFailedJobList().isEmpty();
        // Ask the JobControl loop to terminate so its thread does not linger.
        control.stop();
        System.exit(failed ? 1 : 0);
    }
}
job1结果
A K,G,C,E,
B A,K,J,I,H,F,D,B,
C H,K,A,F,G,
D I,F,A,E,D,C,B,J,
E I,
F J,C,H,F,E,A,D,
G D,C,J,H,
K G,
L I,H,
R F,J,I,E,D,C,A,B,G,H,
S K,
T F,
U C,
X C,
Y F,
Z G,
job2结果
A-B R,D,B,
A-C R,D,F,
A-D D,R,B,F,
A-E R,F,D,
A-F R,B,F,D,C,
A-G R,C,
A-H F,C,B,R,
A-I B,D,R,
A-J F,B,D,R,
A-K B,C,
B-C R,D,
B-D D,R,B,
B-E R,D,
B-F B,R,D,
B-G R,
B-H B,R,
B-I D,R,B,
B-J B,D,R,
B-K B,
C-D D,G,R,F,
C-E A,R,F,D,
C-F R,F,D,
C-G R,A,
C-H R,G,F,
C-I D,R,
C-J D,F,R,G,
C-K A,
D-E R,F,D,
D-F F,D,R,B,
D-G R,
D-H F,B,G,R,
D-I D,R,B,
D-J B,G,F,D,R,
D-K B,
E-F D,F,R,
E-G R,A,
E-H F,R,
E-I D,R,
E-J F,D,R,
E-K A,
F-G C,R,
F-H F,C,B,R,
F-I R,B,D,
F-J R,B,D,F,
F-K B,C,
G-H R,C,
G-I R,
G-J R,
G-K A,C,
H-I R,B,L,
H-J F,G,R,B,
H-K C,B,
I-J R,B,D,
I-K B,
J-K B,