Given the following data:
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
Question: which pairs of people have common friends, and who are those common friends?
Approach:
Step 1: only determine, for each pair of people, that they share a particular common friend (one friend per output line).
In the mapper, for an input line such as A:B,C,D,F,E,O:
for (each friend in the friend list) {
    context.write(friend, user);   // key = the friend, value = the user who lists that friend
}
In the reducer, every user who lists the same friend arrives in one group; sort the group and emit every pair of users with that friend as the value, e.g.:
A-B C
A-B E
A-C F
…
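Before looking at the full MapReduce job, the same idea can be sketched in plain Java without Hadoop, which makes the two phases easier to follow. This is only an illustration; the class name CommonFriendsSketch and the hard-coded sample lines are not part of the actual job.
import java.util.*;

public class CommonFriendsSketch {
    public static void main(String[] args) {
        // A couple of lines of the input, hard-coded for illustration
        Map<String, List<String>> userToFriends = new LinkedHashMap<>();
        userToFriends.put("A", Arrays.asList("B", "C", "D", "F", "E", "O"));
        userToFriends.put("B", Arrays.asList("A", "C", "E", "K"));
        // ... the remaining input lines would be added the same way

        // "map" phase: invert user -> friends into friend -> users who list that friend
        Map<String, List<String>> friendToUsers = new TreeMap<>();
        for (Map.Entry<String, List<String>> e : userToFriends.entrySet()) {
            for (String friend : e.getValue()) {
                friendToUsers.computeIfAbsent(friend, f -> new ArrayList<>()).add(e.getKey());
            }
        }

        // "reduce" phase: for each friend, emit every sorted pair of its users
        for (Map.Entry<String, List<String>> e : friendToUsers.entrySet()) {
            List<String> users = e.getValue();
            Collections.sort(users);
            for (int i = 0; i < users.size() - 1; i++) {
                for (int j = i + 1; j < users.size(); j++) {
                    System.out.println(users.get(i) + "-" + users.get(j) + "\t" + e.getKey());
                }
            }
        }
    }
}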
package com.bigdata.mapreduce.friends;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CommonFriendsOne {

    public static class CommonFriendsOneMapper extends Mapper<LongWritable, Text, Text, Text> {

        Text k = new Text();
        Text v = new Text();

        // Input line:  A:B,C,D,F,E,O
        // Output:      B->A  C->A  D->A ...
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] userAndFriends = value.toString().split(":");
            String user = userAndFriends[0];
            String[] friends = userAndFriends[1].split(",");
            v.set(user);
            for (String f : friends) {
                k.set(f);
                context.write(k, v); // key = friend, value = user who lists this friend
            }
        }
    }

    public static class CommonFriendsOneReducer extends Reducer<Text, Text, Text, Text> {

        // One group of input:  B --> A E F J ...
        // Another group:       C --> B F E J ...
        @Override
        protected void reduce(Text friend, Iterable<Text> users, Context context)
                throws IOException, InterruptedException {
            ArrayList<String> userList = new ArrayList<>();
            for (Text user : users) {
                userList.add(user.toString());
            }
            // Sort so that each pair is always emitted in the same order (A-B, never B-A)
            Collections.sort(userList);
            for (int i = 0; i < userList.size() - 1; i++) {
                for (int j = i + 1; j < userList.size(); j++) {
                    context.write(new Text(userList.get(i) + "-" + userList.get(j)), friend);
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(CommonFriendsOne.class);
        job.setMapperClass(CommonFriendsOneMapper.class);
        job.setReducerClass(CommonFriendsOneReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path("D:\\mrdata\\friends\\input"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\mrdata\\friends\\out1"));

        job.waitForCompletion(true);
    }
}
The first job produces the following data:
B-C A
B-D A
B-F A
B-G A
B-H A
B-I A
B-K A
B-O A
C-D A
C-F A
C-G A
C-H A
C-I A
C-K A
C-O A
D-F A
D-G A
D-H A
D-I A
D-K A
D-O A
F-G A
F-H A
F-I A
F-K A
F-O A
G-H A
G-I A
G-K A
G-O A
H-I A
H-K A
H-O A
I-K A
I-O A
K-O A
A-E B
A-F B
A-J B
E-F B
E-J B
F-J B
A-B C
A-E C
A-F C
A-G C
A-H C
A-K C
B-E C
B-F C
B-G C
B-H C
B-K C
E-F C
E-G C
E-H C
E-K C
F-G C
F-H C
F-K C
G-H C
G-K C
H-K C
A-C D
A-E D
A-F D
A-G D
A-H D
A-K D
A-L D
C-E D
C-F D
C-G D
C-H D
C-K D
C-L D
E-F D
E-G D
E-H D
E-K D
E-L D
F-G D
F-H D
F-K D
F-L D
G-H D
G-K D
G-L D
H-K D
H-L D
K-L D
A-B E
A-D E
A-F E
A-G E
A-H E
A-L E
A-M E
B-D E
B-F E
B-G E
B-H E
B-L E
B-M E
D-F E
D-G E
D-H E
D-L E
D-M E
F-G E
F-H E
F-L E
F-M E
G-H E
G-L E
G-M E
H-L E
H-M E
L-M E
A-C F
A-D F
A-G F
A-L F
A-M F
C-D F
C-G F
C-L F
C-M F
D-G F
D-L F
D-M F
G-L F
G-M F
L-M F
C-O I
D-E L
E-F M
A-F O
A-H O
A-I O
A-J O
F-H O
F-I O
F-J O
H-I O
H-J O
I-J O
Step 2: take the data produced in Step 1, use the left column (the pair) as the key and the right column (the friend) as the value, and aggregate again.
package com.bigdata.mapreduce.friends;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CommonFriendsTwo {

    public static class CommonFriendsTwoMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Input line from Step 1, e.g. "B-C<TAB>A": key = the pair, value = one common friend
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] splits = value.toString().split("\t");
            context.write(new Text(splits[0]), new Text(splits[1]));
        }
    }

    public static class CommonFriendsTwoReducer extends Reducer<Text, Text, Text, Text> {

        // All common friends of one pair arrive in a single group; concatenate them
        @Override
        protected void reduce(Text pair, Iterable<Text> friends, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text friend : friends) {
                sb.append(friend.toString()).append("\t");
            }
            context.write(pair, new Text(sb.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(CommonFriendsTwo.class);
        job.setMapperClass(CommonFriendsTwoMapper.class);
        job.setReducerClass(CommonFriendsTwoReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path("D:\\mrdata\\friends\\out1"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\mrdata\\friends\\out2"));

        job.waitForCompletion(true);
    }
}
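The two jobs above are started from separate main methods. As a variant, they can be chained in a single driver so that Step 2 only runs after Step 1 has finished successfully. A minimal sketch, assuming the same input/output paths as above (the class name CommonFriendsDriver is hypothetical):
package com.bigdata.mapreduce.friends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver that chains the two jobs: Step 2 only starts after Step 1 succeeds.
public class CommonFriendsDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Step 1: friend -> users, then emit (pair, friend)
        Job job1 = Job.getInstance(conf, "common-friends-step1");
        job1.setJarByClass(CommonFriendsOne.class);
        job1.setMapperClass(CommonFriendsOne.CommonFriendsOneMapper.class);
        job1.setReducerClass(CommonFriendsOne.CommonFriendsOneReducer.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job1, new Path("D:\\mrdata\\friends\\input"));
        FileOutputFormat.setOutputPath(job1, new Path("D:\\mrdata\\friends\\out1"));
        if (!job1.waitForCompletion(true)) {
            System.exit(1); // stop if Step 1 failed
        }

        // Step 2: group all common friends per pair
        Job job2 = Job.getInstance(conf, "common-friends-step2");
        job2.setJarByClass(CommonFriendsTwo.class);
        job2.setMapperClass(CommonFriendsTwo.CommonFriendsTwoMapper.class);
        job2.setReducerClass(CommonFriendsTwo.CommonFriendsTwoReducer.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job2, new Path("D:\\mrdata\\friends\\out1"));
        FileOutputFormat.setOutputPath(job2, new Path("D:\\mrdata\\friends\\out2"));
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}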
The final result is as follows:
A-B E C
A-C D F
A-D E F
A-E D B C
A-F O B C D E
A-G F E C D
A-H E C D O
A-I O
A-J O B
A-K D C
A-L F E D
A-M E F
B-C A
B-D A E
B-E C
B-F E A C
B-G C E A
B-H A E C
B-I A
B-K C A
B-L E
B-M E
B-O A
C-D A F
C-E D
C-F D A
C-G D F A
C-H D A
C-I A
C-K A D
C-L D F
C-M F
C-O I A
D-E L
D-F A E
D-G E A F
D-H A E
D-I A
D-K A
D-L E F
D-M F E
D-O A
E-F D M C B
E-G C D
E-H C D
E-J B
E-K C D
E-L D
F-G D C A E
F-H A D O E C
F-I O A
F-J B O
F-K D C A
F-L E D
F-M E
F-O A
G-H D C E A
G-I A
G-K D A C
G-L D F E
G-M E F
G-O A
H-I O A
H-J O
H-K A C D
H-L D E
H-M E
H-O A
I-J O
I-K A
I-O A
K-L D
K-O A
L-M E F
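The result can be spot-checked against the original friend lists: for the pair A-B, A's friends are B,C,D,F,E,O and B's friends are A,C,E,K, so their common friends are C and E, which matches the line "A-B E C" above (the order of the friends differs only because Step 2 does not sort them). A small sanity-check sketch in plain Java (the class name VerifyPair is only for illustration):
import java.util.*;

// Hypothetical sanity check: the common friends of a pair are simply the
// intersection of the two original friend lists.
public class VerifyPair {
    public static void main(String[] args) {
        Set<String> friendsOfA = new TreeSet<>(Arrays.asList("B", "C", "D", "F", "E", "O"));
        Set<String> friendsOfB = new TreeSet<>(Arrays.asList("A", "C", "E", "K"));

        Set<String> common = new TreeSet<>(friendsOfA);
        common.retainAll(friendsOfB); // keep only elements present in both sets

        System.out.println("A-B\t" + String.join("\t", common)); // prints: A-B  C  E
    }
}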