求所有两两用户之间的共同好友
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J,K
以上是数据:
A:B,C,D,F,E,O
表示:B,C,D,E,F,O是A用户的好友。
首先求每一个人是哪些人的共同好友:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * First pass of the common-friends computation: inverts the friend lists.
 *
 * <p>Each input line has the form {@code A:B,C,D,F,E,O}, meaning that B, C, D, F, E and O
 * are friends of user A. For every friend the job writes one record whose key is the friend
 * and whose value is the comma-terminated list of users that listed this friend,
 * e.g. key {@code A}, value {@code F,I,O,K,G,D,C,H,B,}.
 *
 * <p>The trailing comma in the value is kept on purpose: it is harmless to a consumer that
 * splits the list on {@code ","} (Java drops trailing empty tokens) and keeps the output
 * format stable.
 *
 * @author Administrator
 */
public class SameFriendOne {

    /** Input directory used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT = "G:/files/input";

    /** Output directory used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT = "G:/files/output";

    /** Counter group under which skipped input lines are reported. */
    private static final String COUNTER_GROUP = "SameFriendOne";

    /**
     * Turns a line {@code person:friend1,friend2,...} into one {@code (friend, person)}
     * pair per listed friend.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused for every record; context.write serializes the current contents immediately.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Emits {@code (friend, person)} for every friend on the line.
         *
         * <p>Lines without a {@code ':'} separator, without a person, or without a friend
         * list are counted under {@code MALFORMED_LINES} and skipped instead of failing
         * the task with an {@link ArrayIndexOutOfBoundsException}.
         *
         * @param key     position of the line in the input (unused)
         * @param value   one input line
         * @param context Hadoop task context used to emit the pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split(":");
            if (parts.length < 2 || parts[0].trim().isEmpty()) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }

            outValue.set(parts[0].trim());
            for (String token : parts[1].split(",")) {
                String friend = token.trim();
                if (friend.isEmpty()) {
                    // Tolerate stray separators such as "A:B,,C".
                    continue;
                }
                outKey.set(friend);
                context.write(outKey, outValue);
            }
        }
    }

    /**
     * Collects, for one friend, every person that listed this friend.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        private final Text outValue = new Text();

        /**
         * Writes {@code friend -> "p1,p2,...,"}; every person is followed by a comma.
         *
         * @param friend  the user that appears in other users' friend lists
         * @param values  all persons that listed {@code friend}
         * @param context Hadoop task context used to emit the result
         */
        @Override
        protected void reduce(Text friend, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder persons = new StringBuilder();
            for (Text person : values) {
                persons.append(person.toString()).append(',');
            }
            outValue.set(persons.toString());
            context.write(friend, outValue);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; when omitted the original defaults {@code G:/files/input} and
     *             {@code G:/files/output} are used
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SameFriendOne.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Map output and final output share the same key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }
}
结果如下:
A F,I,O,K,G,D,C,H,B,
B E,J,F,A,
C B,E,K,A,H,G,F,
D H,C,G,F,E,A,K,L,
E A,B,L,G,M,F,D,H,
F C,M,L,A,D,G,
G M,
H O,
I O,C,
J O,
K O,B,
L D,E,
M E,F,
O A,H,I,J,F,
接下来我们开始求每两个之间的共同好友:
思路:A F,I,O,K,G,D,C,H,B,
对于这条记录我们对F,I,O,K,G,D,C,H,B,进行切分排序,然后采用两层循环
得到组合如:(B-C, A)、(B-D, A)……也就是说,这一行里的任意两个人都以 A 作为共同好友。
同理其他的各行数据都做这个操作,然后在Mapper阶段使用形如:B-C作为Key,使用A作为Value 这种形式输出
在Reducer阶段进行聚合得到B-C之间的所有共同好友。
关键点是在循环的时候避免同一对用户出现两种写法,形如:B-C 和 C-B。
解决方案:先对人名数组排序,内层循环从 i+1 开始,这样每一对只会以"较小者在前"的形式出现一次,见下面的代码。
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Second pass of the common-friends computation: derives the common friends of every
 * pair of users.
 *
 * <p>Each input line has the form {@code friend<TAB>p1,p2,...,}: a friend followed by the
 * persons that listed this friend. Any two persons on the same line share that friend, so
 * the mapper emits {@code (pi-pj, friend)} for every pair and the reducer concatenates all
 * friends collected for a pair, e.g. key {@code A-B}, value {@code E,C,}.
 */
public class SameFriendTwo {

    /** Input directory used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT = "G:/files/output";

    /** Output directory used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT = "G:/files/output2";

    /** Counter group under which skipped input lines are reported. */
    private static final String COUNTER_GROUP = "SameFriendTwo";

    /**
     * Expands one {@code friend<TAB>persons} line into a {@code (person-person, friend)}
     * record for every unordered pair of persons.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused for every record; context.write serializes the current contents immediately.
        private final Text pairKey = new Text();
        private final Text friendValue = new Text();

        /**
         * Emits every pair of persons on the line, keyed as {@code smaller-larger}.
         *
         * <p>Lines without a tab separator or without a person list are counted under
         * {@code MALFORMED_LINES} and skipped instead of failing the task with an
         * {@link ArrayIndexOutOfBoundsException}.
         *
         * @param key     position of the line in the input (unused)
         * @param value   one input line
         * @param context Hadoop task context used to emit the pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split("\t");
            if (parts.length < 2) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }

            String[] persons = parts[1].split(",");
            // Sorting first, then pairing index i only with j > i, yields each pair exactly
            // once and always in the same order (B-C, never C-B).
            Arrays.sort(persons);

            friendValue.set(parts[0]);
            for (int i = 0; i < persons.length - 1; i++) {
                for (int j = i + 1; j < persons.length; j++) {
                    pairKey.set(persons[i] + "-" + persons[j]);
                    context.write(pairKey, friendValue);
                }
            }
        }
    }

    /**
     * Concatenates all common friends collected for one pair of persons.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        private final Text outValue = new Text();

        /**
         * Writes {@code pair -> "f1,f2,...,"}; every friend is followed by a comma.
         *
         * <p>The trailing comma is kept deliberately so the output format stays exactly
         * as before (for example {@code E,C,}).
         *
         * @param per_per the pair key in {@code person-person} form
         * @param friends all common friends emitted for this pair
         * @param context Hadoop task context used to emit the result
         */
        @Override
        protected void reduce(Text per_per, Iterable<Text> friends, Context context)
                throws IOException, InterruptedException {
            StringBuilder joined = new StringBuilder();
            for (Text friend : friends) {
                joined.append(friend.toString()).append(',');
            }
            outValue.set(joined.toString());
            context.write(per_per, outValue);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; when omitted the original defaults {@code G:/files/output} and
     *             {@code G:/files/output2} are used
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SameFriendTwo.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Map output and final output share the same key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean isDone = job.waitForCompletion(true);
        System.exit(isDone ? 0 : 1);
    }
}
得到的结果如下:
A-B E,C,
A-C D,F,
A-D E,F,
A-E B,C,D,
A-F C,E,O,D,B,
A-G E,F,C,D,
A-H C,D,E,O,
A-I O,
A-J O,B,
A-K C,D,
A-L F,D,E,
A-M F,E,
B-C A,
B-D A,E,
B-E C,
B-F C,A,E,
B-G E,C,A,
B-H E,C,A,
B-I A,
B-K A,C,
B-L E,
B-M E,
B-O A,K,
C-D A,F,
C-E D,
C-F A,D,
C-G A,D,F,
C-H D,A,
C-I A,
C-K A,D,
C-L D,F,
C-M F,
C-O I,A,
D-E L,
D-F A,E,
D-G E,A,F,
D-H A,E,
D-I A,
D-K A,
D-L E,F,
D-M F,E,
D-O A,
E-F D,M,C,B,
E-G C,D,
E-H C,D,
E-J B,
E-K C,D,
E-L D,
F-G D,C,A,E,
F-H A,D,O,E,C,
F-I O,A,
F-J B,O,
F-K D,C,A,
F-L E,D,
F-M E,
F-O A,
G-H D,C,E,A,
G-I A,
G-K D,A,C,
G-L D,F,E,
G-M E,F,
G-O A,
H-I O,A,
H-J O,
H-K A,C,D,
H-L D,E,
H-M E,
H-O A,
I-J O,
I-K A,
I-O A,
K-L D,
K-O A,
L-M E,F,