需求:找出扣扣共同好友
用户:好友,好友2,…
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
思路:先找出哪个好友是哪些用户的好友,比如B是A,E,F,J的共同好友B–>A,E,F,J
写成
<A-E,B><A-F,B><A-J,B>...<用户1-用户2,好友B>
接着传到reduce中为
<用户1-用户2,好友迭代器>
输出成<用户1-用户2,好友1,好友2,好友3...>
程序实现:第一步
/**
 * Step one of the common-friends job.
 * Input lines look like "A:B,C,D,F,E,O" (user:friend,friend,...).
 * Output lines look like "B\tA,F,J,E," (friend -> every user who has that friend),
 * which is exactly the input format step two expects.
 */
public class QQFriendsFindStepOne {
	/**
	 * Inverts each "user:friends" line into <friend, user> pairs so the
	 * reducer receives, per friend, all users that list that friend.
	 */
	static class QQFriendsFindStepOneMapper extends Mapper<LongWritable, Text, Text, Text>{
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// One raw input line, e.g. "A:B,C,D,F,E,O".
			String line = value.toString();
			String[] userFriends = line.split(":");
			// Skip blank or malformed records instead of crashing the whole job
			// with an ArrayIndexOutOfBoundsException.
			if (userFriends.length < 2) {
				return;
			}
			String user = userFriends[0];
			String[] friends = userFriends[1].split(",");
			// Emit <friend, user> for every friend of this user.
			for (String friend : friends) {
				context.write(new Text(friend), new Text(user));
			}
		}
	}
	/**
	 * Receives <friend, iterable of users having that friend> and joins the
	 * users into a comma-separated list: "B\tA,F,J,E,".
	 */
	static class QQFriendsFindStepOneReducer extends Reducer<Text, Text, Text, Text>{
		@Override
		protected void reduce(Text friend, Iterable<Text> users, Context context)
				throws IOException, InterruptedException {
			// StringBuilder: no synchronization needed in a single-threaded reduce call.
			// The trailing comma is harmless: step two's split(",") drops trailing
			// empty strings, and it matches the documented output format.
			StringBuilder sb = new StringBuilder();
			for (Text user : users) {
				sb.append(user.toString()).append(",");
			}
			context.write(friend, new Text(sb.toString()));
		}
	}
	/**
	 * Job driver. args[0] = input path, args[1] = output path (must not exist).
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		// Locate the jar that contains this class.
		job.setJarByClass(QQFriendsFindStepOne.class);
		job.setMapperClass(QQFriendsFindStepOneMapper.class);
		job.setReducerClass(QQFriendsFindStepOneReducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		// Final (reducer) output types.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		boolean ok = job.waitForCompletion(true);
		System.exit(ok ? 0 : 1);
	}
}
测试第一步输出:
将工程打包上传到hadoop集群
hdfs创建文件夹/friends/input
linux编辑文件friends.data,写入好友用户数据
将本地文件上传到/friends/input
运行程序查看输出结果
[root@mini1 ~]# hadoop fs -mkdir -p /friends/input
[root@mini1 ~]# vi friends.data
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
[root@mini1 ~]# hadoop fs -put friends.data /friends/input
[root@mini1 ~]# hadoop jar friends.jar com.scu.hadoop.sharefriends.QQFriendsFindStepOne /friends/input /friends/output
[root@mini1 ~]# hadoop fs -cat /friends/input/part-r-00000
Java HotSpot(TM) Client VM warning: You have loaded library /root/apps/hadoop-2.6.4/lib/native/libhadoop.so.1.0.0 which might have disabled stack guard. The VM will try to fix the stack guard now.
It's highly recommended that you fix the library with 'execstack -c <libfile>', or link it with '-z noexecstack'.
cat: `/friends/input/part-r-00000': No such file or directory
[root@mini1 ~]# hadoop fs -cat /friends/output/part-r-00000
A I,K,C,B,G,F,H,O,D,
B A,F,J,E,
C A,E,B,H,F,G,K,
D G,C,K,A,L,F,E,H,
E G,M,L,H,A,F,B,D,
F L,M,D,C,G,A,
G M,
H O,
I O,C,
J O,
K B,
L D,E,
M E,F,
O A,H,I,J,F,
程序实现:第二步
/**
 * Step two of the common-friends job.
 * Input: step one's output, "friend\tuser1,user2,...,userN," — a friend and
 * every user who has that friend.
 * Output: "userA-userB\tfriend1,friend2,..." — each user pair with all of
 * their common friends.
 */
public class QQFriendsFindStepTwo {
	/**
	 * For each "friend\tusers" line, emits <userA-userB, friend> for every
	 * unordered pair of users sharing that friend.
	 */
	static class QQFriendsFindStepTwoMapper extends Mapper<LongWritable, Text, Text, Text>{
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String line = value.toString();
			// Key and value from step one are tab-separated.
			String[] friendUsers = line.split("\t");
			String friend = friendUsers[0];
			String[] users = friendUsers[1].split(",");
			// Sort so a pair is always emitted in one canonical order:
			// otherwise <A-B,f> and <B-A,f> would reach different reduce keys.
			Arrays.sort(users);
			// Emit every unordered pair. Bounds are length-1 / length:
			// the original length-2 / length-1 bounds skipped every pair that
			// involved the last sorted user (e.g. all *-O pairs were lost).
			for (int i = 0; i < users.length - 1; i++) {
				for (int j = i + 1; j < users.length; j++) {
					context.write(new Text(users[i] + "-" + users[j]), new Text(friend));
				}
			}
		}
	}
	/**
	 * Receives <userA-userB, iterable of common friends> and joins the
	 * friends into a comma-separated list.
	 */
	static class QQFriendsFindStepTwoReducer extends Reducer<Text, Text, Text, Text>{
		@Override
		protected void reduce(Text userPair, Iterable<Text> friends, Context context)
				throws IOException, InterruptedException {
			// StringBuilder: no synchronization needed in a single-threaded reduce call.
			StringBuilder sb = new StringBuilder();
			for (Text friend : friends) {
				sb.append(friend.toString()).append(",");
			}
			// e.g. "A-B\tC,E,"
			context.write(userPair, new Text(sb.toString()));
		}
	}
	/**
	 * Job driver. args[0] = step-one output file/dir, args[1] = output path.
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		// Locate the jar that contains this class.
		job.setJarByClass(QQFriendsFindStepTwo.class);
		job.setMapperClass(QQFriendsFindStepTwoMapper.class);
		job.setReducerClass(QQFriendsFindStepTwoReducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		// Final (reducer) output types.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		boolean ok = job.waitForCompletion(true);
		System.exit(ok ? 0 : 1);
	}
}
第二步测试:
重新将工程打包上传到hadoop集群
将第一步的输出文件作为该步的输入文件
[root@mini1 ~]# hadoop jar friends.jar com.scu.hadoop.sharefriends.QQFriendsFindStepTwo /friends/output/part-r-00000 /friends/output2
[root@mini1 ~]# hadoop fs -cat /friends/output2/part-r-00000
A-B C,E,
A-C F,D,
A-D E,F,
A-E B,C,D,
A-F C,D,B,E,O,
A-G D,E,F,C,
A-H E,O,C,D,
A-I O,
A-K D,
A-L F,E,
B-C A,
B-D E,A,
B-E C,
B-F E,A,C,
B-G C,E,A,
B-H E,C,A,
B-I A,
B-K A,
B-L E,
C-D F,A,
C-E D,
C-F D,A,
C-G F,A,D,
C-H A,D,
C-I A,
C-K D,A,
C-L F,
D-F E,A,
D-G A,E,F,
D-H A,E,
D-I A,
D-K A,
D-L F,E,
E-F C,D,B,
E-G D,C,
E-H D,C,
E-K D,
F-G C,E,D,A,
F-H C,A,D,E,O,
F-I A,O,
F-K D,A,
F-L E,
G-H D,E,C,A,
G-I A,
G-K A,D,
G-L F,E,
H-I A,O,
H-K A,D,
H-L E,
I-K A,