MapReduce Application: Finding Common QQ Friends

Requirement: for every pair of QQ users, find the friends they have in common.
Input format: user:friend1,friend2,…
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

Approach: first invert the relation, i.e. work out which users each person appears as a friend of. For example, B is a friend of A, E, F, and J: B -> A,E,F,J.
From that list, emit one record per pair of users:

<A-E,B> <A-F,B> <A-J,B> ... in general <user1-user2, friend B>

After the shuffle, each reduce call receives

<user1-user2, iterator of shared friends>

and writes out <user1-user2, friend1,friend2,friend3,...>. A standalone sketch of the pair generation follows.
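
To make the pair generation concrete, here is a minimal standalone sketch (the class and method names are illustrative, not part of the MapReduce code below). Sorting first guarantees that a pair is always emitted as A-E and never E-A, so both orderings land on the same reduce key:

import java.util.Arrays;

public class PairDemo {
    // Given the users who all share `friend`, print one
    // <user1-user2,friend> record per unordered pair of users.
    static void emitPairs(String friend, String[] users) {
        Arrays.sort(users);
        for (int i = 0; i < users.length - 1; i++) {
            for (int j = i + 1; j < users.length; j++) {
                System.out.println("<" + users[i] + "-" + users[j] + "," + friend + ">");
            }
        }
    }

    public static void main(String[] args) {
        // From the sample data: B is a friend of A, E, F, and J
        emitPairs("B", new String[]{"A", "E", "F", "J"});
        // Prints <A-E,B> <A-F,B> <A-J,B> <E-F,B> <E-J,B> <F-J,B>
    }
}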

Implementation: step one

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class QQFriendsFindStepOne {
    static class QQFriendsFindStepOneMapper extends Mapper<LongWritable, Text, Text, Text>{
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            //one input line looks like  A:B,C,D,F,E,O  (user:friends)
            String values = value.toString();
            //split into the user and the friend list
            String[] user_friends = values.split(":");
            String user = user_friends[0];
            String[] friends = user_friends[1].split(",");
            //emit <friend, user> so the shuffle groups users by shared friend
            for (String friend : friends) {
                context.write(new Text(friend), new Text(user));
            }
        }
    }
    static class QQFriendsFindStepOneReducer extends Reducer<Text, Text, Text, Text>{
        //receives <friend, iterator of the users who have that friend>
        @Override
        protected void reduce(Text friend, Iterable<Text> users, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text user : users) {
                sb.append(user.toString()).append(",");
            }
            //emit <friend, user1,user2,...,userN,> e.g. <A	C,B,D,F,E,O,>
            //(the trailing comma is harmless: step two's split() discards
            //trailing empty strings)
            context.write(friend, new Text(sb.toString()));
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        //locate the jar containing this driver class
        job.setJarByClass(QQFriendsFindStepOne.class);

        job.setMapperClass(QQFriendsFindStepOneMapper.class);
        job.setReducerClass(QQFriendsFindStepOneReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        //final output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean ex = job.waitForCompletion(true);
        System.exit(ex ? 0 : 1);
    }
}
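
Before packaging for the cluster, the job can be smoke-tested in Hadoop's local mode; a minimal sketch, where the driver class name and the /tmp paths are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class QQFriendsFindStepOneLocal {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name", "local"); // run mappers/reducers in-process
        conf.set("fs.defaultFS", "file:///");          // use the local filesystem, not HDFS
        Job job = Job.getInstance(conf);
        job.setMapperClass(QQFriendsFindStepOne.QQFriendsFindStepOneMapper.class);
        job.setReducerClass(QQFriendsFindStepOne.QQFriendsFindStepOneReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("/tmp/friends/input"));
        FileOutputFormat.setOutputPath(job, new Path("/tmp/friends/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}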

Testing step one:
Package the project and upload the jar to the Hadoop cluster.
Create the HDFS directory /friends/input.
On Linux, create the file friends.data with the user/friend data.
Upload the local file to /friends/input.
Run the job and check the output.

[root@mini1 ~]# hadoop fs -mkdir -p /friends/input
[root@mini1 ~]# vi friends.data
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
[root@mini1 ~]# hadoop fs -put friends.data /friends/input
[root@mini1 ~]# hadoop jar friends.jar com.scu.hadoop.sharefriends.QQFriendsFindStepOne  /friends/input /friends/output
[root@mini1 ~]# hadoop fs -cat /friends/output/part-r-00000
A       I,K,C,B,G,F,H,O,D,
B       A,F,J,E,
C       A,E,B,H,F,G,K,
D       G,C,K,A,L,F,E,H,
E       G,M,L,H,A,F,B,D,
F       L,M,D,C,G,A,
G       M,
H       O,
I       O,C,
J       O,
K       B,
L       D,E,
M       E,F,
O       A,H,I,J,F,

Implementation: step two

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class QQFriendsFindStepTwo {
    static class QQFriendsFindStepTwoMapper extends Mapper<LongWritable, Text, Text, Text>{
        //input is step one's output:
        //<friend, user1,user2,...,userN,> e.g. <A	C,B,D,F,E,O,>
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            //split off the friend, then split the user list
            String[] friend_users = line.split("\t");
            String friend = friend_users[0];
            String[] users = friend_users[1].split(",");
            //sorting is essential: without it, <A-B,friend> and <B-A,friend>
            //would reach the reducer as two different keys
            Arrays.sort(users);
            //emit <userA-userB, friend> for every unordered pair,
            //including pairs that contain the last user in the sorted array
            for (int i = 0; i < users.length - 1; i++) {
                for (int j = i + 1; j < users.length; j++) {
                    context.write(new Text(users[i] + "-" + users[j]), new Text(friend));
                }
            }
        }
    }
    static class QQFriendsFindStepTwoReducer extends Reducer<Text, Text, Text, Text>{
        //receives <userA-userB, iterator of shared friends>
        @Override
        protected void reduce(Text userTuser, Iterable<Text> friends, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text friend : friends) {
                sb.append(friend.toString()).append(",");
            }
            //emit <user1-user2, friend list> e.g. <A-B	C,D,E,...>
            context.write(userTuser, new Text(sb.toString()));
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        //locate the jar containing this driver class
        job.setJarByClass(QQFriendsFindStepTwo.class);

        job.setMapperClass(QQFriendsFindStepTwoMapper.class);
        job.setReducerClass(QQFriendsFindStepTwoReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        //final output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean ex = job.waitForCompletion(true);
        System.exit(ex ? 0 : 1);
    }
}
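
The two jobs can also be chained in a single driver so that step two starts only after step one succeeds; a minimal sketch using Hadoop's JobControl, where the driver class name, job names, and group name are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class QQFriendsFindDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job1 = Job.getInstance(conf, "qq-friends-step-one");
        // ...configure job1 exactly as in QQFriendsFindStepOne.main()...
        Job job2 = Job.getInstance(conf, "qq-friends-step-two");
        // ...configure job2 as in QQFriendsFindStepTwo.main(), pointing its
        // input path at job1's output directory...

        ControlledJob cj1 = new ControlledJob(conf);
        cj1.setJob(job1);
        ControlledJob cj2 = new ControlledJob(conf);
        cj2.setJob(job2);
        cj2.addDependingJob(cj1); // job2 runs only after job1 succeeds

        JobControl control = new JobControl("qq-friends");
        control.addJob(cj1);
        control.addJob(cj2);
        new Thread(control).start(); // JobControl implements Runnable
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        control.stop();
    }
}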

Testing step two:
Repackage the project and upload it to the Hadoop cluster.
Use step one's output file as this step's input.

[root@mini1 ~]# hadoop jar friends.jar com.scu.hadoop.sharefriends.QQFriendsFindStepTwo  /friends/output/part-r-00000 /friends/output2
[root@mini1 ~]# hadoop fs -cat /friends/output2/part-r-00000
A-B     C,E,
A-C     F,D,
A-D     E,F,
A-E     B,C,D,
A-F     C,D,B,E,O,
A-G     D,E,F,C,
A-H     E,O,C,D,
A-I     O,
A-K     D,
A-L     F,E,
B-C     A,
B-D     E,A,
B-E     C,
B-F     E,A,C,
B-G     C,E,A,
B-H     E,C,A,
B-I     A,
B-K     A,
B-L     E,
C-D     F,A,
C-E     D,
C-F     D,A,
C-G     F,A,D,
C-H     A,D,
C-I     A,
C-K     D,A,
C-L     F,
D-F     E,A,
D-G     A,E,F,
D-H     A,E,
D-I     A,
D-K     A,
D-L     F,E,
E-F     C,D,B,
E-G     D,C,
E-H     D,C,
E-K     D,
F-G     C,E,D,A,
F-H     C,A,D,E,O,
F-I     A,O,
F-K     D,A,
F-L     E,
G-H     D,E,C,A,
G-I     A,
G-K     A,D,
G-L     F,E,
H-I     A,O,
H-K     A,D,
H-L     E,
I-K     A,
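
As a sanity check, the same answer can be computed without MapReduce by intersecting friend sets locally; a minimal sketch (the class name and input path are illustrative), assuming the friends.data format shown above:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class CommonFriendsCheck {
    public static void main(String[] args) throws Exception {
        // Parse "user:friend1,friend2,..." lines into user -> friend set
        Map<String, Set<String>> friendsOf = new TreeMap<>();
        for (String line : Files.readAllLines(Paths.get("friends.data"))) {
            String[] parts = line.split(":");
            friendsOf.put(parts[0], new TreeSet<>(Arrays.asList(parts[1].split(","))));
        }
        // Intersect the friend sets of every pair of users
        List<String> users = new ArrayList<>(friendsOf.keySet());
        for (int i = 0; i < users.size() - 1; i++) {
            for (int j = i + 1; j < users.size(); j++) {
                Set<String> common = new TreeSet<>(friendsOf.get(users.get(i)));
                common.retainAll(friendsOf.get(users.get(j)));
                if (!common.isEmpty()) {
                    System.out.println(users.get(i) + "-" + users.get(j) + "\t" + common);
                }
            }
        }
    }
}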