MapReduce共同好友案例

最新推荐文章于 2022-04-22 10:21:26 发布

weizhouck

最新推荐文章于 2022-04-22 10:21:26 发布

阅读量514

点赞数

分类专栏： mapreduce

本文链接：https://blog.csdn.net/WandaZw/article/details/82744717

版权

mapreduce 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

需求：根据已有数据，求出两两之间共同好友是谁

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

思路分析：

第一个Mapper输出（好友--用户）：B--A C--A D--A F--A ...... ；第一个Reducer输出（好友 --> 拥有该好友的所有用户）：B--> A E F J

第二个Mapper输出（用户对 共同好友）：A-E B ; A-F B ; A-J B ; E-F B ; E-J B ; F-J B ; 第二个Reducer按用户对汇总，得到 A-E B C D ; A-M E F....

(1)第一步：统计每个好友对应哪些用户

package com.mapreduce.friends;

import com.mapreduce.wordCount.WordCountDriver;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

/**
 * First job of the common-friends computation.
 *
 * <p>Reads lines of the form {@code user:friend1,friend2,...} and inverts them, producing one
 * output line per friend: the friend as key and, as value, the space-separated users whose
 * friend list contains that friend.
 */
public class FriendsStepOne {

    /**
     * Emits one {@code (friend, user)} pair for every friend listed on an input line.
     */
    public static class FriendsStepOneMapper extends Mapper<LongWritable,Text,Text,Text> {
        // Reused for every emitted pair instead of allocating new Text objects per record.
        Text k = new Text();
        Text v = new Text();

        /**
         * Parses a {@code user:friend1,friend2,...} line and writes {@code (friend, user)} pairs.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   one line of input text
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();

            // Split once; a line without both a user and a friend list (blank line,
            // missing ':', nothing after ':') is skipped instead of failing the task.
            String[] parts = line.split(":");
            if (parts.length < 2) {
                return;
            }

            String user = parts[0].trim();
            if (user.isEmpty()) {
                return;
            }

            v.set( user );
            for( String friend : parts[1].split( "," ) ){
                // The reducer output is whitespace-delimited, so stray whitespace and
                // empty tokens (e.g. from "A:B,,C") must not reach it.
                String trimmed = friend.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                k.set( trimmed );
                context.write( k,v );
            }
        }
    }

    /**
     * Concatenates all users received for one friend into a single value.
     */
    public static class FriendsStepOneReducer extends Reducer<Text,Text,Text,Text>{
        /**
         * Writes the friend together with its users; each user is followed by a single space,
         * so the value ends with a trailing space.
         *
         * @param key     the friend
         * @param values  the users whose friend list contains {@code key}
         * @param context sink for the result
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Local, single-threaded buffer: StringBuilder is sufficient.
            StringBuilder users = new StringBuilder();
            for ( Text value :values ){
                users.append( value.toString() ).append(" ");
            }
            context.write( key , new Text( users.toString() ) );
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments; not used, the paths are hard-coded
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception{
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Resolve the job jar from this job's own class, not from an unrelated driver.
        job.setJarByClass(FriendsStepOne.class);

        // Mapper and reducer implementations used by this job
        job.setMapperClass(FriendsStepOneMapper.class);
        job.setReducerClass(FriendsStepOneReducer.class);

        // Key/value types emitted by the mapper
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Components used to read the input and write the result
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Directory holding the data to process
        FileInputFormat.setInputPaths(job, new Path("d://bigDataJob/commonFriends/input"));

        // Directory the result is written to; output left by a previous run is removed first
        Path out = new Path("d://bigDataJob/commonFriends/output");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(out)) {
            fileSystem.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);

        boolean res = job.waitForCompletion(true);

        System.exit(res?0:1);
    }

}

（2）将用户集合两两排列，统计出共同的好友（防止数据重复 a-b b-a 的情况发生，此处需要对用户进行排序）

package com.mapreduce.friends;

import com.mapreduce.wordCount.WordCountDriver;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.util.Arrays;

/**
 * Second job of the common-friends computation.
 *
 * <p>Reads lines of the form {@code friend<TAB>user1 user2 ...}, pairs up the users of each
 * friend, and produces one output line per user pair: the pair as key ({@code userA-userB})
 * and, as value, the space-separated friends the two users have in common.
 */
public class FriendsStepTwo {

    /**
     * Emits {@code (userA-userB, friend)} for every pair of users that share the friend.
     */
    public static class FriendsStepTwoMapper extends Mapper<LongWritable,Text,Text,Text> {
        // Reused for every emitted pair instead of allocating two Text objects per pair.
        private final Text pairKey = new Text();
        private final Text friendValue = new Text();

        /**
         * Parses a {@code friend<TAB>user1 user2 ...} line and writes one record per user pair.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   one line of input text
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();

            // Split once; a line without a tab-separated user list is skipped
            // instead of failing the task.
            String[] fields = line.split( "\t" );
            if (fields.length < 2) {
                return;
            }

            String[] users = fields[1].split( " " );
            // Sorting makes every pair come out as "smaller-larger", so a-b and b-a
            // collapse into the same key.
            Arrays.sort( users );

            friendValue.set( fields[0] );
            for( int i=0;i<users.length-1 ;i++ ){
                for( int j=i+1;j<users.length;j++ ){
                    pairKey.set( users[i]+"-"+users[j] );
                    context.write( pairKey, friendValue );
                }
            }
        }
    }

    /**
     * Concatenates all friends received for one user pair into a single value.
     */
    public static class FriendsStepTwoReducer extends Reducer<Text,Text,Text,Text> {
        /**
         * Writes the user pair together with its friends; each friend is followed by a single
         * space, so the value ends with a trailing space.
         *
         * @param key     the user pair, formatted as {@code userA-userB}
         * @param values  the friends emitted for that pair
         * @param context sink for the result
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Local, single-threaded buffer: StringBuilder is sufficient.
            StringBuilder friends = new StringBuilder();
            for ( Text value :values ){
                friends.append( value.toString() ).append(" ");
            }
            context.write( key , new Text( friends.toString() ) );
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments; not used, the paths are hard-coded
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception{
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Resolve the job jar from this job's own class, not from an unrelated driver.
        job.setJarByClass(FriendsStepTwo.class);

        // Mapper and reducer implementations used by this job
        job.setMapperClass(FriendsStepTwoMapper.class);
        job.setReducerClass(FriendsStepTwoReducer.class);

        // Key/value types emitted by the mapper
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Components used to read the input and write the result
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Directory holding the data to process
        FileInputFormat.setInputPaths(job, new Path("d://bigDataJob/commonFriends/output"));

        // Directory the result is written to; output left by a previous run is removed first
        Path out = new Path("d://bigDataJob/commonFriends/output-2");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(out)) {
            fileSystem.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);

        boolean res = job.waitForCompletion(true);

        System.exit(res?0:1);
    }


}