数据集切记不能留空行
第一位表示的是本人,后面表示的是他的好友
tom hello hadoop cat
world hadoop hello hive
cat tom hive
mr hive hello
hive cat hadoop world hello mr
hadoop tom hive world
hello tom world hive mr
思路:
比如第一行,tom和后面每个人都是认识的,所以hello-hadoop有共同好友就是tom,hello-cat也有公共好友tom,以此类推。
第一行<tom-hello,R> <hello-hadoop,G>,R是认识,G表示共同好友,统计共有多少组键值对。
因为同一对好友可能以<hello-hadoop,G>或<hadoop-hello,G>两种顺序出现,所以在下面Mapper的getNames()函数中通过比较两个姓名的ASCII码值,把两种顺序统一成同一个键。
Mapper
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FOFMapper extends Mapper<LongWritable, Text, Text, Text> {

    // Reusable output objects: avoids allocating two Text instances per record.
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Emits every pair of names on a line. The first name on the line is the
     * owner; any pair containing the owner is a direct relation ("R"), while a
     * pair of two of the owner's friends shares the owner as a common friend ("G").
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Trim and split on runs of whitespace so extra spaces or blank lines
        // cannot produce empty "name" tokens (the original split(" ") could).
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return; // defensively skip blank lines
        }
        String[] names = line.split("\\s+");
        for (int j = 0; j < names.length; j++) {
            for (int i = j + 1; i < names.length; i++) {
                outKey.set(getNames(names[j], names[i]));
                // j == 0 means the pair involves the owner -> direct relation "R";
                // every other pair is a friend-of-friend candidate "G".
                outValue.set(j == 0 ? "R" : "G");
                context.write(outKey, outValue);
            }
        }
    }

    /**
     * Normalizes a name pair so <hello-hadoop> and <hadoop-hello> produce the
     * same key: the lexicographically larger name always comes first.
     */
    private String getNames(String namea, String nameb) {
        if (namea.compareTo(nameb) > 0) {
            return namea + "_" + nameb;
        }
        return nameb + "_" + namea;
    }
}
Reducer
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FOFReducer extends Reducer<Text, Text, Text, IntWritable> {

    /**
     * Counts common friends for a name pair. A single "R" marker means the two
     * people are already direct friends, so the whole pair is dropped;
     * otherwise each "G" marker contributes one common friend to the total.
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int commonFriends = 0;
        for (Text marker : values) {
            if ("R".equals(marker.toString())) {
                // Already direct friends -> nothing to recommend for this pair.
                return;
            }
            commonFriends++;
        }
        context.write(key, new IntWritable(commonFriends));
    }
}
MainClass
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MainClass {
    /**
     * Driver for the first FOF job: counts common friends per name pair.
     * Usage: yarn jar myfof.jar xpu.com.mr.MainClass <inputpath> <outPath>
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length != 2) {
            System.err.println(" yarn jar myfof.jar xpu.com.mr.MainClass <inputpath> <outPath>");
            System.exit(1);
        }
        Configuration conf = new Configuration(true);
        // Must be set BEFORE Job.getInstance(conf): the Job copies the
        // configuration, so changes made to `conf` afterwards (as the original
        // code did) are never seen by the job.
        conf.set("mapreduce.framework.name", "local");
        Job job = Job.getInstance(conf);
        job.setJobName("好友推荐-共同好友数");
        job.setJarByClass(MainClass.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(FOFMapper.class);
        job.setReducerClass(FOFReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Declare the reducer's real output types explicitly instead of
        // leaving them commented out (defaults are LongWritable/Text).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Propagate job success/failure as the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
以上获取到的是hello_hadoop,2这样的数据,但这还不能直接用于好友推荐,因为无法直接看出对某个人来说,哪个候选好友与他的共同好友数最多。
按照_下划线切分得到hello hadoop,2这种结构的数据
package com.xpu.mr;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FOF2Mapper extends Mapper<Text, Text, Text, Text> {

    /**
     * Input key is "nameA_nameB" and input value is the common-friend count.
     * Emits two records so each person of the pair sees the other as a
     * recommendation candidate: <nameA, "nameB,count"> and <nameB, "nameA,count">.
     */
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        String[] pair = key.toString().split("_");
        String count = value.toString();
        context.write(new Text(pair[0]), new Text(pair[1] + "," + count));
        context.write(new Text(pair[1]), new Text(pair[0] + "," + count));
    }
}
在Reducer中将候选好友(如hadoop)作为键、共同好友数(如2)作为值放入Map集合中,再按共同好友数降序排序后输出。
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class FOF2Reducer extends Reducer<Text, Text, Text, Text> {

    /**
     * For one person, collects every candidate friend with its common-friend
     * count and emits the candidates ordered by count, highest first.
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // candidate name -> common-friend count
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (Text value : values) {
            String valueStr = value.toString();
            int comma = valueStr.indexOf(',');
            counts.put(valueStr.substring(0, comma), Integer.valueOf(valueStr.substring(comma + 1)));
        }
        // Sort by common-friend count, descending — replaces the original
        // hand-rolled insertion sort with the standard-library sort.
        List<Map.Entry<String, Integer>> ordered = new ArrayList<>(counts.entrySet());
        ordered.sort(Map.Entry.<String, Integer>comparingByValue().reversed());
        // Plain loop so IOException/InterruptedException propagate per this
        // method's throws clause instead of being swallowed by
        // printStackTrace inside a forEach lambda.
        for (Map.Entry<String, Integer> entry : ordered) {
            context.write(key, new Text(entry.getKey() + "," + entry.getValue()));
        }
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MainClass2 {
    /**
     * Driver for the second FOF job: regroups "nameA_nameB<TAB>count" lines
     * per person and sorts each person's candidates by common-friend count.
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length != 2) {
            // Fixed: usage message previously named MainClass instead of MainClass2.
            System.err.println(" yarn jar myfof.jar xpu.com.mr.MainClass2 <inputpath> <outPath>");
            System.exit(1);
        }
        Configuration conf = new Configuration(true);
        Job job = Job.getInstance(conf);
        job.setJobName("好友推荐2-共同好友数");
        job.setJarByClass(MainClass2.class);
        // KeyValueTextInputFormat: key is the text before the first \t on each
        // line, value is the rest; if there is no \t the whole line is the key.
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(FOF2Mapper.class);
        job.setReducerClass(FOF2Reducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // FOF2Reducer emits <Text, Text>; the commented-out original wrongly
        // declared IntWritable for the value.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Propagate job success/failure as the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}