MapReduce Friend Recommendation

Note: the dataset must not contain any blank lines.
The first name on each line is the person; the names that follow are that person's friends.

tom hello hadoop cat
world hadoop hello hive
cat tom hive
mr hive hello
hive cat hadoop world hello mr
hadoop tom hive world
hello tom world hive mr

Approach:
Take the first line as an example: tom knows every person listed after him, so hello and hadoop have tom as a common friend, hello and cat also have tom as a common friend, and so on.
From the first line the mapper emits pairs such as <tom_hello, R> and <hello_hadoop, G>, where R means the two people already know each other directly and G means they share a common friend; the job then counts how many such key-value pairs each pair of people produces.
The same pair can appear in either order (for example <hadoop_hello, G> and <hello_hadoop, G>), so the mapper's getNames() method below compares the two names by their ASCII (lexicographic) order and always puts them in the same order, making both forms map to the same key.
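
For example, with this rule the first input line (tom hello hadoop cat) produces the following key-value pairs (keys already normalized by getNames()):

tom_hello	R
tom_hadoop	R
tom_cat	R
hello_hadoop	G
hello_cat	G
hadoop_cat	G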

Mapper


import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FOFMapper extends Mapper<LongWritable, Text,Text,Text>{
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        //{"","","",""}
        String[] names = line.split(" ");
        for (int j=0; j< names.length; j++) {
            for (int i = j+1; i < names.length; i++) {
                if(j==0){
                    // the person paired with one of his friends: they already know each other (R)
                    context.write(new Text(getNames(names[j],names[i])),new Text("R"));
                }else{
                    // two of the person's friends paired together: this person is a common friend of theirs (G)
                    context.write(new Text(getNames(names[j],names[i])),new Text("G"));
                }
            }
        }
    }
    // Normalize the pair so that <hello-hadoop> and <hadoop-hello> both become the same key (hello_hadoop) and are counted together
    private String getNames(String namea,String nameb){
        int result = namea.compareTo(nameb);
        if(result>0){
            return namea + "_" + nameb;
        }
        return nameb + "_" + namea;
    }
}
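
A quick way to check the normalization locally (a minimal sketch; the FOFMapperTest class is only an illustration and not part of the job):

public class FOFMapperTest {
    // Same comparison logic as FOFMapper.getNames(): the lexicographically larger name goes first.
    private static String getNames(String namea, String nameb) {
        return namea.compareTo(nameb) > 0 ? namea + "_" + nameb : nameb + "_" + namea;
    }

    public static void main(String[] args) {
        // Both orderings of the same pair produce the identical key.
        System.out.println(getNames("hello", "hadoop")); // hello_hadoop
        System.out.println(getNames("hadoop", "hello")); // hello_hadoop
    }
}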

Reducer

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FOFReducer extends Reducer<Text,Text,Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
       int sum = 0;
       for (Text value : values){
           String val = value.toString();
           if("R".equals(val)){
               // an "R" record means the two people are already direct friends: nothing to recommend
               return;
           }
           // every "G" record is one common friend
           sum++;
       }
       context.write(key, new IntWritable(sum));
    }
}
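
For instance (a hypothetical group, not computed from the dataset): if the key hello_hadoop arrives with the values [G, G], the reducer writes the pair with count 2; if any value in the group is R, the two people are already direct friends, so nothing is written for that pair.

hello_hadoop	[G, G]	->	hello_hadoop	2
tom_hello	[R, R]	->	(no output: already direct friends)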

MainClass

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MainClass {
    public static void main(String[] args) throws Exception {
        if(args == null || args.length!=2){
            System.err.println(" yarn jar myfof.jar xpu.com.mr.MainClass <inputpath> <outPath>");
            System.exit(1);
        }

        Configuration conf = new Configuration(true);
        // Run locally; this must be set before Job.getInstance() copies the configuration.
        // The job can also be packaged and run on a cluster: yarn jar <jar> <main class> <input path> <output path>
        conf.set("mapreduce.framework.name","local");
        Job job = Job.getInstance(conf);
        job.setJobName("friend recommendation - common friend count");
        job.setJarByClass(MainClass.class);


        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        job.setMapperClass(FOFMapper.class);
        job.setReducerClass(FOFReducer.class);

        job.setMapOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
    }
}

The first job produces records like hello_hadoop	2, i.e. a pair of people together with the number of friends they have in common. That alone is not enough to make a recommendation, because for a given person we cannot yet tell which candidate has the largest common-friend count.
A second job therefore splits the key on the underscore, turning each record into the form hello hadoop,2, keyed by each of the two people.
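
For example, the first-job record hello_hadoop	2 (the count 2 here just follows the example above) is emitted twice by FOF2Mapper, once keyed by each person:

hello	hadoop,2
hadoop	hello,2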

package com.xpu.mr;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FOF2Mapper extends Mapper<Text, Text,Text,Text>{
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        // the key from the first job is "nameA_nameB", the value is their common-friend count
        String string = key.toString();
        String[] keypart = string.split("_");
        String num = value.toString();
        // emit one record per person so that each person collects all of his candidates with counts
        context.write(new Text(keypart[0]),new Text(keypart[1]+","+num));
        context.write(new Text(keypart[1]),new Text(keypart[0]+","+num));
    }
}

The reducer parses each value of the form friend,count and puts it into a HashMap with the friend's name as the key and the common-friend count as the value, then orders the entries by count from largest to smallest before writing them out.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


public class FOF2Reducer extends Reducer<Text,Text,Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // friend name -> common-friend count, for the current person (the reduce key)
        Map<String,Integer> va = new HashMap<String, Integer>();

        for (Text value : values) {
            // each value looks like "friend,count"
            String valueStr = value.toString();
            va.put(valueStr.substring(0,valueStr.indexOf(",")),Integer.valueOf(valueStr.substring(valueStr.indexOf(",")+1)));
        }

        // insertion sort: keep myList ordered by common-friend count, largest first
        List<Map.Entry<String,Integer>> myList = new ArrayList<>();

        for(Map.Entry<String,Integer> entry: va.entrySet()){
           Integer value = entry.getValue();
           boolean flag = false;
           for(int i =0; i< myList.size();i++){
               Integer myValue = myList.get(i).getValue();
               if(value>myValue){
                   myList.add(i,entry);
                   flag = true;
                   break;
               }
           }
           if(!flag){
               myList.add(entry);
           }
        }
        myList.forEach(ele -> {
            try{
                context.write(key, new Text(ele.getKey() + "," + ele.getValue()));
            }catch (IOException e){
                e.printStackTrace();
            }catch (InterruptedException e){
                e.printStackTrace();
            }
        });
    }
}
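
As an illustration with hypothetical counts: if the reducer receives the key tom with the values hive,2 and mr,1, it writes the candidates for tom in descending order of common-friend count:

tom	hive,2
tom	mr,1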

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MainClass2 {
    public static void main(String[] args) throws Exception {
        if(args == null || args.length!=2){
            System.err.println(" yarn jar myfof.jar xpu.com.mr.MainClass2 <inputpath> <outPath>");
            System.exit(1);
        }

        Configuration conf = new Configuration(true);
        Job job = Job.getInstance(conf);

        // To run locally, set mapreduce.framework.name to "local" on conf before Job.getInstance()
        //conf.set("mapreduce.framework.name","local");
        job.setJobName("friend recommendation 2 - sort candidates by common friend count");
        job.setJarByClass(MainClass2.class);
        // KeyValueTextInputFormat: the key is the part of each line before the first \t; if a line has no \t, the whole line becomes the key and the value is empty
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.addInputPath(job,new Path(args[0]));

        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        job.setMapperClass(FOF2Mapper.class);
        job.setReducerClass(FOF2Reducer.class);

        job.setMapOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.waitForCompletion(true);
    }
}
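
A possible way to run the two jobs end to end (a sketch; the jar and class names follow the usage messages above, the paths are placeholders):

# Job 1: for every pair of non-friends, count their common friends
yarn jar myfof.jar xpu.com.mr.MainClass <inputpath> <intermediatePath>
# Job 2: read job 1's output and produce, per person, the candidate list sorted by common-friend count
yarn jar myfof.jar xpu.com.mr.MainClass2 <intermediatePath> <outPath>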