Hadoop: Common Friends and Second-Degree Connections (Part 1)

Contents

1 Approach to finding common friends

2 The complete code


This post walks through a solution to tasks such as finding common friends and second-degree connections, implemented here as Hadoop MapReduce programs.

1 Approach to finding common friends

The input records have the following format:
  A:B,C,D,E,F  Here A is a user, and the list after ":" is A's friend list.
The solution consists of two MapReduce jobs, four phases in total (phase 3 is illustrated by a standalone sketch after this list):

  1. Map phase of job 1: for the record above, emit (B A), (C A), (D A), (E A), (F A); the key is each friend and the value is the user, so the reducer can collect, for every person, all users who list that person as a friend;
  2. Reduce phase of job 1: emit (A    F,H,O,D,G,B,K,C,I), meaning that F, H, …, C, I all have A as a friend;
  3. Map phase of job 2: emit (F-H A), (F-O A) … (H-O A), (H-D A) …, meaning that the pair F-H has A as a common friend (the code sorts the user list first, so each pair appears under a single canonical key such as D-H);
  4. Reduce phase of job 2: emit (F-H A,D,C) …, i.e. all common friends of the pair F-H.
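
To make phase 3 concrete, here is a minimal standalone sketch (plain Java, no Hadoop required; the class name and the sample line are illustrative only) of the pair-generation step. The user list is sorted first so that each unordered pair gets a single canonical key such as B-F, never both B-F and F-B.

import java.util.Arrays;

public class PairSketch {
    public static void main(String[] args) {
        // One line of the first job's output: "<person>\t<users who have that person as a friend>"
        String line = "A\tF,H,O,D,G,B,K,C,I";
        String[] parts = line.split("\t");
        String friend = parts[0];
        String[] users = parts[1].split(",");
        Arrays.sort(users);                        // canonical order: B-F, never F-B
        for (int i = 0; i < users.length - 1; i++) {
            for (int j = i + 1; j < users.length; j++) {
                // Each pair of users shares "friend" as a common friend.
                System.out.println(users[i] + "-" + users[j] + "\t" + friend);
            }
        }
    }
}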

2 The complete code

  • StepOneMapper.java
package hadoop.common.friends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class StepOneMapper extends Mapper<LongWritable, Text, Text, Text> {
    private final Text outKey = new Text();
    private final Text outVal = new Text();

    @Override
    protected void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
        // Input line: A:B,C,D,E,F  (user A, followed by A's friend list)
        String line = value.toString();
        String[] userAndFriends = line.split(":");
        String user = userAndFriends[0];
        String friends = userAndFriends[1];
        // Emit (friend, user) so the reducer can collect, for each person,
        // every user who has that person in their friend list.
        for (String friend : friends.split(",")) {
            outKey.set(friend);
            outVal.set(user);
            context.write(outKey, outVal);
        }
    }
}

  • StepOneReducer.java
package hadoop.common.friends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class StepOneReducer extends Reducer<Text, Text, Text, Text> {
    private final Text outVal = new Text();

    @Override
    protected void reduce(Text friend, Iterable<Text> users, Context context) throws IOException, InterruptedException {
        // Concatenate every user who has "friend" in their friend list.
        // The trailing comma is left in place; it is harmless because the
        // second job's split(",") drops trailing empty strings.
        StringBuilder stringBuilder = new StringBuilder();
        for (Text person : users) {
            stringBuilder.append(person).append(",");
        }
        outVal.set(stringBuilder.toString());
        context.write(friend, outVal);
    }
}
  • StepTwoMapper.java
package hadoop.common.friends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.Arrays;

public class StepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {

    private final Text outKey = new Text();
    private final Text outVal = new Text();

    @Override
    protected void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
        // Input line from job 1 (TextOutputFormat separates key and value with a tab):
        // A	F,H,D,G,B,K,C,I,O,
        String line = value.toString();
        String[] friendAndUsers = line.split("\t");
        String friend = friendAndUsers[0];
        String[] users = friendAndUsers[1].split(",");
        // Sort so every pair is emitted in a canonical "smaller-larger" order,
        // e.g. both B and F with common friend A end up under the single key B-F.
        Arrays.sort(users);
        int len = users.length;
        for (int i = 0; i < len - 1; i++) {
            for (int j = i + 1; j < len; j++) {
                outKey.set(users[i] + "-" + users[j]);
                outVal.set(friend);
                context.write(outKey, outVal);
            }
        }
    }
}
  • StepTwoReducer.java
package hadoop.common.friends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class StepTwoReducer extends Reducer<Text, Text, Text, Text> {

    private final Text out = new Text();

    @Override
    protected void reduce(Text pair, Iterable<Text> friends, Context context) throws IOException, InterruptedException {
        // Concatenate all common friends of the pair, dropping the trailing comma.
        StringBuilder sb = new StringBuilder();
        for (Text person : friends) {
            sb.append(person).append(",");
        }
        sb.setLength(sb.length() - 1);
        out.set(sb.toString());
        context.write(pair, out);
    }
}
  • Main.java
package hadoop.common.friends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;

public class Main {
    public static void main(String[] args) throws Exception {
        // Use the default Log4j configuration so job progress is printed to the console.
        BasicConfigurator.configure(); 
        String step1InPath = "F:\\work\\hadoop\\connon-friends\\data.txt";
        String step1OutPath = "F:\\work\\hadoop\\step1_reduce";
        String step2OutPath = "F:\\work\\hadoop\\step2_reduce";
        Path step1In = new Path(step1InPath);
        Path step1Out = new Path(step1OutPath);
        Path step2Out = new Path(step2OutPath);

        // Local mode against the local file system: no cluster required.
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","file:///");

        FileSystem fs = FileSystem.get(conf);

        // Remove output directories left over from previous runs.
        if (fs.exists(step1Out)) {
            fs.delete(step1Out, true);
        }
        if (fs.exists(step2Out)) {
            fs.delete(step2Out, true);
        }
        // Configuration of the first job
        Job step1 = Job.getInstance(conf,"step1");
        step1.setJarByClass(Main.class);
        step1.setMapperClass(StepOneMapper.class);
        step1.setReducerClass(StepOneReducer.class);
        step1.setMapOutputKeyClass(Text.class);
        step1.setMapOutputValueClass(Text.class);
        step1.setOutputKeyClass(Text.class);
        step1.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(step1,step1In);
        FileOutputFormat.setOutputPath(step1,step1Out);

        // Configuration of the second job
        Job step2 = Job.getInstance(conf,"step2");
        step2.setJarByClass(Main.class);
        step2.setMapperClass(StepTwoMapper.class);
        step2.setReducerClass(StepTwoReducer.class);
        step2.setMapOutputKeyClass(Text.class);
        step2.setMapOutputValueClass(Text.class);
        step2.setOutputKeyClass(Text.class);
        step2.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(step2,step1Out);
        FileOutputFormat.setOutputPath(step2,step2Out);

        // Job dependency: step2 may only start after step1 has finished
        ControlledJob controlledJob1=new ControlledJob(step1.getConfiguration());
        controlledJob1.setJob(step1);

        ControlledJob controlledJob2=new ControlledJob(step2.getConfiguration());
        controlledJob2.setJob(step2);

        controlledJob2.addDependingJob(controlledJob1);

        JobControl jc = new JobControl("jc");
        jc.addJob(controlledJob1);
        jc.addJob(controlledJob2);

        // Run the JobControl in a background thread and poll until both jobs finish.
        Thread thread = new Thread(jc);
        thread.setDaemon(true);
        thread.start();
        while (!jc.allFinished()) {
            if (jc.getFailedJobList().size() > 0) {
                System.out.println(jc.getFailedJobList());
                break;
            }
            Thread.sleep(500);
        }
        System.out.println(jc.getSuccessfulJobList());
        jc.stop();
    }
}
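
As an aside, JobControl is not the only way to chain the two jobs. A simpler alternative sketch (an assumption about an equivalent setup, not part of the original code) is to run them back to back with Job.waitForCompletion; `runSequentially` is a hypothetical helper that would live inside Main, and `step1`/`step2` stand for the fully configured Job instances above.

    // Hypothetical helper that could replace the JobControl block in Main:
    static void runSequentially(Job step1, Job step2) throws Exception {
        if (!step1.waitForCompletion(true)) {      // run job 1 and print its progress
            throw new RuntimeException("step1 failed");
        }
        if (!step2.waitForCompletion(true)) {      // job 2 reads job 1's output directory
            throw new RuntimeException("step2 failed");
        }
    }
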
  • data.txt contents
A:B,C,D,E,F,O
B:A,C,E,F
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
  • Output of the first job
A	F,H,O,D,G,B,K,C,I,
B	E,A,J,F,
C	H,K,A,G,B,E,F,
D	E,C,L,K,A,H,G,F,
E	A,F,H,L,M,D,B,G,
F	B,M,L,G,A,D,C,
G	M,
H	O,
I	C,O,
J	O,
L	D,E,
M	E,F,
O	A,H,I,J,F,
  • Output of the second job
A-B	C,E,F
A-C	D,F
A-D	E,F
A-E	C,D,B
A-F	C,O,D,B,E
A-G	F,C,D,E
A-H	C,D,E,O
A-I	O
A-J	O,B
A-K	D,C
A-L	E,D,F
A-M	F,E
B-C	F,A
B-D	F,E,A
B-E	C
B-F	C,A,E
B-G	E,C,F,A
B-H	C,A,E
B-I	A
B-K	C,A
B-L	F,E
B-M	E,F
B-O	A
C-D	F,A
C-E	D
C-F	D,A
C-G	F,D,A
C-H	D,A
C-I	A
C-K	A,D
C-L	F,D
C-M	F
C-O	A,I
D-E	L
D-F	A,E
D-G	F,A,E
D-H	A,E
D-I	A
D-K	A
D-L	E,F
D-M	F,E
D-O	A
E-F	D,M,C,B
E-G	C,D
E-H	C,D
E-J	B
E-K	C,D
E-L	D
F-G	D,C,E,A
F-H	A,C,O,D,E
F-I	O,A
F-J	B,O
F-K	C,D,A
F-L	E,D
F-M	E
F-O	A
G-H	C,D,E,A
G-I	A
G-K	C,A,D
G-L	D,E,F
G-M	E,F
G-O	A
H-I	A,O
H-J	O
H-K	A,C,D
H-L	E,D
H-M	E
H-O	A
I-J	O
I-K	A
I-O	A
K-L	D
K-O	A
L-M	F,E
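
As a quick sanity check (a standalone sketch, no Hadoop; the class name is illustrative), the common friends of a pair should equal the intersection of the two users' friend lists in data.txt. For the pair A-B, intersecting A:B,C,D,E,F,O with B:A,C,E,F gives C,E,F, which matches the A-B line above.

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class IntersectCheck {
    public static void main(String[] args) {
        Set<String> friendsOfA = new TreeSet<>(Arrays.asList("B,C,D,E,F,O".split(",")));
        Set<String> friendsOfB = new TreeSet<>(Arrays.asList("A,C,E,F".split(",")));
        friendsOfA.retainAll(friendsOfB);                        // set intersection
        System.out.println("A-B\t" + String.join(",", friendsOfA));   // prints: A-B	C,E,F
    }
}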

 
