Hadoop-MRjob串联之共同好友

MRjob串联之共同好友

数据

描述:A的好友有B,C,D,F,R

A:B,C,D,F,R
B:B,D,R
C:A,G,U,X,D,F,R
D:B,F,D,G,R
E:A,D,F,R
F:B,C,D,T,Y,F,R
G:A,C,Z,K,R
H:B,C,F,G,L,R
I:B,L,D,R,E
J:B,D,G,F,R
K:S,B,C,A

要求:找出A,B的共同好友,找出A,C的共同好友…(所有人两两间的共同好友)

逻辑分析

对A,B的共同好友进行举例

Map1->FriendMap1.java
package day4_jobs_input.jobs.friends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * friendmap1
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendMap1 extends Mapper<LongWritable, Text, Text, Text> {
    // Reused Writable instances to avoid a per-record allocation.
    Text k = new Text();
    Text v = new Text();

    /**
     * Splits each input line ("person:friend1,friend2,...") and inverts the
     * relation, emitting one (friend, person) pair per friend so the reducer
     * can group together everyone who shares a given friend.
     *
     * @param key     byte offset of the line in the input split (unused)
     * @param value   one input line, e.g. {@code A:B,C,D,F,R}
     * @param context Hadoop context used to emit (friend, person) pairs
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        int sep = line.indexOf(':');
        // Robustness: skip blank or malformed lines instead of throwing
        // ArrayIndexOutOfBoundsException when there is no ':' separator.
        if (line.isEmpty() || sep < 0) {
            return;
        }
        // Left of ':' is the person; right of ':' is the comma-separated friend list.
        String person = line.substring(0, sep);
        v.set(person);
        String[] friends = line.substring(sep + 1).split(",");
        for (String friend : friends) {
            // Invert k and v: key = one friend, value = the person who has that friend.
            k.set(friend);
            context.write(k, v);
        }
    }
}
Reduce1->FriendReduce1.java
package day4_jobs_input.jobs.friends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * friendreduce1
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendReduce1 extends Reducer<Text, Text, Text, Text> {
    // Reused Writable instance to avoid a per-group allocation.
    Text v = new Text();

    /**
     * Aggregates the inverted pairs from {@link FriendMap1}: for each friend
     * (the key), concatenates every person who lists that friend into a
     * comma-separated string, e.g. {@code B -> A,K,J,I,H,F,D,B,}.
     *
     * <p>Note: a trailing comma is deliberately kept — job2's mapper splits
     * on "," and {@code String.split} drops the trailing empty token, and the
     * documented job1 output in this article includes the trailing comma.
     *
     * @param key     a single friend
     * @param values  every person who has that friend
     * @param context Hadoop context used to emit (friend, person-list)
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key,
                          Iterable<Text> values,
                          Context context) throws IOException, InterruptedException {
        // StringBuilder instead of StringBuffer: this method is single-threaded
        // per reduce call, so the synchronized StringBuffer is wasted overhead.
        StringBuilder persons = new StringBuilder();
        for (Text person : values) {
            persons.append(person).append(",");
        }
        v.set(persons.toString());
        context.write(key, v);
    }
}
Map2->FriendMap2.java
package day4_jobs_input.jobs.friends;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.Arrays;

/**
 * friendmap2
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendMap2 extends Mapper<LongWritable, Text, Text, Text> {
    // Reused Writable instances for the pair key and the shared-friend value.
    Text TwoMan = new Text();
    Text v = new Text();

    /**
     * Inverts job1's output once more: each input line has the form
     * {@code friend \t person1,person2,person3}, meaning every listed person
     * shares that friend. For every unordered pair of people the mapper emits
     * ("personA-personB", friend), so the reducer can collect each pair's
     * common friends.
     *
     * @param key     byte offset of the line in the input split (unused)
     * @param value   one job1 output line: {@code friend \t person1,person2,...}
     * @param context Hadoop context used to emit (pair, friend) records
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        String[] parts = value.toString().split("\t");
        String[] people = parts[1].split(",");
        // Sort so every pair is always written as "smaller-larger"; this makes
        // "A-B" and "B-A" collapse onto the same reduce key.
        Arrays.sort(people);
        v.set(parts[0]);
        // Emit each unordered pair exactly once: person[left]-person[right], left < right.
        for (int left = 0; left < people.length; left++) {
            for (int right = left + 1; right < people.length; right++) {
                TwoMan.set(people[left] + "-" + people[right]);
                context.write(TwoMan, v);
            }
        }
    }
}
Reduce2->FriendReduce2.java
package day4_jobs_input.jobs.friends;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * friendmap2
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendReduce2 extends Reducer<Text, Text, Text, Text> {
    // Reused Writable instance to avoid a per-group allocation.
    Text v = new Text();

    /**
     * Final aggregation: for each person-pair key ("personA-personB"),
     * concatenates all of their common friends into a comma-separated string,
     * e.g. {@code A-B -> R,D,B,}.
     *
     * @param key     a person pair in canonical "smaller-larger" form
     * @param values  the friends shared by that pair
     * @param context Hadoop context used to emit (pair, common-friend-list)
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key,
                          Iterable<Text> values,
                          Context context) throws IOException, InterruptedException {
        // StringBuilder instead of StringBuffer: no cross-thread sharing here,
        // so the synchronized StringBuffer is unnecessary overhead.
        StringBuilder persons = new StringBuilder();
        for (Text value : values) {
            persons.append(value.toString()).append(",");
        }
        v.set(persons.toString());
        context.write(key, v);
    }
}
Drive->FriendDriveAll.java
package day4_jobs_input.jobs.friends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;

/**
 * friendDrive1
 * @author Fantome
 * @date 2019/5/28
 */
public class FriendDriveAll {
    /**
     * Driver that chains the two MapReduce jobs with {@link JobControl}:
     * job1 inverts person->friend into friend->people, job2 turns that into
     * pair->common-friends. job2 reads job1's output directory.
     *
     * @param args [0] input file, [1] job1 output dir, [2] job2 output dir
     */
    public static void main(String[] args) throws Exception {
        // Use the hard-coded local test paths only as a fallback; the original
        // unconditionally clobbered any paths supplied on the command line.
        if (args.length < 3) {
            args = new String[]{"E:\\桌面\\大数据\\test\\friends\\friends.txt",
                    "E:\\桌面\\大数据\\test\\friends\\friendOut1",
                    "E:\\桌面\\大数据\\test\\friends\\friendOut2"};
        }
        Configuration conf = new Configuration();
        // Configure job1: invert person->friends into friend->person.
        Job job1 = Job.getInstance(conf);
        // BUGFIX: use this driver class for the jar lookup instead of the
        // unrelated FriendDrive1/FriendDrive2 classes.
        job1.setJarByClass(FriendDriveAll.class);
        job1.setMapperClass(FriendMap1.class);
        job1.setReducerClass(FriendReduce1.class);

        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(Text.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, new Path(args[1]));

        // Configure job2: job1's output directory is job2's input.
        Job job2 = Job.getInstance(conf);
        job2.setJarByClass(FriendDriveAll.class);
        job2.setMapperClass(FriendMap2.class);
        job2.setReducerClass(FriendReduce2.class);

        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job2, new Path(args[1]));
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));

        // Chain job1 -> job2.
        JobControl control = new JobControl("friends");
        ControlledJob ajob = new ControlledJob(job1.getConfiguration());
        ControlledJob bjob = new ControlledJob(job2.getConfiguration());
        // job2 (bjob) may only start after job1 (ajob) has completed,
        // because job2 consumes job1's output directory.
        bjob.addDependingJob(ajob);
        control.addJob(ajob);
        control.addJob(bjob);

        Thread thread = new Thread(control);
        thread.start();
        // BUGFIX: poll in a loop until both jobs finish. The original used a
        // single 'if', so main() fell through after at most one second; it
        // also never called control.stop(), leaving the JobControl thread
        // running forever and the JVM unable to exit.
        while (!control.allFinished()) {
            Thread.sleep(1000);
        }
        control.stop();
        // Exit non-zero when any job failed so scripts can detect the failure.
        System.exit(control.getFailedJobList().isEmpty() ? 0 : 1);
    }
}

job1结果

A	K,G,C,E,
B	A,K,J,I,H,F,D,B,
C	H,K,A,F,G,
D	I,F,A,E,D,C,B,J,
E	I,
F	J,C,H,F,E,A,D,
G	D,C,J,H,
K	G,
L	I,H,
R	F,J,I,E,D,C,A,B,G,H,
S	K,
T	F,
U	C,
X	C,
Y	F,
Z	G,

job2结果

A-B	R,D,B,
A-C	R,D,F,
A-D	D,R,B,F,
A-E	R,F,D,
A-F	R,B,F,D,C,
A-G	R,C,
A-H	F,C,B,R,
A-I	B,D,R,
A-J	F,B,D,R,
A-K	B,C,
B-C	R,D,
B-D	D,R,B,
B-E	R,D,
B-F	B,R,D,
B-G	R,
B-H	B,R,
B-I	D,R,B,
B-J	B,D,R,
B-K	B,
C-D	D,G,R,F,
C-E	A,R,F,D,
C-F	R,F,D,
C-G	R,A,
C-H	R,G,F,
C-I	D,R,
C-J	D,F,R,G,
C-K	A,
D-E	R,F,D,
D-F	F,D,R,B,
D-G	R,
D-H	F,B,G,R,
D-I	D,R,B,
D-J	B,G,F,D,R,
D-K	B,
E-F	D,F,R,
E-G	R,A,
E-H	F,R,
E-I	D,R,
E-J	F,D,R,
E-K	A,
F-G	C,R,
F-H	F,C,B,R,
F-I	R,B,D,
F-J	R,B,D,F,
F-K	B,C,
G-H	R,C,
G-I	R,
G-J	R,
G-K	A,C,
H-I	R,B,L,
H-J	F,G,R,B,
H-K	C,B,
I-J	R,B,D,
I-K	B,
J-K	B,
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值