目录
本文介绍求取共同好友、二度人脉这类任务的解法(这里的环境是使用Hadoop MapReduce程序实现)。
1 求取共同好友思路
数据记录格式如下:
A:B,C,D,E,F 其中,A表示用户,":"后面的表示其好友列表。
分为两个MR步骤共4个阶段:
- 第一个任务的map阶段:写出的是(B A),(C A),(D A),(E A),(F A),即key为好友、value为用户,表示B、C、D、E、F都出现在A的好友列表中;
- 第一个任务的reduce阶段:写出的是(A F,H,O,D,G,B,K,C,I),表示F,H…C,I均有A这个好友;
- 第二个任务的map阶段:写出的是(F-H A),(F-O A)…(H-O A),(H-D A)…表示F-H 有共同好友A;
- 第二个任务的reduce阶段:写出的是(F-H A,D,C)…,表示F-H的所有共同好友。
2 下面是具体的代码
- StepOneMapper.java
package hadoop.common.friends;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * First-stage mapper: inverts each {@code user:friend1,friend2,...} record into
 * one {@code (friend, user)} pair per listed friend.
 *
 * <p>Example: the line {@code A:B,C,D} produces {@code (B, A)}, {@code (C, A)}
 * and {@code (D, A)}.
 *
 * <p>Lines that do not contain both a user and a friend list (blank lines,
 * {@code A:} with nothing after the colon, ...) are skipped instead of failing
 * the task with an {@link ArrayIndexOutOfBoundsException}.
 */
public class StepOneMapper extends Mapper<LongWritable, Text,Text,Text> {
    // Reusable output holders. They are instance fields (not static) so that
    // mapper instances living in the same JVM never share mutable state.
    private final Text key = new Text();
    private final Text val = new Text();

    /**
     * Emits {@code (friend, user)} for every friend listed on the input line.
     *
     * @param a       input key supplied by the input format; not used
     * @param value   one input line in the form {@code user:friend1,friend2,...}
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable a, Text value, Context context) throws IOException, InterruptedException {
        // A:B,C,D,E,F
        String line = value.toString();
        String[] userAndFriends = line.split(":");
        if (userAndFriends.length < 2) {
            // No ':' separator or nothing after it: nothing to emit.
            return;
        }
        String user = userAndFriends[0].trim();
        if (user.isEmpty()) {
            return;
        }
        // The value is the same for every friend on this line, so set it once.
        val.set(user);
        for (String friend : userAndFriends[1].split(",")) {
            String trimmedFriend = friend.trim();
            if (trimmedFriend.isEmpty()) {
                // Ignore empty tokens such as the one in "B,,C".
                continue;
            }
            key.set(trimmedFriend);
            context.write(key, val);
        }
    }
}
- StepOneReducer.java
package hadoop.common.friends;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * First-stage reducer: for a given friend, concatenates every user that was
 * emitted with that friend into one comma-terminated list.
 *
 * <p>Each user is followed by a comma, so the written value ends with a
 * trailing comma (for example {@code F,H,O,}).
 */
public class StepOneReducer extends Reducer<Text,Text,Text,Text> {
    // Reusable output value holder.
    private static Text val = new Text();

    /**
     * Writes {@code (friend, "user1,user2,...,")} for the given key.
     *
     * @param friend  the key the users were grouped under
     * @param users   all values emitted for that key
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text friend, Iterable<Text> users, Context context) throws IOException, InterruptedException {
        StringBuilder joinedUsers = new StringBuilder();
        for (Text owner : users) {
            joinedUsers.append(owner.toString());
            joinedUsers.append(",");
        }
        String listing = joinedUsers.toString();
        val.set(listing);
        context.write(friend, val);
    }
}
- StepTwoMapper.java
package hadoop.common.friends;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
/**
 * Second-stage mapper: reads {@code friend<TAB>user1,user2,...} lines and emits
 * {@code (userX-userY, friend)} for every unordered pair of users on the line.
 *
 * <p>The users are sorted before pairing, so a given pair always produces the
 * same key regardless of the order in which the users appear on the line.
 *
 * <p>Lines without a tab-separated user list are skipped instead of failing the
 * task with an {@link ArrayIndexOutOfBoundsException}.
 */
public class StepTwoMapper extends Mapper<LongWritable, Text,Text,Text> {
    // Reusable output holders. They are instance fields (not static) so that
    // mapper instances living in the same JVM never share mutable state.
    private final Text key = new Text();
    private final Text val = new Text();

    /**
     * Emits one {@code (userX-userY, friend)} pair per combination of two users.
     *
     * @param a       input key supplied by the input format; not used
     * @param value   one line such as {@code A<TAB>F,H,D,G,B,K,C,I,O,}
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable a, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] friendAndUsers = line.split("\t");
        if (friendAndUsers.length < 2) {
            // Blank or malformed line: there is no user list to pair up.
            return;
        }
        String friend = friendAndUsers[0];
        // String.split drops trailing empty strings, so a trailing comma in the
        // list does not produce an empty user.
        String[] users = friendAndUsers[1].split(",");
        Arrays.sort(users);
        // The value is identical for every pair on this line, so set it once.
        val.set(friend);
        int len = users.length;
        for (int i = 0; i < len - 1; i++) {
            for (int j = i + 1; j < len; j++) {
                key.set(users[i] + "-" + users[j]);
                context.write(key, val);
            }
        }
    }
}
- StepTwoReducer.java
package hadoop.common.friends;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Second-stage reducer: joins all values received for a key into a single
 * comma-separated string without a trailing comma.
 *
 * <p>With the keys produced by {@code StepTwoMapper}, the key is a user pair
 * such as {@code F-H} and the values are the friends emitted for that pair.
 */
public class StepTwoReducer extends Reducer<Text,Text,Text,Text>{
    // Reusable output value holder.
    private static Text out = new Text();

    /**
     * Writes {@code (key, "v1,v2,...")} for the given key.
     *
     * @param friend  the grouping key (a {@code X-Y} pair when fed by StepTwoMapper)
     * @param users   all values emitted for that key
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text friend, Iterable<Text> users, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        for (Text common : users) {
            joined.append(common);
            joined.append(",");
        }
        // Drop the comma that follows the last element.
        joined.setLength(joined.length() - 1);
        out.set(joined.toString());
        context.write(friend, out);
    }
}
- Main.java
package hadoop.common.friends;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
/**
 * Driver that chains the two "common friends" jobs with {@link JobControl}:
 * step 1 (StepOneMapper / StepOneReducer) writes its result to an intermediate
 * directory, which step 2 (StepTwoMapper / StepTwoReducer) reads as its input.
 *
 * <p>Both jobs run with the local job runner against the local file system.
 */
public class Main {
    // Defaults used when the corresponding command-line argument is absent.
    private static final String DEFAULT_STEP1_IN = "F:\\work\\hadoop\\connon-friends\\data.txt";
    private static final String DEFAULT_STEP1_OUT = "F:\\work\\hadoop\\step1_reduce";
    private static final String DEFAULT_STEP2_OUT = "F:\\work\\hadoop\\step2_reduce";

    // Pause between completion checks so the driver does not spin a CPU core.
    private static final long POLL_INTERVAL_MILLIS = 500L;

    /**
     * Configures both jobs, runs them in dependency order and reports the result.
     *
     * @param args optional overrides: {@code [0]} step-1 input file,
     *             {@code [1]} step-1 output directory, {@code [2]} step-2 output
     *             directory; missing entries fall back to the built-in defaults
     * @throws Exception if job setup or file-system access fails
     */
    public static void main(String[] args) throws Exception {
        // Use the default Log4j environment.
        BasicConfigurator.configure();

        String step1InPath = args.length > 0 ? args[0] : DEFAULT_STEP1_IN;
        String step1OutPath = args.length > 1 ? args[1] : DEFAULT_STEP1_OUT;
        String step2OutPath = args.length > 2 ? args[2] : DEFAULT_STEP2_OUT;

        Path step1In = new Path(step1InPath);
        Path step1Out = new Path(step1OutPath);
        Path step2Out = new Path(step2OutPath);

        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","file:///");

        // Remove output directories left over from a previous run.
        FileSystem fs = FileSystem.get(conf);
        deleteIfExists(fs, step1Out);
        deleteIfExists(fs, step2Out);

        // Stage-one job configuration.
        Job step1 = Job.getInstance(conf,"step1");
        step1.setJarByClass(Main.class);
        step1.setMapperClass(StepOneMapper.class);
        step1.setReducerClass(StepOneReducer.class);
        step1.setMapOutputKeyClass(Text.class);
        step1.setMapOutputValueClass(Text.class);
        step1.setOutputKeyClass(Text.class);
        step1.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(step1,step1In);
        FileOutputFormat.setOutputPath(step1,step1Out);

        // Stage-two job configuration; it consumes the output of stage one.
        Job step2 = Job.getInstance(conf,"step2");
        step2.setJarByClass(Main.class);
        step2.setMapperClass(StepTwoMapper.class);
        step2.setReducerClass(StepTwoReducer.class);
        step2.setMapOutputKeyClass(Text.class);
        step2.setMapOutputValueClass(Text.class);
        step2.setOutputKeyClass(Text.class);
        step2.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(step2,step1Out);
        FileOutputFormat.setOutputPath(step2,step2Out);

        // Job dependency configuration: step 2 must wait for step 1.
        ControlledJob controlledJob1 = new ControlledJob(step1.getConfiguration());
        controlledJob1.setJob(step1);
        ControlledJob controlledJob2 = new ControlledJob(step2.getConfiguration());
        controlledJob2.setJob(step2);
        controlledJob2.addDependingJob(controlledJob1);

        JobControl jc = new JobControl("jc");
        jc.addJob(controlledJob1);
        jc.addJob(controlledJob2);

        Thread thread = new Thread(jc);
        thread.start();
        try {
            while (!jc.allFinished()) {
                Thread.sleep(POLL_INTERVAL_MILLIS);
            }
        } finally {
            // Always stop the controller; otherwise its thread keeps the JVM alive.
            jc.stop();
        }

        if (!jc.getFailedJobList().isEmpty()) {
            System.out.println(jc.getFailedJobList());
            System.exit(1);
        }
        System.out.println(jc.getSuccessfulJobList());
    }

    /** Recursively deletes {@code path} when it exists. */
    private static void deleteIfExists(FileSystem fs, Path path) throws java.io.IOException {
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
    }
}
- data.txt内容
A:B,C,D,E,F,O
B:A,C,E,F
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
- 第一个任务的输出
A F,H,O,D,G,B,K,C,I,
B E,A,J,F,
C H,K,A,G,B,E,F,
D E,C,L,K,A,H,G,F,
E A,F,H,L,M,D,B,G,
F B,M,L,G,A,D,C,
G M,
H O,
I C,O,
J O,
L D,E,
M E,F,
O A,H,I,J,F,
- 第二个任务的输出
A-B C,E,F
A-C D,F
A-D E,F
A-E C,D,B
A-F C,O,D,B,E
A-G F,C,D,E
A-H C,D,E,O
A-I O
A-J O,B
A-K D,C
A-L E,D,F
A-M F,E
B-C F,A
B-D F,E,A
B-E C
B-F C,A,E
B-G E,C,F,A
B-H C,A,E
B-I A
B-K C,A
B-L F,E
B-M E,F
B-O A
C-D F,A
C-E D
C-F D,A
C-G F,D,A
C-H D,A
C-I A
C-K A,D
C-L F,D
C-M F
C-O A,I
D-E L
D-F A,E
D-G F,A,E
D-H A,E
D-I A
D-K A
D-L E,F
D-M F,E
D-O A
E-F D,M,C,B
E-G C,D
E-H C,D
E-J B
E-K C,D
E-L D
F-G D,C,E,A
F-H A,C,O,D,E
F-I O,A
F-J B,O
F-K C,D,A
F-L E,D
F-M E
F-O A
G-H C,D,E,A
G-I A
G-K C,A,D
G-L D,E,F
G-M E,F
G-O A
H-I A,O
H-J O
H-K A,C,D
H-L E,D
H-M E
H-O A
I-J O
I-K A
I-O A
K-L D
K-O A
L-M F,E