测试数据
more user.txt (用户id,用户名)
1 用户1
2 用户2
3 用户3
more post.txt (用户id,帖子id,标题)
1 1 贴子1
1 2 贴子2
2 3 帖子3
4 4 贴子4
5 5 贴子5
5 6 贴子6
5 7 贴子7
代码
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Reduce-side join of user records with post records, keyed on the user id.
 *
 * <p>Both inputs are read as text lines whose first tab-separated column is the
 * user id. Each mapper tags its lines with a one-character source marker, and the
 * reducer splits the values of one id back into a user list and a post list and
 * combines them according to the {@code join.type} configuration value
 * ({@code inner}, {@code leftouter}, {@code rightouter}, {@code fullouter} or
 * {@code anti}, compared case-insensitively).
 */
public class UserAndPost {

    /** Configuration key that names the join variant the reducer performs. */
    private static final String JOIN_TYPE_KEY = "join.type";

    /** Join variant used when {@link #JOIN_TYPE_KEY} is not set on the job. */
    private static final String DEFAULT_JOIN_TYPE = "inner";

    /** Marker prepended to every value that originates from the user input. */
    private static final char USER_TAG = 'u';

    /** Marker prepended to every value that originates from the post input. */
    private static final char POST_TAG = 'p';

    /**
     * Returns the text before the first tab of {@code line}, or the whole line
     * when it contains no tab.
     *
     * <p>Unlike {@code line.split("\t")[0]} this never throws for a line that
     * consists only of tabs (for which {@code split} yields an empty array).
     *
     * @param line one input record
     * @return the first column, possibly empty
     */
    private static String firstColumn(String line) {
        int tab = line.indexOf('\t');
        return tab < 0 ? line : line.substring(0, tab);
    }

    /**
     * Emits each user line as {@code (userId, "u" + line)}.
     * Lines whose first column is blank are skipped.
     */
    public static class UserJoinMapper extends Mapper<Object, Text, Text, Text> {
        // Reused across map() calls; context.write serializes the current content.
        private final Text outkey = new Text();
        private final Text outvalue = new Text();

        /**
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of the user input
         * @param context sink for the tagged record
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String id = firstColumn(line);
            if (id.trim().isEmpty()) {
                // Blank line or missing id: nothing to join on.
                return;
            }
            outkey.set(id);
            outvalue.set(USER_TAG + line);
            context.write(outkey, outvalue);
        }
    }

    /**
     * Emits each post line as {@code (userId, "p" + line)}.
     * Lines whose first column is blank are skipped.
     */
    public static class PostJoinMapper extends Mapper<Object, Text, Text, Text> {
        // Reused across map() calls; context.write serializes the current content.
        private final Text outkey = new Text();
        private final Text outvalue = new Text();

        /**
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of the post input
         * @param context sink for the tagged record
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String id = firstColumn(line);
            if (id.trim().isEmpty()) {
                // Blank line or missing id: nothing to join on.
                return;
            }
            outkey.set(id);
            outvalue.set(POST_TAG + line);
            context.write(outkey, outvalue);
        }
    }

    /**
     * Separates the tagged values of one user id into users and posts and writes
     * the rows required by the configured join type.
     *
     * <p>A join type other than the five recognised names produces no output.
     */
    public static class JoinReducer extends Reducer<Text, Text, Text, Text> {
        private final ArrayList<Text> listuser = new ArrayList<Text>();
        private final ArrayList<Text> listpost = new ArrayList<Text>();

        /** Value written next to a user that has no post. */
        private final Text emptyPost = new Text("");

        /** Key written next to a post that has no user. */
        private final Text missingUser = new Text("\t \t");

        private String joinType = DEFAULT_JOIN_TYPE;

        /** Reads the join type once per task, falling back to the default when unset. */
        @Override
        protected void setup(Context context) {
            joinType = context.getConfiguration().get(JOIN_TYPE_KEY, DEFAULT_JOIN_TYPE);
        }

        /**
         * @param key     the user id shared by all {@code values}
         * @param values  tagged user and post lines for that id
         * @param context sink for the joined rows
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            listuser.clear();
            listpost.clear();
            for (Text t : values) {
                String tagged = t.toString();
                if (tagged.isEmpty()) {
                    continue;
                }
                char tag = tagged.charAt(0);
                // A fresh Text is stored because the record must outlive this iteration.
                if (tag == USER_TAG) {
                    listuser.add(new Text(tagged.substring(1)));
                } else if (tag == POST_TAG) {
                    listpost.add(new Text(tagged.substring(1)));
                }
            }
            executeJoinLogic(context);
        }

        /** Dispatches on the configured join type for the current key's two lists. */
        private void executeJoinLogic(Context context) throws IOException, InterruptedException {
            if (joinType.equalsIgnoreCase("inner")) { // inner join
                writePairsByUser(context);
            } else if (joinType.equalsIgnoreCase("leftouter")) { // left outer join
                if (listpost.isEmpty()) {
                    writeUnmatchedUsers(context);
                } else {
                    writePairsByUser(context);
                }
            } else if (joinType.equalsIgnoreCase("rightouter")) { // right outer join
                if (listuser.isEmpty()) {
                    writeUnmatchedPosts(context);
                } else {
                    writePairsByPost(context);
                }
            } else if (joinType.equalsIgnoreCase("fullouter")) { // full outer join
                if (listuser.isEmpty()) {
                    writeUnmatchedPosts(context);
                } else if (listpost.isEmpty()) {
                    writeUnmatchedUsers(context);
                } else {
                    writePairsByUser(context);
                }
            } else if (joinType.equalsIgnoreCase("anti")) { // anti join
                // Only ids present on exactly one side are reported.
                if (listuser.isEmpty() || listpost.isEmpty()) {
                    writeUnmatchedUsers(context);
                    writeUnmatchedPosts(context);
                }
            }
        }

        /** Writes every (user, post) combination, iterating users in the outer loop. */
        private void writePairsByUser(Context context) throws IOException, InterruptedException {
            for (Text user : listuser) {
                for (Text post : listpost) {
                    context.write(user, post);
                }
            }
        }

        /** Writes every (user, post) combination, iterating posts in the outer loop. */
        private void writePairsByPost(Context context) throws IOException, InterruptedException {
            for (Text post : listpost) {
                for (Text user : listuser) {
                    context.write(user, post);
                }
            }
        }

        /** Writes each user with an empty post value. */
        private void writeUnmatchedUsers(Context context) throws IOException, InterruptedException {
            for (Text user : listuser) {
                context.write(user, emptyPost);
            }
        }

        /** Writes each post with the placeholder user key. */
        private void writeUnmatchedPosts(Context context) throws IOException, InterruptedException {
            for (Text post : listpost) {
                context.write(missingUser, post);
            }
        }
    }

    /**
     * Configures and runs the join job, then exits with status 0 on success and
     * 1 on failure so that calling scripts can detect a failed job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String joinType = "anti";
        Path inpath1 = new Path("hdfs://192.168.109.125:8020/input/userandpost/user.txt");
        Path inpath2 = new Path("hdfs://192.168.109.125:8020/input/userandpost/post.txt");
        // The output directory is named after the join type so the two cannot drift apart.
        Path outpath = new Path("hdfs://192.168.109.125:8020/output/userandpost/" + joinType);

        Job job = Job.getInstance(conf);
        job.getConfiguration().set(JOIN_TYPE_KEY, joinType);
        job.setJarByClass(UserAndPost.class);
        // Each input file is bound to the mapper that applies its source tag.
        MultipleInputs.addInputPath(job, inpath1, TextInputFormat.class, UserJoinMapper.class);
        MultipleInputs.addInputPath(job, inpath2, TextInputFormat.class, PostJoinMapper.class);
        job.setReducerClass(JoinReducer.class);
        FileOutputFormat.setOutputPath(job, outpath);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
测试结果:
内连接:
左外连接:
右外连接:
全外连接:
反连接: