MapReduce实现内连接、左外连接、右外连接、全外连接、反连接

测试数据   

more user.txt

more post.txt

user.txt 的字段:(用户id,用户名)  

1   用户1  

2   用户2  

3   用户3

 

post.txt 的字段:(用户id,帖子id,标题)  

1   1   贴子1  

1   2   贴子2  

2   3   帖子3  

4   4   贴子4  

5   5   贴子5  

5   6   贴子6  

5   7   贴子7  

 代码

import java.io.IOException;  
import java.util.ArrayList;  
import org.apache.hadoop.conf.Configuration;  
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.Mapper;  
import org.apache.hadoop.mapreduce.Reducer;  
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;  
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Reduce-side join of user records with post records on the user id.
 *
 * <p>Two mappers read tab-separated text files whose first column is the user
 * id. Each mapper emits the user id as the key and the whole input line,
 * prefixed with a one-character tag ({@code u} for users, {@code p} for posts),
 * as the value. The reducer separates the values of one key by tag and writes
 * the combinations required by the join type configured under
 * {@code join.type}: {@code inner}, {@code leftouter}, {@code rightouter},
 * {@code fullouter} or {@code anti} (case-insensitive).
 *
 * <p>Usage: {@code UserAndPost [joinType]}. Without an argument the join type
 * is {@code anti}. The output directory is the join type name below the fixed
 * output base path.
 */
public class UserAndPost {
	/** Configuration key the driver uses to hand the join type to the reducer. */
	private static final String JOIN_TYPE_KEY="join.type";
	/** Join type used when no command-line argument is given. */
	private static final String DEFAULT_JOIN_TYPE="anti";
	/** Parent of the per-join-type output directories. */
	private static final String OUTPUT_BASE="hdfs://192.168.109.125:8020/output/userandpost/";

	/** The supported join types together with the name used in the configuration. */
	private enum JoinType {
		INNER("inner"),
		LEFT_OUTER("leftouter"),
		RIGHT_OUTER("rightouter"),
		FULL_OUTER("fullouter"),
		ANTI("anti");

		private final String configName;

		JoinType(String configName) {
			this.configName=configName;
		}

		/**
		 * Resolves a configured name, ignoring case.
		 *
		 * @param name the configured join type, may be {@code null}
		 * @return the matching join type
		 * @throws IllegalArgumentException if {@code name} is {@code null} or not a supported join type
		 */
		static JoinType parse(String name) {
			for(JoinType type:values()) {
				if(type.configName.equalsIgnoreCase(name)) {
					return type;
				}
			}
			throw new IllegalArgumentException("Unsupported join.type '"+name
					+"'; expected one of inner, leftouter, rightouter, fullouter, anti");
		}
	}

	/**
	 * Extracts the join key, i.e. the text before the first tab of a line.
	 *
	 * <p>{@code indexOf} is used instead of {@code split("\t")[0]} because
	 * {@code split} drops trailing empty strings and therefore returns an empty
	 * array for a line that consists only of tabs.
	 *
	 * @param line one input line
	 * @return the first column, or the empty string when that column is blank
	 */
	private static String joinKeyOf(String line) {
		int tab=line.indexOf('\t');
		String id=tab<0?line:line.substring(0,tab);
		return id.trim().isEmpty()?"":id;
	}

	/** Emits every user line as (user id, "u" + line); lines without a user id are skipped. */
	public static class UserJoinMapper extends Mapper<Object,Text,Text,Text>{
		private final Text outkey=new Text();
		private final Text outvalue=new Text();

		@Override
		protected void map(Object key, Text value, Context context)throws IOException, InterruptedException {
			String line=value.toString();
			String id=joinKeyOf(line);
			if(id.isEmpty()) {
				return;
			}
			outkey.set(id);
			outvalue.set("u"+line);
			context.write(outkey,outvalue);
		}
	}

	/** Emits every post line as (user id, "p" + line); lines without a user id are skipped. */
	public static class PostJoinMapper extends Mapper<Object,Text,Text,Text>{
		private final Text outkey=new Text();
		private final Text outvalue=new Text();

		@Override
		protected void map(Object key, Text value, Context context)throws IOException, InterruptedException {
			String line=value.toString();
			String id=joinKeyOf(line);
			if(id.isEmpty()) {
				return;
			}
			outkey.set(id);
			outvalue.set("p"+line);
			context.write(outkey,outvalue);
		}
	}

	/**
	 * Joins the user and post records that share one user id.
	 *
	 * <p>All records of a key are buffered in the two lists before anything is
	 * written. A missing post is written as the empty string, a missing user as
	 * the placeholder {@code "\t \t"}.
	 */
	public static class JoinReducer extends Reducer<Text,Text,Text,Text>{
		private final ArrayList<Text> listuser=new ArrayList<Text>();
		private final ArrayList<Text> listpost=new ArrayList<Text>();
		/** Written in place of a post when a user has none. */
		private final Text emptyPost=new Text("");
		/** Written in place of a user when a post has none. */
		private final Text emptyUser=new Text("\t \t");
		private JoinType joinType;

		/**
		 * Reads and validates the join type once per task.
		 *
		 * @throws IllegalArgumentException if {@code join.type} is missing or unsupported
		 */
		@Override
		protected void setup(Context context) {
			joinType=JoinType.parse(context.getConfiguration().get(JOIN_TYPE_KEY));
		}

		@Override
		protected void reduce(Text key, Iterable<Text> values,Context context)throws IOException, InterruptedException {
			listuser.clear();
			listpost.clear();
			for(Text t:values) {
				String record=t.toString();
				if(record.isEmpty()) {
					continue;
				}
				// A fresh Text is stored for every record instead of keeping a reference to t.
				char tag=record.charAt(0);
				if(tag=='u') {
					listuser.add(new Text(record.substring(1)));
				}else if(tag=='p') {
					listpost.add(new Text(record.substring(1)));
				}
			}
			executeJoinLogic(context);
		}

		/** Writes the output for the current key according to the configured join type. */
		private void executeJoinLogic(Context context)throws IOException,InterruptedException{
			switch(joinType) {
			case INNER:
				// Only keys present on both sides; the nested loops write nothing otherwise.
				for(Text user:listuser) {
					for(Text post:listpost) {
						context.write(user,post);
					}
				}
				break;
			case LEFT_OUTER:
				writeUserMajor(context);
				break;
			case RIGHT_OUTER:
				writePostMajor(context);
				break;
			case FULL_OUTER:
				// With at least one user this is the left outer join; without users
				// only the unmatched posts remain.
				if(!listuser.isEmpty()) {
					writeUserMajor(context);
				}else {
					writePostMajor(context);
				}
				break;
			case ANTI:
				// Only keys that are missing on one side.
				if(listuser.isEmpty()||listpost.isEmpty()) {
					for(Text user:listuser) {
						context.write(user,emptyPost);
					}
					for(Text post:listpost) {
						context.write(emptyUser,post);
					}
				}
				break;
			default:
				throw new IllegalStateException("Unhandled join type "+joinType);
			}
		}

		/** Writes every user with each of its posts, or with an empty post when there are none. */
		private void writeUserMajor(Context context)throws IOException,InterruptedException{
			for(Text user:listuser) {
				if(listpost.isEmpty()) {
					context.write(user,emptyPost);
				}else {
					for(Text post:listpost) {
						context.write(user,post);
					}
				}
			}
		}

		/** Writes every post with each of its users, or with the user placeholder when there are none. */
		private void writePostMajor(Context context)throws IOException,InterruptedException{
			for(Text post:listpost) {
				if(listuser.isEmpty()) {
					context.write(emptyUser,post);
				}else {
					for(Text user:listuser) {
						context.write(user,post);
					}
				}
			}
		}
	}

	/**
	 * Configures and runs the join job, then exits with status 0 on success and 1 on failure.
	 *
	 * @param args optional; {@code args[0]} is the join type (default {@code anti})
	 * @throws IllegalArgumentException if the join type is not supported
	 * @throws Exception if the job cannot be set up or submitted
	 */
	public static void main(String[] args) throws Exception{
		// Validate before submitting so that a typo fails here and not inside the reducers.
		JoinType type=JoinType.parse(args.length>0?args[0]:DEFAULT_JOIN_TYPE);
		Configuration conf = new Configuration();
		Path inpath1=new Path("hdfs://192.168.109.125:8020/input/userandpost/user.txt");
		Path inpath2=new Path("hdfs://192.168.109.125:8020/input/userandpost/post.txt");
		Path outpath=new Path(OUTPUT_BASE+type.configName);
		Job job=Job.getInstance(conf);
		job.getConfiguration().set(JOIN_TYPE_KEY,type.configName);
		job.setJarByClass(UserAndPost.class);
		MultipleInputs.addInputPath(job,inpath1,TextInputFormat.class,UserJoinMapper.class);
		MultipleInputs.addInputPath(job,inpath2,TextInputFormat.class,PostJoinMapper.class);
		job.setReducerClass(JoinReducer.class);
		FileOutputFormat.setOutputPath(job,outpath);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		System.exit(job.waitForCompletion(true)?0:1);
	}
}

测试结果:

内连接:

左外连接:

右外连接:

全外连接:

反连接:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值