MapReduce implementations of inner join, left join, right join, full join, and anti join

Dataset
user table (id, name):
1	user1
2	user2
3	user3
4	user4
5	user5
6	user6


post table (userid, postid, postname):
1	1	post1
1	2	post2
2	3	post3
4	4	post4
5	5	post5
8	6	post6
8	7	post7
8	8	post8
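Both files are tab-separated, and userid is the join key. The job below is a reduce-side join: a separate mapper tags each record with its source table ("U" for user, "P" for post), the shuffle groups both sides under the same userid, and the reducer combines them according to the requested joinType. For example, running with joinType=innerJoin on the dataset above should produce (user row, then post row, tab-separated):

1	user1	1	1	post1
1	user1	1	2	post2
2	user2	2	3	post3
4	user4	4	4	post4
5	user5	5	5	post5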
package com.test;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


/*
 * MapReduce implementations of inner, left, right, full, and anti joins.
 */
public class UserAndPostJoinJob {
	
	static class UserAndPostWritable implements Writable {
		/*
		 * Record type: "U" marks a user record, "P" marks a post record.
		 */
		private String type;
		private String data;
		
		// No-arg constructor required by Hadoop when deserializing.
		public UserAndPostWritable() {}
		
		public UserAndPostWritable(String type, String data) {
			this.type = type;
			this.data = data;
		}
		public String getType() {
			return type;
		}
		public void setType(String type) {
			this.type = type;
		}
		public String getData() {
			return data;
		}
		public void setData(String data) {
			this.data = data;
		}

		@Override
		public void readFields(DataInput input) throws IOException {
			// Deserialize in the same order the fields were written.
			type = input.readUTF();
			data = input.readUTF();
		}

		@Override
		public void write(DataOutput output) throws IOException {
			output.writeUTF(type);
			output.writeUTF(data);
		}
		
	}
	
	static class UserMapper extends Mapper<LongWritable, Text, Text, UserAndPostWritable> {
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// user lines look like "id\tname"; the id is the join key.
			String[] arr = value.toString().split("\t");
			Text userId = new Text(arr[0]);
			context.write(userId, new UserAndPostWritable("U", value.toString()));
		}
	}
	
	static class PostMapper extends Mapper<LongWritable, Text, Text, UserAndPostWritable> {
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// post lines look like "userid\tpostid\tpostname"; the userid is the join key.
			String[] arr = value.toString().split("\t");
			Text userId = new Text(arr[0]);
			context.write(userId, new UserAndPostWritable("P", value.toString()));
		}
	}
	
	static class PostReducer extends Reducer<Text, UserAndPostWritable, Text, Text> {
		private List<Text> users = new ArrayList<Text>();
		private List<Text> posts = new ArrayList<Text>();
		
		private String joinType;
		
		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			super.setup(context);
			joinType = context.getConfiguration().get("joinType");
		}

		@Override
		protected void reduce(Text key, Iterable<UserAndPostWritable> iterable, Context context) throws IOException, InterruptedException {
			// Buffer both sides for this userid. Hadoop reuses the value object
			// across iterations, so copy the data out rather than keeping a reference.
			users.clear();
			posts.clear();
			for (UserAndPostWritable data : iterable) {
				if (data.getType().equals("U")) {
					users.add(new Text(data.getData()));
				} else {
					posts.add(new Text(data.getData()));
				}
			}
			if("innerJoin".equals(joinType)) {
				if(users.size() > 0 && posts.size() > 0){
					for(Text user : users) {
						for(Text post : posts) {
							context.write(new Text(user),new Text(post));
						}
					}
				}
			}else if("leftOuterJoin".equals(joinType)) {
				for(Text user : users) {
					if(posts.size() > 0) {
						for(Text post : posts) {
							context.write(new Text(user), new Text(post));
						}
					} else {
						context.write(new Text(user), new Text(" \t \t "));
					}
				}
			}else if("rightOuterJoin".equals(joinType)) {
				for(Text post : posts) {
					if(users.size() > 0) {
						for(Text user : users) {
							context.write(new Text(user), new Text(post));
						}
					} else {
						context.write(new Text(" \t "), new Text(post));
					}
				}
			}else if("fullOuterJoin".equals(joinType)) {
				if(users.size() > 0) {
					for(Text user : users) {
						if(posts.size() > 0) {
							for(Text post : posts) {
								context.write(new Text(user), new Text(post));
							}
						} else {
							context.write(new Text(user), new Text(" \t \t "));
						}
					}
				} else {
					for(Text post : posts) {
						if(users.size() > 0) {
							for(Text user : users) {
								context.write(new Text(user), new Text(post));
							}
						} else {
							context.write(new Text(" \t "), new Text(post));
						}
					}
				}
			}else if("anti".equals(joinType)) {
				if(users.size() == 0 ^ posts.size() == 0) {
					for(Text user : users) {
						context.write(new Text(user), new Text(" \t \t "));
					}
					for(Text post : posts) {
						context.write(new Text(" \t "), new Text(post));
					}
				}
			}
		}
	}

	public static void main(String[] args) throws Exception {
		if (args.length < 4) {
			System.err.println("Usage: UserAndPostJoinJob <joinType> <userInputPath> <postInputPath> <outputPath> [queue]");
			System.exit(2);
		}

		Configuration conf = new Configuration();

		// Optional fifth argument selects the scheduler queue; fall back to "hql".
		String queue = "hql";
		if (args.length > 4) {
			queue = args[4].matches("hql|dstream|mapred|udw|user|common") ? args[4] : "hql";
		}

		String joinType = args[0];
		String userInputPath = args[1];
		String postInputPath = args[2];
		String outputPath = args[3];

		conf.set("mapreduce.job.queuename", queue);
		Job job = Job.getInstance(conf,"JoinTest");
		
		job.getConfiguration().set("joinType", joinType);
		// Separator between the reducer's output key and value
		// ("mapred.textoutputformat.separator" is the deprecated pre-2.x name).
		job.getConfiguration().set("mapreduce.output.textoutputformat.separator", "\t");
		
		job.setJarByClass(UserAndPostJoinJob.class);
		MultipleInputs.addInputPath(job, new Path(userInputPath), TextInputFormat.class, UserMapper.class);
		MultipleInputs.addInputPath(job, new Path(postInputPath), TextInputFormat.class, PostMapper.class);
		
		job.setReducerClass(PostReducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(UserAndPostWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		
		job.setOutputFormatClass(TextOutputFormat.class);
		
		FileSystem fs = FileSystem.get(conf);
		Path outPath = new Path(outputPath);
		// Remove any previous output so the job can be rerun.
		if (fs.exists(outPath)) {
			fs.delete(outPath, true);
		}
		FileOutputFormat.setOutputPath(job, outPath);
		
		FileOutputFormat.setCompressOutput(job, true);
		FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
		
		System.exit(job.waitForCompletion(true) ? 0 : 1);

	}

}
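A typical invocation passes the join type, the two input directories, and the output directory; the jar name below is just a placeholder for however you package the class:

hadoop jar user-post-join.jar com.test.UserAndPostJoinJob anti /data/user /data/post /data/join_out

Because the output is gzip-compressed, hadoop fs -text /data/join_out/part-* will decompress it for inspection. With joinType=anti and a single reducer, only keys present on exactly one side survive: users 3 and 6 (who have no posts) and the userid-8 posts (which have no user), with the missing side padded by the blank placeholders:

3	user3	 	 	 
6	user6	 	 	 
 	 	8	6	post6
 	 	8	7	post7
 	 	8	8	post8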


