MapReduce Join: a Single Mapper Class, One File per Map Task

1. Requirements and Implementation Approach
(1) Requirements
There are two data files: order data and product information.
Order table order (shown as a screenshot in the original post; its columns are id, date, pid, amount).
Product table product (shown as a screenshot in the original post; its columns are id, pname, category_id, price).
We want a MapReduce program that implements the following SQL query:
select o.id order_id, o.date, o.amount, p.id p_id, p.pname, p.category_id, p.price
from t_order o join t_product p on o.pid = p.id
(2) Implementation approach
The SQL result (shown as a screenshot in the original post) is each order row extended with the matching product columns.
In effect, every order record is enriched with the corresponding fields from the product table.
Implementation approach:
1) Define a bean
Wrap the columns of the SQL result in a bean class and make it serializable (Hadoop's Writable).
The bean also needs an extra flag attribute marking whether an instance carries order data or product data.
2) Map phase
The map task handles data from both files; the file name of the current split tells it whether a line is an order or a product.
For each line, build a bean, set the corresponding attributes, and set the flag ("0" for order, "1" for product).
Emit the join key, the product id pid, as the key and the bean as the value.
3) Reduce phase
Each reduce() call receives the group of beans that share one pid.
Iterate over the group: collect order beans into a new list, and keep the product bean aside in its own variable. Then walk the order list and complete every order bean with the product bean's fields.
The result is each full order record together with its product information. Hypothetical sample input lines follow below.
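
For concreteness, here are made-up input lines in the space-separated layout the mapper below parses (the original post shows the tables only as screenshots; every value here is illustrative, not from the source):

order file, fields order_id date pid amount:

1001 20150710 P0001 2
1002 20150710 P0002 3
1003 20150711 P0001 1

product file, fields pid pname category_id price:

P0001 xiaomi 1 1999.9
P0002 chuizi 1 2999.9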

pom.xml (shown as a screenshot in the original post):
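
A minimal sketch of what it has to declare, judging from the imports in the code below (the dependency coordinates are real Maven artifacts, but the project coordinates and versions are assumptions; match hadoop-client to your cluster):

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <!-- hypothetical project coordinates -->
  <groupId>join</groupId>
  <artifactId>join-mr</artifactId>
  <version>1.0</version>
  <dependencies>
    <!-- Hadoop client libraries: Writable, Mapper, Reducer, Job, ... -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.3</version> <!-- assumed version -->
    </dependency>
    <!-- provides BeanUtils.copyProperties used in the reducer -->
    <dependency>
      <groupId>commons-beanutils</groupId>
      <artifactId>commons-beanutils</artifactId>
      <version>1.9.3</version> <!-- assumed version -->
    </dependency>
  </dependencies>
</project>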

Serialization class: InfoBean. Note that readFields() must read the fields in exactly the same order that write() writes them.

package join;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class InfoBean implements Writable{
	private int order_id;
	private String dataString; // order date (field name kept from the original)
	private String p_id;
	private int amount;
	private String pname;
	private int category_id;
	private float price;

	// "0" marks an order record, "1" a product record
	private String flag;
	
	public InfoBean() {
	}

	public void set(int order_id, String dataString, String p_id, int amount,
			String pname, int category_id, float price, String flag) {
		this.order_id = order_id;
		this.dataString = dataString;
		this.p_id = p_id;
		this.amount = amount;
		this.pname = pname;
		this.category_id = category_id;
		this.price = price;
		this.flag = flag;
	}

	// Deserialize the fields in exactly the order write() serialized them.
	public void readFields(DataInput in) throws IOException {
		this.order_id = in.readInt();
		this.dataString = in.readUTF();
		this.p_id = in.readUTF();
		this.amount = in.readInt();
		this.pname = in.readUTF();
		this.category_id = in.readInt();
		this.price = in.readFloat();
		this.flag = in.readUTF();
	}

	// Serialize every field, including flag; order must mirror readFields().
	public void write(DataOutput out) throws IOException {
		out.writeInt(order_id);
		out.writeUTF(dataString);
		out.writeUTF(p_id);
		out.writeInt(amount);
		out.writeUTF(pname);
		out.writeInt(category_id);
		out.writeFloat(price);
		out.writeUTF(flag);
	}

	public int getOrder_id() {
		return order_id;
	}

	public void setOrder_id(int order_id) {
		this.order_id = order_id;
	}

	public String getDataString() {
		return dataString;
	}

	public void setDataString(String dataString) {
		this.dataString = dataString;
	}

	public String getP_id() {
		return p_id;
	}

	public void setP_id(String p_id) {
		this.p_id = p_id;
	}

	public int getAmount() {
		return amount;
	}

	public void setAmount(int amount) {
		this.amount = amount;
	}

	public String getPname() {
		return pname;
	}

	public void setPname(String pname) {
		this.pname = pname;
	}

	public int getCategory_id() {
		return category_id;
	}

	public void setCategory_id(int category_id) {
		this.category_id = category_id;
	}

	public float getPrice() {
		return price;
	}

	public void setPrice(float price) {
		this.price = price;
	}

	public String getFlag() {
		return flag;
	}

	public void setFlag(String flag) {
		this.flag = flag;
	}

	@Override
	public String toString() {
		return "InfoBean [order_id=" + order_id + ", dataString=" + dataString
				+ ", p_id=" + p_id + ", amount=" + amount + ", pname=" + pname
				+ ", category_id=" + category_id + ", price=" + price + "]";
	}
}

Main class: JoinMR

package join;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class JoinMR {
	static class JoinMRMapper extends Mapper<LongWritable, Text, Text, InfoBean> {
		// Reused across calls: context.write() serializes the bean immediately,
		// so it is safe to refill the same instance for every input line.
		InfoBean bean = new InfoBean();
		Text k = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String line = value.toString();
			String[] fields = line.split(" ");
			// Each input split comes from a single file, so the file name tells us
			// whether this line is an order record or a product record.
			FileSplit inputSplit = (FileSplit) context.getInputSplit();
			String filename = inputSplit.getPath().getName();
			String pid = "";
			if (filename.startsWith("order")) {
				// order line: order_id date pid amount
				pid = fields[2];
				bean.set(Integer.parseInt(fields[0]), fields[1], pid,
						Integer.parseInt(fields[3]), "", 0, 0f, "0");
			} else {
				// product line: pid pname category_id price
				pid = fields[0];
				bean.set(0, "", pid, 0, fields[1],
						Integer.parseInt(fields[2]), Float.parseFloat(fields[3]), "1");
			}
			// Key by pid so orders and their product land in the same reduce group.
			k.set(pid);
			context.write(k, bean);
		}
	}
	
	static class JoinMRReducer extends Reducer<Text, InfoBean, InfoBean, NullWritable> {
		@Override
		protected void reduce(Text pid, Iterable<InfoBean> beans, Context context)
				throws IOException, InterruptedException {
			InfoBean pdBean = new InfoBean();
			ArrayList<InfoBean> orderBeans = new ArrayList<InfoBean>();
			try {
				for (InfoBean bean : beans) {
					if ("1".equals(bean.getFlag())) {
						// Product record: copy it out, because Hadoop reuses the
						// bean instance while iterating over the values.
						BeanUtils.copyProperties(pdBean, bean);
					} else {
						// Order record: copy into a fresh bean before collecting it.
						InfoBean odbean = new InfoBean();
						BeanUtils.copyProperties(odbean, bean);
						orderBeans.add(odbean);
					}
				}
			} catch (Exception e) {
				// Don't swallow reflection errors silently; fail the task instead.
				throw new RuntimeException(e);
			}
			// Fill in the product columns on every order, producing the joined rows.
			for (InfoBean bean : orderBeans) {
				bean.setPname(pdBean.getPname());
				bean.setCategory_id(pdBean.getCategory_id());
				bean.setPrice(pdBean.getPrice());
				context.write(bean, NullWritable.get());
			}
		}
	}
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		// Delete a pre-existing output directory so the job can be rerun.
		FileSystem fs = FileSystem.get(conf);
		if (fs.exists(new Path(args[1]))) {
			fs.delete(new Path(args[1]), true);
		}
		job.setJarByClass(JoinMR.class);
		job.setJobName("JoinMR");

		job.setMapperClass(JoinMRMapper.class);
		job.setReducerClass(JoinMRReducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(InfoBean.class);

		job.setOutputKeyClass(InfoBean.class);
		job.setOutputValueClass(NullWritable.class);

		// args[0]: input directory containing both data files; args[1]: output directory.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
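
To run the job (a sketch; the jar name and HDFS paths are hypothetical), upload both data files into one input directory, making sure the order file's name starts with "order" so the mapper can tell the files apart:

hadoop jar join-mr-1.0.jar join.JoinMR /join/input /join/output

Each output line is an InfoBean.toString(); with the sample data above, one line would look like:

InfoBean [order_id=1001, dataString=20150710, p_id=P0001, amount=2, pname=xiaomi, category_id=1, price=1999.9]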