import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
* 传输的bean对象
* <p>Title: OrderBean</p>
* <p>Description: </p>
* <p>Company: www.xnlc.cn</p>
* @author 黄庭华
* @date 2016年7月17日下午2:52:36
* @version 1.0
*/
public class OrderBean implements Writable {

    private String oid;    // order id
    private String pid;    // product id
    private int amount;    // quantity of this product in the order
    private String pname;  // product name
    private String flag;   // record-type tag: "0" = order table, "1" = product table

    /**
     * Populates this bean as an order-table record.
     * Product name is blanked; flag is set to "0".
     */
    public void setOrder(String oid, String pid, int amount) {
        this.oid = oid;
        this.pid = pid;
        this.amount = amount;
        this.pname = "";
        this.flag = "0";
    }

    /**
     * Populates this bean as a product-table record.
     * Order id is blanked, amount zeroed; flag is set to "1".
     */
    public void setPro(String pid, String pname) {
        this.oid = "";
        this.pid = pid;
        this.amount = 0;
        this.pname = pname;
        this.flag = "1";
    }

    /** Output format written by the job: oid TAB pname TAB amount. */
    @Override
    public String toString() {
        return String.format("%s\t%s\t%d", oid, pname, amount);
    }

    // Serialization: field order here MUST stay in sync with readFields().
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(oid);
        out.writeUTF(pid);
        out.writeUTF(pname);
        out.writeInt(amount);
        out.writeUTF(flag);
    }

    // Deserialization: reads fields in exactly the order write() emits them.
    @Override
    public void readFields(DataInput in) throws IOException {
        this.oid = in.readUTF();
        this.pid = in.readUTF();
        this.pname = in.readUTF();
        this.amount = in.readInt();
        this.flag = in.readUTF();
    }

    public String getOid() {
        return oid;
    }

    public void setOid(String oid) {
        this.oid = oid;
    }

    public String getPid() {
        return pid;
    }

    public void setPid(String pid) {
        this.pid = pid;
    }

    public int getAmount() {
        return amount;
    }

    public void setAmount(int amount) {
        this.amount = amount;
    }

    public String getPname() {
        return pname;
    }

    public void setPname(String pname) {
        this.pname = pname;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }
}
//=========================================
package cn.nyzc.reducejoin;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
/**
*
* <p>Title: ReduceJoinMapper</p>
* <p>Description: </p>
* <p>Company: www.xnlc.cn</p>
* @author 黄庭华
* @date 2016年7月17日下午2:52:52
* @version 1.0
*/
public class ReduceJoinMapper extends Mapper<LongWritable, Text, Text, OrderBean> {

    // Reused output holders — standard Hadoop idiom to avoid per-record allocation.
    OrderBean bean = new OrderBean();
    Text text = new Text();

    /**
     * Tags each input line by its source file and emits it keyed on product id,
     * so that order rows and product rows for the same pid meet in one reducer call.
     *
     * Expected line layouts (tab-separated):
     *   order* files: oid \t pid \t amount
     *   pd*    files: pid \t pname
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Determine which table this split comes from via the file name.
        FileSplit split = (FileSplit) context.getInputSplit();
        String fileName = split.getPath().getName();

        String[] fields = value.toString().split("\t");

        if (fileName.startsWith("order")) {
            // Order record: join key is the product id (column 1).
            bean.setOrder(fields[0], fields[1], Integer.parseInt(fields[2]));
            text.set(fields[1]);
        } else if (fileName.startsWith("pd")) {
            // Product record: join key is the product id (column 0).
            bean.setPro(fields[0], fields[1]);
            text.set(fields[0]);
        } else {
            // BUG FIX: the original wrote unconditionally, so a line from an
            // unrecognized file re-emitted the stale bean/key from the previous
            // call (or an empty pair). Skip such lines instead.
            return;
        }

        context.write(text, bean);
    }
}
//============================================
package cn.nyzc.reducejoin;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
*
* <p>Title: ReduceJoinReducer</p>
* <p>Description: </p>
* <p>Company: www.xnlc.cn</p>
* @author 黄庭华
* @date 2016年7月17日下午2:51:48
* @version 1.0
*/
public class ReduceJoinReducer extends Reducer<Text, OrderBean, OrderBean, NullWritable> {

    // Reused buffer of order-side records for the current key; cleared per reduce() call.
    List<OrderBean> olist = new ArrayList<>();

    /**
     * Joins the order records ("0"-flagged) of one product id with that
     * product's name ("1"-flagged record), then emits each completed order.
     */
    @Override
    protected void reduce(Text key, Iterable<OrderBean> values,
            Context context) throws IOException, InterruptedException {
        olist.clear();
        // BUG FIX: pname was an instance field that was never reset, so a key
        // group with no product record silently inherited the previous group's
        // product name. Use a per-call local with an explicit empty default.
        String pname = "";

        for (OrderBean orderBean : values) {
            if ("0".equals(orderBean.getFlag())) {
                // Hadoop reuses the value object across the iteration, so a
                // deep copy is required before caching. Copy fields explicitly
                // instead of BeanUtils reflection, which also swallowed its
                // exceptions via printStackTrace.
                OrderBean buf = new OrderBean();
                buf.setOid(orderBean.getOid());
                buf.setPid(orderBean.getPid());
                buf.setAmount(orderBean.getAmount());
                buf.setPname(orderBean.getPname());
                buf.setFlag(orderBean.getFlag());
                olist.add(buf);
            } else if ("1".equals(orderBean.getFlag())) {
                // Product-side record: remember the name to merge into orders.
                pname = orderBean.getPname();
            }
        }

        // Emit each order with the missing product name filled in.
        for (OrderBean bean : olist) {
            bean.setPname(pname);
            context.write(bean, NullWritable.get());
        }
    }
}
//======================================
package cn.nyzc.reducejoin;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* 任务驱动类
* <p>Title: ReduceJoinDriver</p>
* <p>Description: </p>
* <p>Company: www.xnlc.cn</p>
* @author 黄庭华
* @date 2016年7月17日下午2:54:27
* @version 1.0
*/
public class ReduceJoinDriver {

    /**
     * Configures and submits the reduce-side join job.
     *
     * @param args args[0] = input directory, args[1] = output directory;
     *             falls back to local test paths when not supplied.
     */
    public static void main(String[] args) throws Exception {
        // BUG FIX: the original unconditionally overwrote the command-line
        // arguments with hard-coded local paths. Use them only as a default.
        if (args.length < 2) {
            args = new String[]{"e:/fortest/input03", "e:/output01"};
        }

        // 1. Create the job.
        Job job = Job.getInstance(new Configuration());

        // 2. Locate the jar by the driver class.
        job.setJarByClass(ReduceJoinDriver.class);

        // 3. Wire up the mapper and reducer.
        job.setMapperClass(ReduceJoinMapper.class);
        job.setReducerClass(ReduceJoinReducer.class);

        // 4. Declare the output types of each stage.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(OrderBean.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // 5. Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 6. Submit and block until completion.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}