使用MapReduce实现join操作
-
1、两种方法:
MapJoin和ReduceJoin -
2、MapJoin和ReduceJoin的区别:
MapJoin利用分布式缓存(cache file)加载其中一份数据,在map端与读入的另一份数据直接进行逻辑关联,因此不需要编写reducer(不写reducer并不代表没有shuffle和reduce过程:除非把reduce任务数设置为0,否则框架仍会使用默认的reducer)
ReduceJoin是map端只完成文件合并,利用相同的关联条件(id)作为key,输出到reduce端,reduce端根据key聚合达到关联的效果 -
3、以下两种实现方法所需要的数据文件:
customers.csv
orders.csv
Reduce端join
编写CustomOrder类
实现Writable接口,重写write和readFields方法,要求序列化与反序列化的字段顺序一致,提供get和set方法,重写toString方法。
package cn.kgc.kb09.join;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* @Qianchun
* @Date 2020/9/10
* @Description
*/
public class CustomOrder implements Writable {
private String customId;
private String customName;
private String orderId;
private String orderStatus;
private String tableFlag;//为0时是custom表,为1时是order表
/**
 * Serializes this record by writing its five fields to {@code out} as
 * modified-UTF-8 strings, in the order customId, customName, orderId,
 * orderStatus, tableFlag. {@link #readFields} must read them back in
 * exactly this order.
 *
 * <p>A field that is {@code null} is written as the empty string, so a
 * partially populated record can still be serialized; such a field comes
 * back as {@code ""} (not {@code null}) after deserialization.
 *
 * @param out the sink the fields are written to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    // DataOutput.writeUTF throws NullPointerException on null, so every
    // field gets the same guard that tableFlag already had.
    out.writeUTF(customId == null ? "" : customId);
    out.writeUTF(customName == null ? "" : customName);
    out.writeUTF(orderId == null ? "" : orderId);
    out.writeUTF(orderStatus == null ? "" : orderStatus);
    out.writeUTF(tableFlag == null ? "" : tableFlag);
}
/**
 * Deserializes this record by reading five strings from {@code in} and
 * storing them in customId, customName, orderId, orderStatus and tableFlag.
 * The read order must match the order in which {@code write} emits the
 * fields.
 *
 * @param in the source the fields are read from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    customId = in.readUTF();
    customName = in.readUTF();
    orderId = in.readUTF();
    orderStatus = in.readUTF();
    tableFlag = in.readUTF();
}
/**
 * Renders customId, customName, orderId and orderStatus as a
 * comma-separated list of {@code name='value'} pairs. The tableFlag field
 * is not part of the output.
 *
 * @return the formatted text for this record
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("customId='").append(customId).append('\'');
    text.append(", customName='").append(customName).append('\'');
    text.append(", orderId='").append(orderId).append('\'');
    text.append(", orderStatus='").append(orderStatus).append('\'');
    return text.toString();
}
/**
 * Returns the current value of the customId field.
 *
 * @return the customId, or {@code null} if it has not been assigned
 */
public String getCustomId() {
    return this.customId;
}
/**
 * Stores the given value in the customId field.
 *
 * @param customId the value to store
 */
public void setCustomId(String customId) {
this.customId = customId;
}
/**
 * Returns the current value of the customName field.
 *
 * @return the customName, or {@code null} if it has not been assigned
 */
public String getCustomName() {
    return this.customName;
}
/**
 * Stores the given value in the customName field.
 *
 * @param customName the value to store
 */
public void setCustomName(String customName) {
this.customName = customName;
}
/**
 * Returns the current value of the orderId field.
 *
 * @return the orderId, or {@code null} if it has not been assigned
 */
public String getOrderId() {
    return this.orderId;
}
/**
 * Stores the given value in the orderId field.
 *
 * @param orderId the value to store
 */
public void setOrderId(String orderId) {
this.orderId = orderId;
}
/**
 * Returns the current value of the orderStatus field.
 *
 * @return the orderStatus, or {@code null} if it has not been assigned
 */
public String getOrderStatus() {
    return this.orderStatus;
}
/**
 * Stores the given value in the orderStatus field.
 *
 * @param orderStatus the value to store
 */
public void setOrderStatus(String orderStatus) {
this.orderStatus = orderStatus;
}
/**
 * Returns the current value of the tableFlag field. The field's
 * declaration comment describes it as 0 for the custom table and 1 for the
 * order table.
 *
 * @return the tableFlag, or {@code null} if it has not been assigned
 */
public String getTableFlag() {
    return this.tableFlag;
}
public void setTableFlag(String tableFlag) {
this.tableFlag = tableFlag;