Join On MapReduce
第一种实现(Reduce Join,即 Reduce 端连接)
思路: Map 阶段为来自不同文件的记录打上 tag 标签,并以连接字段(班级编号)作为输出 key;Shuffle 后同一班级的班级记录和学生记录进入同一个 Reduce,在 Reduce 阶段按 tag 区分两类数据并完成连接。
准备数据:
cls.txt
ClassA,Alex,R108,1
ClassB,Mike,R115,2
ClassC,Jack,R121,3
ClassD,Nike,R206,4
stus.txt
1001,Tomm,male,20,3
1002,Lucy,female,18,2
1003,Mark,male,19,1
2001,Json,male,21,3
3001,Rose,female,20,2
2002,Bobb,male,21,4
2003,Bill,male,20,3
3002,Jame,male,21,1
2004,Vick,male,20,2
3003,Winn,female,18,4
1004,York,male,20,2
3004,Nora,female,18,4
编写代码:
1.自定义数据类型用以储存tag标签和输出数据
package com.nike.hadoop.mapred.join02;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * A tagged record used as the map-output value in a reduce-side join.
 *
 * <p>Each instance carries a {@code tag} identifying which input file the
 * record came from (so the reducer can tell class records from student
 * records) and {@code data}, the raw record line being shuffled.
 *
 * <p>Hadoop deserializes Writables via the no-arg constructor followed by
 * {@link #readFields(DataInput)}, so the no-arg constructor must leave the
 * object in a serializable state.
 */
public class CuWritable implements Writable {
    // Source tag: distinguishes the two joined inputs in the reducer.
    private int tag;
    // Raw CSV line of the original record.
    private String data;

    /**
     * No-arg constructor required by Hadoop's reflection-based instantiation.
     * {@code data} is initialized to "" so that calling {@link #write(DataOutput)}
     * on a fresh instance cannot hit {@code writeUTF(null)}, which throws NPE.
     */
    public CuWritable() {
        this.data = "";
    }

    /**
     * @param tag  source tag for this record (e.g. 1 = class file, 2 = student file)
     * @param data raw record line to carry through the shuffle
     */
    public CuWritable(int tag, String data) {
        this.tag = tag;
        this.data = data;
    }

    public int getTag() {
        return tag;
    }

    public void setTag(int tag) {
        this.tag = tag;
    }

    public String getData() {
        return data;
    }

    public void setData(String data) {
        this.data = data;
    }

    /**
     * Serializes this record as a fixed-width int followed by a UTF string.
     * A null {@code data} (possible via {@link #setData(String)}) is written
     * as "" instead of letting {@code writeUTF} throw NullPointerException.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(tag);
        dataOutput.writeUTF(data == null ? "" : data);
    }

    /** Deserializes in the exact field order used by {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.tag = dataInput.readInt();
        this.data = dataInput.readUTF();
    }

    /** Human-readable form for logs and debugging; not part of the wire format. */
    @Override
    public String toString() {
        return "CuWritable{tag=" + tag + ", data='" + data + "'}";
    }
}
2.MapReduce的具体实现
package com.nike.hadoop.mapred.join02;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class JoinMap01 extends Mapper<LongWritable, Text,Text,CuWritable> {
private Text keyOut = new Text();
private CuWritable cu = new CuWritable();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] strs = value.toString().split(",");
keyOut.set(strs[strs.length-1])