1.问题:完成订单表和用户表之间的join
Map阶段map方法中的context参数保存了当前map任务所负责的切片信息(哪个文件、起始偏移量、读取长度),可通过context.getInputSplit()获取InputSplit对象。InputSplit为抽象类,需要强转为FileSplit后才能获取文件路径等信息。
2.定义一个JoinBean类
public class JoinBean implements Writable{
private String oid;
private String uid;
private String name;
private int age;
private String gender;
private String friend;
public JoinBean() {
}
public void set(String oid, String uid, String name, int age, String gender, String friend) {
this.oid = oid;
this.uid = uid;
this.name = name;
this.age = age;
this.gender = gender;
this.friend = friend;
}
public String getOid() {
return oid;
}
public void setOid(String oid) {
this.oid = oid;
}
public String getUid() {
return uid;
}
public void setUid(String uid) {
this.uid = uid;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
public String getGender() {
return gender;
}
public void setGender(String gender) {
this.gender = gender;
}
public String getFriend() {
return friend;
}
public void setFriend(String friend) {
this.friend = friend;
}
@Override
public String toString() {
return "[oid=" + oid + ", uid=" + uid + ", name=" + name + ", age=" + age + ", gender=" + gender
+ ", friend=" + friend + "]";
}
public void readFields(DataInput in) throws IOException {
this.oid = in.readUTF();
this.uid = in.readUTF();
this.name = in.readUTF();
this.age = in.readInt();
this.gender = in.readUTF();
this.friend = in.readUTF();
}
public void write(DataOutput out) throws IOException {
out.writeUTF(oid);
out.writeUTF(uid);
out.writeUTF(name);
out.writeInt(age);
out.writeUTF(gender);
out.writeUTF(friend);
}
}
3.Mapper中如何获取文件信息
public static class JoinMapper extends Mapper<LongWritable, Text, Text, JoinBean>{
protected void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper<LongWritable,Text,Text,JoinBean>.Context context)
throws java.io.IOException ,InterruptedException {
//获取任务切片信息
FileSplit inputSplit = (FileSplit) context.getInputSplit();
//从任务切片信息中获取文件路径
String fileName = inputSplit.getPath().getName();
String[] split = value.toString().split(",");
JoinBean joinBean = new JoinBean();
//来自user.txt的数据,不存在的字段设置为"NULL"
if(fileName.equals("user.txt")){
joinBean.set("NULL", split[0], split[1], Integer.parseInt(split[2]), split[3], split[4]);
}else{
joinBean.set(split[0],split[1],"NULL",0,"NULL","NULL");
}
//key:uid
context.write(new Text(joinBean.getUid()), joinBean);
}
}
4.Reducer类
/**
 * Reduce side of the order/user join. All records sharing one uid arrive in a
 * single call; the user record (identified by {@code oid == "NULL"}) is used to
 * fill the user fields of every order record, and each enriched order is emitted.
 */
public static class JoinReducer extends Reducer<Text, JoinBean, JoinBean, NullWritable> {

    /**
     * Joins the order records of one uid with that uid's user record.
     *
     * <p>If several user records are present the last one seen wins. If none is
     * present, the orders are emitted unchanged, keeping the user-field values
     * they arrived with rather than being overwritten with {@code null}.
     *
     * @param key     the uid shared by all {@code values}
     * @param values  user and order records for this uid
     * @param context task context used to emit the joined records
     * @throws IOException          if writing an output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<JoinBean> values, Context context)
            throws IOException, InterruptedException {
        ArrayList<JoinBean> orderList = new ArrayList<JoinBean>();
        JoinBean userBean = null;

        for (JoinBean v : values) {
            // Hadoop reuses the value instance while iterating, so every record
            // that must outlive this iteration is copied into a fresh bean.
            JoinBean copy = copyOf(v);
            if ("NULL".equals(v.getOid())) {
                // This record came from user.txt.
                userBean = copy;
            } else {
                // This record is an order.
                orderList.add(copy);
            }
        }

        // Stitch the user attributes onto each order and emit it.
        for (JoinBean order : orderList) {
            if (userBean != null) {
                order.setName(userBean.getName());
                order.setAge(userBean.getAge());
                order.setGender(userBean.getGender());
                order.setFriend(userBean.getFriend());
            }
            context.write(order, NullWritable.get());
        }
    }

    /** Returns a new bean holding the same field values as {@code source}. */
    private static JoinBean copyOf(JoinBean source) {
        JoinBean copy = new JoinBean();
        copy.set(source.getOid(), source.getUid(), source.getName(), source.getAge(),
                source.getGender(), source.getFriend());
        return copy;
    }
}