MapReduce join案例

两个输入文件:dept.txt 和 emp.txt。先看 dept.txt:

10,ACCOUNTING,NEW YORK
20,RESEARCH,DALLAS
30,SALES,CHICAGO
40,OPERATIONS,BOSTON

字段解释:部门编号,部门名称,所在城市
emp.txt

7369,SMITH,CLERK,7902,1980-12-17,800.00,,20
7499,ALLEN,SALESMAN,7698,1981-2-20,1600.00,300.00,30
7521,WARD,SALESMAN,7698,1981-2-22,1250.00,500.00,30
7566,JONES,MANAGER,7839,1981-4-2,2975.00,,20
7654,MARTIN,SALESMAN,7698,1981-9-28,1250.00,1400.00,30
7698,BLAKE,MANAGER,7839,1981-5-1,2850.00,,30
7782,CLARK,MANAGER,7839,1981-6-9,2450.00,,10
7788,SCOTT,ANALYST,7566,1987-4-19,3000.00,,20
7839,KING,PRESIDENT,,1981-11-17,5000.00,,10
7844,TURNER,SALESMAN,7698,1981-9-8,1500.00,0.00,30
7876,ADAMS,CLERK,7788,1987-5-23,1100.00,,20
7900,JAMES,CLERK,7698,1981-12-3,950.00,,30
7902,FORD,ANALYST,7566,1981-12-3,3000.00,,20
7934,MILLER,CLERK,7782,1982-1-23,1300.00,,10
8888,OTHER,PROGRAM,7839,1988-1-23,10300.00,,

字段解释:员工编号,员工名称,职位,上级编号,入职日期,工资,奖金,部门编号

如果用SQL来写,就相当简单了:

select a.deptno,a.deptname,b.empno,b.empname from dept a,emp b where a.deptno = b.deptno

源码:

/**
 * Value type carried from the join mapper to the join reducer.
 *
 * <p>One instance holds either a department row or an employee row; the
 * {@code flag} field tells the reducer which one it is (the mapper in this
 * file writes 1 for rows from dept.txt and 2 for employee rows).
 *
 * <p>The field order in {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must stay identical, because the wire format
 * carries no field names.
 */
public class EmpInfo implements Writable {

    private int deptno;
    private String deptName;
    private int empno;
    private String empName;
    private int flag;

    // Getters, setters and constructors omitted here...

    /**
     * Serializes the record in the order deptno, deptName, empno, empName, flag.
     *
     * <p>{@code DataOutput.writeUTF} throws {@code NullPointerException} for a
     * null argument, so unset string fields are written as the empty string
     * instead of crashing the task.
     *
     * @param out sink to write the fields to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(deptno);
        out.writeUTF(deptName == null ? "" : deptName);
        out.writeInt(empno);
        out.writeUTF(empName == null ? "" : empName);
        out.writeInt(flag);
    }

    /**
     * Restores the record, reading the fields in exactly the order
     * {@link #write(DataOutput)} emitted them.
     *
     * @param in source to read the fields from
     * @throws IOException if the underlying stream fails or is truncated
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.deptno = in.readInt();
        this.deptName = in.readUTF();
        this.empno = in.readInt();
        this.empName = in.readUTF();
        this.flag = in.readInt();
    }
}

Mapper端:

/**
 * Map side of the reduce-side join: tags every input row with its source file
 * and emits it keyed by department number, so that the department row and all
 * of its employee rows meet in the same reduce call.
 */
public static class MrJoinMapper extends Mapper<LongWritable, Text, IntWritable, EmpInfo>{
        /** Name of the department file; every other input file is treated as employee data. */
        private static final String DEPT_FILE = "dept.txt";
        /** Flag value marking a record that came from the department file. */
        private static final int FLAG_DEPT = 1;
        /** Flag value marking a record that came from the employee file. */
        private static final int FLAG_EMP = 2;
        /** Zero-based column of the department number in an employee row. */
        private static final int EMP_DEPTNO_INDEX = 7;

        /** Name of the file backing the current input split; set once in {@link #setup}. */
        String filename;

        /**
         * Records the name of the file this task is reading so that
         * {@link #map} can tell department rows from employee rows.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // The input split knows which file it was cut from.
            FileSplit split = (FileSplit)context.getInputSplit();
            filename = split.getPath().getName();
        }

        /**
         * Parses one comma-separated row and emits it keyed by department number.
         *
         * <p>Rows that lack the columns needed for the join (blank lines,
         * employees without a department number) are skipped rather than
         * failing the task.
         *
         * @param key     byte offset of the line in the input file (unused)
         * @param value   one line of dept.txt or of the employee file
         * @param context sink for the (deptno, record) pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");

            if (DEPT_FILE.equals(filename)) {
                // A department row needs at least "deptno,deptname".
                if (fields.length < 2 || fields[0].trim().isEmpty()) {
                    return;
                }
                int deptNo = Integer.parseInt(fields[0].trim());
                EmpInfo dept = new EmpInfo();
                dept.setDeptno(deptNo);
                dept.setDeptName(fields[1]);
                // Employee columns are filled with neutral values on a department record.
                dept.setEmpName("");
                dept.setEmpno(0);
                dept.setFlag(FLAG_DEPT);
                context.write(new IntWritable(deptNo), dept);
                return;
            }

            // String.split drops trailing empty fields, so a row whose department
            // column is empty yields fewer than EMP_DEPTNO_INDEX + 1 fields.
            // Requiring the index to exist also prevents the out-of-bounds read
            // that a 7-field row would otherwise trigger.
            if (fields.length <= EMP_DEPTNO_INDEX) {
                return;
            }
            String empNoText = fields[0].trim();
            String deptNoText = fields[EMP_DEPTNO_INDEX].trim();
            if (empNoText.isEmpty() || deptNoText.isEmpty()) {
                return;
            }

            int deptNo = Integer.parseInt(deptNoText);
            EmpInfo emp = new EmpInfo();
            emp.setEmpno(Integer.parseInt(empNoText));
            emp.setEmpName(fields[1]);
            emp.setDeptno(deptNo);
            // The department name is unknown on the map side; the reducer fills it in.
            emp.setDeptName("");
            emp.setFlag(FLAG_EMP);
            context.write(new IntWritable(deptNo), emp);
        }
    }

Reducer端

/**
 * Reduce side of the join: for one department number, pairs the department
 * record with every employee record and emits one joined row per employee.
 *
 * <p>This is an inner join, matching
 * {@code where a.deptno = b.deptno}: employees whose department number has no
 * department record produce no output.
 */
public static class MrJoinReducer extends Reducer<IntWritable, EmpInfo,EmpInfo,NullWritable>{
        /** Flag value the mapper puts on employee records; anything else is a department record. */
        private static final int FLAG_EMP = 2;

        /**
         * Joins all records that share one department number.
         *
         * @param key     the department number
         * @param values  the department record (if any) and the employee records for it
         * @param context sink for the joined rows
         */
        @Override
        protected void reduce(IntWritable key, Iterable<EmpInfo> values, Context context) throws IOException, InterruptedException {
            List<EmpInfo> employees = new ArrayList<>(20);
            String deptName = "";
            boolean deptSeen = false;

            for (EmpInfo record : values) {
                if (record.getFlag() == FLAG_EMP) {
                    // Hadoop reuses a single value instance while iterating, so the
                    // record must be copied; storing the reference itself would leave
                    // the list full of aliases of the last value read.
                    EmpInfo copy = new EmpInfo();
                    copy.setEmpno(record.getEmpno());
                    copy.setEmpName(record.getEmpName());
                    copy.setDeptno(record.getDeptno());
                    employees.add(copy);
                } else {
                    deptName = record.getDeptName();
                    deptSeen = true;
                }
            }

            // No matching department row: an inner join emits nothing for this key.
            if (!deptSeen) {
                return;
            }

            for (EmpInfo joined : employees) {
                joined.setDeptName(deptName);
                context.write(joined, NullWritable.get());
            }
        }
    }

输出效果:

10, deptName='ACCOUNTING', empno=7934, empName='MILLER
10, deptName='ACCOUNTING', empno=7839, empName='KING
10, deptName='ACCOUNTING', empno=7782, empName='CLARK
20, deptName='RESEARCH', empno=7876, empName='ADAMS
20, deptName='RESEARCH', empno=7788, empName='SCOTT
20, deptName='RESEARCH', empno=7369, empName='SMITH
20, deptName='RESEARCH', empno=7566, empName='JONES
20, deptName='RESEARCH', empno=7902, empName='FORD
30, deptName='SALES', empno=7844, empName='TURNER
30, deptName='SALES', empno=7499, empName='ALLEN
30, deptName='SALES', empno=7698, empName='BLAKE
30, deptName='SALES', empno=7654, empName='MARTIN
30, deptName='SALES', empno=7521, empName='WARD
30, deptName='SALES', empno=7900, empName='JAMES

总结:
①.两个文件做join,关键是要知道每条数据来源于哪个表,所以要设置flag字段来标识数据来自哪个表
②.map端要在setup方法中通过输入切片(FileSplit)获取当前处理的文件名,以此区分两个文件
③.join的关联字段(这里是部门编号)要作为mapper的输出key,这样同一部门的数据才会进入同一次reduce调用

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

冬瓜螺旋雪碧

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值