mr实现join

6 篇文章 0 订阅

需求:展示部门员工的员工编号、员工姓名、部门编号、部门名称。
分析:部门表跟员工表都有一个相同的字段,部门编号。可以根据部门编号进行join操作,再将所属表的信息进行一个标识(flag)。
map中根据文件名判断当前行数据所属的数据,并设置对应的flag值
在reduce中根据关联字段进行join处理
部门表:

10  ACCOUNTING  NEWYORK
20  RESEARCH    DALLAS
30  SALES   CHICAGO
40  OPERATIONS  BOSTON

员工表:

7369	SMITH	CLERK	7902	1980-12-17	800.00	20
7499	ALLEN	SALESMAN	7698	1981-2-20	1600.00	30
7521	WARD	SALESMAN	7698	1981-2-22	1250.00	30
7566	JONES	MANAGER	7839	1981-4-2	2975.00	20
7654	MARTIN	SALESMAN	7698	1981-9-28	1250.00	30
7698	BLAKE	MANAGER	7839	1981-5-1	2850.00	30
7782	CLARK	MANAGER	7839	1981-6-9	2450.00	10
7788	SCOTT	ANALYST	7566	1987-4-19	3000.00	20
7839	KING	PRESIDENT		1981-11-17	5000.00	10
7844	TURNER	SALESMAN	7698	1981-9-8	1500.00	30
7876	ADAMS	CLERK	7788	1987-5-23	1100.00	20
7900	JAMES	CLERK	7698	1981-12-3	950.00	30
7902	FORD	ANALYST	7566	1981-12-3	3000.00	20
7934	MILLER	CLERK	7782	1982-1-23	1300.00	10
7988	ruoze	CLERK	7732	1982-7-23	1900.00

代码操作:
首先自定义一个序列化类(实现 Writable 接口)

package com.ruozedata.bigdata.hadoop.mapreduce.join;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
//实现Writable接口,自定义员工编号,员工姓名,部门编号,部门名称,标识
/**
 * Custom Hadoop Writable carrying the joined employee/department fields.
 * A record holds either the employee side (empno, ename, deptno) or the
 * department side (deptno, dname) of the join; {@code flag} marks the
 * source table: 1 = employee table, 2 = department table.
 */
public class Info implements Writable {
    private int empno;
    // String fields are initialized to "" because DataOutput.writeUTF(null)
    // throws a NullPointerException during serialization.
    private String ename = "";
    private int deptno;
    private String dname = "";
    private int flag;

    /**
     * Serializes all fields. The order here must match readFields exactly.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(empno);
        out.writeUTF(ename);
        out.writeInt(deptno);
        out.writeUTF(dname);
        out.writeInt(flag);
    }

    /**
     * Deserializes all fields in the same order they were written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.empno = in.readInt();
        this.ename = in.readUTF();
        this.deptno = in.readInt();
        this.dname = in.readUTF();
        this.flag = in.readInt();
    }

    public int getEmpno() {
        return empno;
    }

    public String getEname() {
        return ename;
    }

    public int getDeptno() {
        return deptno;
    }

    public String getDname() {
        return dname;
    }

    public int getFlag() {
        return flag;
    }

    public void setEmpno(int empno) {
        this.empno = empno;
    }

    public void setEname(String ename) {
        this.ename = ename;
    }

    public void setDeptno(int deptno) {
        this.deptno = deptno;
    }

    public void setDname(String dname) {
        this.dname = dname;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }

    /**
     * Textual form used as the job's final output line.
     * Fix: dname was missing here, so the joined output never showed the
     * department name — the very field this join exists to produce.
     */
    @Override
    public String toString() {
        return "Info{" +
                "empno=" + empno +
                ", ename='" + ename + '\'' +
                ", deptno=" + deptno +
                ", dname='" + dname + '\'' +
                ", flag=" + flag +
                '}';
    }
}
package com.ruozedata.bigdata.hadoop.mapreduce.join;

import com.ruozedata.bigdata.hadoop.utils.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Reduce-side join of the employee and department tables on deptno.
 * The mapper tags each record with its source table (flag 1 = emp,
 * flag 2 = dept) keyed by deptno; the reducer stitches the department
 * name onto every employee record sharing that deptno.
 */
public class ReduceJoinApp {

    public static void main(String[] args) throws Exception {
        // Build the job from a fresh configuration.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        String input = "data/info/";
        String output = "out/";
        // Project utility: remove a pre-existing output dir so the job can run repeatedly.
        FileUtils.deleteTarget(output, configuration);
        // Jar / class wiring.
        job.setJarByClass(ReduceJoinApp.class);
        // Custom mapper and reducer.
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // Map output: deptno -> tagged Info record.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Info.class);
        // Final output: joined Info record, no value.
        job.setOutputKeyClass(Info.class);
        job.setOutputValueClass(NullWritable.class);
        // Input / output paths.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // Submit and block until completion.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);

    }

    /**
     * Tags each input line with its source table, keyed by deptno.
     * The source table is detected from the split's file name in setup().
     */
    public static class MyMapper extends Mapper<LongWritable, Text, IntWritable, Info> {

        String name;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Record the file name once per split; map() uses it to tell emp from dept rows.
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            name = fileSplit.getPath().getName();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] splits = value.toString().split("\t");
            // Fix: branch on the source file FIRST. The original code attached the
            // else-branch to the field-count check, so a malformed employee row
            // (e.g. one missing its deptno, leaving 6 fields) fell through and was
            // mis-parsed as a department record, corrupting the join.
            if (name.contains("emp")) {
                // Employee row: empno ename job mgr hiredate sal deptno (7 fields).
                // Rows with a different field count are malformed — skip them.
                if (splits.length == 7) {
                    int empno = Integer.parseInt(splits[0].trim());
                    String ename = splits[1];
                    int deptno = Integer.parseInt(splits[6].trim());
                    Info info = new Info();
                    info.setEmpno(empno);
                    info.setEname(ename);
                    info.setDeptno(deptno);
                    info.setDname("");
                    info.setFlag(1);
                    context.write(new IntWritable(deptno), info);
                }
            } else {
                // Department row: deptno dname loc. Require at least the two
                // fields we read, skipping blank/short lines.
                if (splits.length >= 2) {
                    Info info = new Info();
                    int deptno = Integer.parseInt(splits[0].trim());
                    info.setDeptno(deptno);
                    info.setDname(splits[1].trim());
                    info.setEmpno(0);
                    info.setEname("");
                    info.setFlag(2);
                    context.write(new IntWritable(deptno), info);
                }
            }
        }
    }

    /**
     * For each deptno, buffers the employee records, picks up the department
     * name from the dept-side record, then emits every employee with the
     * department name attached.
     */
    public static class MyReducer extends Reducer<IntWritable, Info, Info, NullWritable> {
        @Override
        protected void reduce(IntWritable deptno, Iterable<Info> values, Context context) throws IOException, InterruptedException {
            List<Info> emps = new ArrayList<>();
            String dname = "";

            for (Info info : values) {
                if (info.getFlag() == 1) {
                    // Hadoop reuses the value object across iterations, so each
                    // employee record must be copied before being buffered.
                    Info tmp = new Info();
                    tmp.setEmpno(info.getEmpno());
                    tmp.setEname(info.getEname());
                    tmp.setDeptno(info.getDeptno());
                    emps.add(tmp);
                } else {
                    // Department-side record: remember the name for this deptno.
                    dname = info.getDname();
                }
            }
            // Emit every employee of this department with the joined name.
            for (Info bean : emps) {
                bean.setDname(dname);
                context.write(bean, NullWritable.get());
            }
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值