MapReduce之序列化对象作为key来进行排序

MapReduce之序列化对象作为key来进行排序

0.思路:
1、定义一个java类,实现WritableComparable接口
2、重写compareTo方法,实现排序规则
3、只需写Mapper阶段,验证是否按照排序规则进行排序
4、编写Job类,设置mapper及输入输出
1.依赖
<!--
  Hadoop libraries used by this example. All four artifacts come from the
  org.apache.hadoop group and are pinned to the same version (2.7.3) so that
  they stay mutually compatible.
-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.7.3</version>
</dependency>
2.添加log4j.properties文件在资源目录下即resources,文件内容如下
# Root logger: only FATAL events, sent to the console appender "dest1".
log4j.rootLogger=FATAL, dest1
# Dedicated logger "dsaLogging": DEBUG and above, sent to the file appender "dsa".
log4j.logger.dsaLogging=DEBUG, dsa
# Additivity is switched off for "dsaLogging", so its events are not also
# passed on to the root logger's appenders.
log4j.additivity.dsaLogging=false

# Console appender used by the root logger.
log4j.appender.dest1=org.apache.log4j.ConsoleAppender
log4j.appender.dest1.layout=org.apache.log4j.PatternLayout
# Pattern: level (left-aligned, width 5), caller location, message, newline.
log4j.appender.dest1.layout.ConversionPattern=%-5p:%l: %m%n
log4j.appender.dest1.ImmediateFlush=true

# Rolling file appender used by "dsaLogging"; writes to ./logs/dsa.log.
log4j.appender.dsa=org.apache.log4j.RollingFileAppender
log4j.appender.dsa.File=./logs/dsa.log
# Roll the file over once it reaches 2000KB.
log4j.appender.dsa.MaxFileSize=2000KB
# Previously MaxBackupIndex=2
log4j.appender.dsa.MaxBackupIndex=5
log4j.appender.dsa.layout=org.apache.log4j.PatternLayout
# Pattern: caller location, timestamp, message, newline.
log4j.appender.dsa.layout.ConversionPattern=%l:%d: %m%n
3.编写Employee序列化类实现WritableComparable接口
package com.sort.KeySort;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Employee record that can be serialized by Hadoop and used as a MapReduce key.
 *
 * <p>Every field is held in a Hadoop {@code Writable} wrapper so that
 * {@link #write(DataOutput)} and {@link #readFields(DataInput)} can simply
 * delegate to the fields, always in the same order.
 *
 * <p>Ordering (see {@link #compareTo(Employee)}): department number ascending,
 * then salary descending. {@link #equals(Object)} and {@link #hashCode()} are
 * defined on those same two fields, so the hash value is identical for keys
 * that compare as equal and does not depend on the JVM instance. Note that this
 * means two employees with the same department and salary are considered equal
 * as keys.
 */
public class Employee implements WritableComparable<Employee> {
    // Sample input record: 7369,SMITH,CLERK,7902,1980/12/17,800,,20
    private IntWritable empNo;
    private Text empName;
    private Text empJob;
    private IntWritable leaderNo;
    private Text hireDate;
    private IntWritable empSalary;
    private Text empBonus;
    private IntWritable deptNo;

    /**
     * Creates an employee with empty (but non-null) fields. A no-argument
     * constructor is needed so that an instance can be created first and then
     * populated through {@link #readFields(DataInput)}.
     */
    public Employee() {
        this.empNo = new IntWritable();
        this.empName = new Text("");
        this.empJob = new Text("");
        this.leaderNo = new IntWritable();
        this.hireDate = new Text("");
        this.empSalary = new IntWritable();
        this.empBonus = new Text("");
        this.deptNo = new IntWritable();
    }

    /**
     * Creates a fully populated employee.
     *
     * @param empNo     employee number
     * @param empName   employee name
     * @param empJob    job title
     * @param leaderNo  employee number of the direct leader
     * @param hireDate  hire date, kept as text
     * @param empSalary salary
     * @param empBonus  bonus, kept as text
     * @param deptNo    department number
     */
    public Employee(int empNo, String empName, String empJob, int leaderNo,
                    String hireDate, int empSalary, String empBonus, int deptNo) {
        this.empNo = new IntWritable(empNo);
        this.empName = new Text(empName);
        this.empJob = new Text(empJob);
        this.leaderNo = new IntWritable(leaderNo);
        this.hireDate = new Text(hireDate);
        this.empSalary = new IntWritable(empSalary);
        this.empBonus = new Text(empBonus);
        this.deptNo = new IntWritable(deptNo);
    }

    /**
     * Serializes all fields to {@code out}. The field order here must match the
     * order used in {@link #readFields(DataInput)}.
     *
     * @param out destination of the serialized form
     * @throws IOException if writing any field fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        this.empNo.write(out);
        this.empName.write(out);
        this.empJob.write(out);
        this.leaderNo.write(out);
        this.hireDate.write(out);
        this.empSalary.write(out);
        this.empBonus.write(out);
        this.deptNo.write(out);
    }

    /**
     * Deserializes all fields from {@code in}, in the same order in which
     * {@link #write(DataOutput)} emitted them.
     *
     * @param in source of the serialized form
     * @throws IOException if reading any field fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.empNo.readFields(in);
        this.empName.readFields(in);
        this.empJob.readFields(in);
        this.leaderNo.readFields(in);
        this.hireDate.readFields(in);
        this.empSalary.readFields(in);
        this.empBonus.readFields(in);
        this.deptNo.readFields(in);
    }

    /** Returns a human-readable form listing every field. */
    @Override
    public String toString() {
        return "Employee{" +
                "empNo=" + empNo +
                ", empName=" + empName +
                ", empJob=" + empJob +
                ", leaderNo=" + leaderNo +
                ", hireDate=" + hireDate +
                ", empSalary=" + empSalary +
                ", empBonus=" + empBonus +
                ", deptNo=" + deptNo +
                '}';
    }

    public IntWritable getEmpNo() {
        return empNo;
    }

    public void setEmpNo(IntWritable empNo) {
        this.empNo = empNo;
    }

    public Text getEmpName() {
        return empName;
    }

    public void setEmpName(Text empName) {
        this.empName = empName;
    }

    public Text getEmpJob() {
        return empJob;
    }

    public void setEmpJob(Text empJob) {
        this.empJob = empJob;
    }

    public IntWritable getLeaderNo() {
        return leaderNo;
    }

    public void setLeaderNo(IntWritable leaderNo) {
        this.leaderNo = leaderNo;
    }

    public Text getHireDate() {
        return hireDate;
    }

    public void setHireDate(Text hireDate) {
        this.hireDate = hireDate;
    }

    public IntWritable getEmpSalary() {
        return empSalary;
    }

    public void setEmpSalary(IntWritable empSalary) {
        this.empSalary = empSalary;
    }

    public Text getEmpBonus() {
        return empBonus;
    }

    public void setEmpBonus(Text empBonus) {
        this.empBonus = empBonus;
    }

    public IntWritable getDeptNo() {
        return deptNo;
    }

    public void setDeptNo(IntWritable deptNo) {
        this.deptNo = deptNo;
    }

    /**
     * Custom sort rule: department number ascending; within the same
     * department, salary descending.
     *
     * @param o the employee to compare against
     * @return a negative value, zero, or a positive value when this employee
     *         sorts before, the same as, or after {@code o}
     */
    @Override
    public int compareTo(Employee o) {
        int byDept = Integer.compare(this.deptNo.get(), o.getDeptNo().get());
        if (byDept != 0) {
            return byDept;
        }
        // Same department: operands are swapped so that the higher salary sorts first.
        return Integer.compare(o.getEmpSalary().get(), this.empSalary.get());
    }

    /**
     * Two employees are equal as keys when {@link #compareTo(Employee)} reports
     * zero, i.e. when department number and salary both match.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is an {@code Employee} that compares as equal
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Employee)) {
            return false;
        }
        return compareTo((Employee) obj) == 0;
    }

    /**
     * Hash derived only from the two sort fields, so that keys which compare as
     * equal always hash identically and the value is the same in every JVM.
     *
     * @return hash of department number and salary
     */
    @Override
    public int hashCode() {
        return 31 * deptNo.get() + empSalary.get();
    }
}
4.编写mapper类
package com.sort.KeySort;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
//key1 value1 key2 value2
public class EmpMapper extends Mapper<LongWritable, Text, Employee, IntWritable> {

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//数据格式:<0,7369,SMITH,CLERK,7902,1980/12/17,800,,20>
//1、分词
        String[] splits = value.toString().split(",");
//2、创建Employee对象,并且赋值
        Employee employee = null;

//判断员工是否有上级领导,如果没有,则给该字段设置一个0
        if (null == splits[3] || "".equals(splits[3])){
            splits[3] = "0";
        }
//判断员工是否有奖金
        if(null != splits[6] && !"".equals(splits[6])){
            employee = getEmpInstance(splits);
        }else{
            splits[6] = "0";
            employee = getEmpInstance(splits);
        }
//3、通过context写出去
        context.write(employee,employee.getEmpSalary());
    }

    private Employee getEmpInstance(String[] splits){
        Employee employee = new Employee(
                Integer.parseInt(splits[0]),splits[1],splits[2],
                Integer.parseInt(splits[3]),splits[4],Integer.parseInt(splits[5]),
                splits[6],Integer.parseInt(splits[7])
        );
        return employee;
    }
}
5.编写Driver类
package com.sort.KeySort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.util.Random;

/**
 * Driver that configures and runs the employee sorting job.
 *
 * <p>Usage: {@code EmpJob [inputPath [outputPath]]}. Both arguments are
 * optional; without them the job reads the local test file and writes to a
 * freshly generated directory from {@link #getOutputDir()}.
 */
public class EmpJob {
    /** Input used when no path is given on the command line (local test file). */
    private static final String DEFAULT_INPUT = "D:\\emp.csv";

    /** Parent directory under which generated output directories are created. */
    private static final String DEFAULT_OUTPUT_PREFIX = "D:\\output\\";

    /**
     * Configures the job, runs it and prints whether it succeeded.
     *
     * @param args optional: {@code args[0]} is the input path, {@code args[1]}
     *             is the output path
     * @throws Exception if the job cannot be configured or run
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : getOutputDir();

        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(EmpJob.class);

        job.setMapperClass(EmpMapper.class);
        job.setMapOutputKeyClass(Employee.class);      // key2: Employee
        job.setMapOutputValueClass(IntWritable.class); // value2: salary

        // No reducer class is set here, so the framework default applies; the
        // final output types are the same as the map output types.
        job.setOutputKeyClass(Employee.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean result = job.waitForCompletion(true);

        System.out.println("result:" + result);
        if (!result) {
            // Report the failure to the calling shell or scheduler.
            System.exit(1);
        }
    }

    /**
     * Generates an output directory name that is unlikely to exist yet, built
     * from the current time in milliseconds and a random integer.
     *
     * @return a path of the form {@code D:\output\result_<millis>_<random>}
     */
    public static String getOutputDir() {
        long time = System.currentTimeMillis();
        int random = new Random().nextInt();
        return DEFAULT_OUTPUT_PREFIX + "result_" + time + "_" + random;
    }
}
6.运行

在这里插入图片描述
在这里插入图片描述

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Agatha方艺璇

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值