根据部门编号分区,根据 薪水+补贴 降序排列
提取文件:https://pan.baidu.com/s/1kG7WvALT-ys24GPbDC8XgQ
提取码:dt5a
数据字段依次为:员工编号,名字,职业,上级编号,日期,薪水,补贴,部门编号
代码分为两部分:(更改类名带入即可直接使用,注释文中均有注意查看)
一、MapReduce和Partition部分:
/**
* @Time : 2021/11/23 18:09
* @Author : Carapace
* @File : EmploPartition.java
* Software: IntelliJ IDEA
*/
package com.Partition;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * MapReduce job that partitions employee records by department number and,
 * within each partition, sorts them in descending order of (salary + bonus).
 *
 * <p>Input record format (comma-separated):
 * empno,name,job,mgr,hiredate,salary,bonus,deptno
 * e.g. {@code 7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30}
 */
public class EmploPartition {

    /**
     * Mapper: parses one CSV line into an {@link Employee} key (which carries the
     * sort order via its compareTo) and emits it with a NullWritable value.
     */
    public static class EmpPartionMapper extends Mapper<LongWritable, Text, Employee, NullWritable> {
        @Override
        protected void map(LongWritable key1, Text value1, Context context) throws IOException, InterruptedException {
            // Example record: 7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
            String data = value1.toString();
            // BUG FIX: the records are comma-separated (see the sample above),
            // but the original code split on "\t", which left the whole line in
            // words[0] and made words[6]/words[7] throw ArrayIndexOutOfBoundsException.
            String[] words = data.split(",");
            // Defensively skip malformed/short lines instead of crashing the task.
            if (words.length < 8) {
                return;
            }
            // The bonus field may be empty (no bonus); treat empty as 0.0.
            double comms;
            if (words[6].trim().isEmpty()) {
                comms = 0.0;
            } else {
                comms = Double.parseDouble(words[6]);
            }
            // Key carries: department number, the whole original line, salary, bonus.
            Employee employee = new Employee(Integer.parseInt(words[7].trim()), value1.toString(),
                    Double.parseDouble(words[5]), comms);
            context.write(employee, NullWritable.get());
        }
    }

    /**
     * Partitioner: routes records to reducers by department number.
     * Requires the job to be configured with 4 reduce tasks (see main()).
     */
    public static class MyEmployeePartitioner extends Partitioner<Employee, NullWritable> {
        @Override
        public int getPartition(Employee employee, NullWritable value, int numPartitions) {
            // Partition rule: dept 10 -> partition 0, dept 20 -> 1, dept 30 -> 2,
            // any other department -> 3 (catch-all).
            switch (employee.getDeptno()) {
                case 10:
                    return 0;
                case 20:
                    return 1;
                case 30:
                    return 2;
                default:
                    return 3;
            }
        }
    }

    /**
     * Reducer: writes each key's stored original record line to HDFS.
     * Keys arrive sorted by Employee.compareTo (salary + bonus, descending).
     */
    public static class EmpPartionReducer extends Reducer<Employee, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Employee k3, Iterable<NullWritable> v3, Context context) throws IOException, InterruptedException {
            // getHiredate() holds the full original input line (see the Mapper);
            // one output line per distinct key.
            context.write(new Text(k3.getHiredate()), NullWritable.get());
        }
    }

    /**
     * Configures and submits the job: mapper, custom partitioner (4 partitions),
     * reducer, and input/output paths. Deletes the output path first if it exists.
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Reuse one Configuration for both the Job and the FileSystem handle.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Entry point of the job.
        job.setJarByClass(EmploPartition.class);
        // Mapper and its output types (Employee key, no value payload).
        job.setMapperClass(EmpPartionMapper.class);
        job.setMapOutputKeyClass(Employee.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Custom partitioning rule by department number.
        job.setPartitionerClass(MyEmployeePartitioner.class);
        // Number of reduce tasks must match the partitioner's partition count.
        job.setNumReduceTasks(4);
        // Reducer and final output types.
        job.setReducerClass(EmpPartionReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Input and output HDFS paths.
        String inpath = "./datas/emp.txt";
        String outpath = "./output009";
        FileInputFormat.addInputPath(job, new Path(inpath));
        FileOutputFormat.setOutputPath(job, new Path(outpath));
        // Remove a pre-existing output directory so the job can run repeatedly
        // (second argument true = recursive delete).
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(outpath))) {
            fs.delete(new Path(outpath), true);
        }
        // Submit and wait; exit non-zero on failure so scripts can detect it.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
二、Employee类:
/**
* @Time : 2021/11/23 18:08
* @Author : Carapace
* @File : Employee.java
* Software: IntelliJ IDEA
*/
package com.Partition;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
//员工类: 7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
/**
 * Employee key type for the partition/sort job.
 * Example source record: 7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
 *
 * <p>Sort order: descending by (salary + bonus); ties are broken by the raw
 * record string so distinct employees with identical pay are not merged into
 * a single reduce group.
 */
public class Employee implements WritableComparable<Employee> {
    private String hiredate; // carries the full original input line, not just the hire date
    private double sal;      // monthly salary
    private double comm;     // bonus (0.0 when the source field is empty)
    private int deptno;      // department number

    /** No-arg constructor required by Hadoop serialization. */
    public Employee() {
    }

    /**
     * @param deptno   department number
     * @param hiredate full original record line (used as the output value)
     * @param sal      monthly salary
     * @param comm     bonus
     */
    public Employee(int deptno, String hiredate, Double sal, Double comm) {
        this.sal = sal;
        this.comm = comm;
        this.deptno = deptno;
        this.hiredate = hiredate;
    }

    /** Serialization: write fields in a fixed order (must mirror readFields). */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeUTF(this.hiredate);
        output.writeDouble(this.sal);
        output.writeDouble(this.comm);
        output.writeInt(this.deptno);
    }

    /**
     * Descending order by total pay (salary + bonus).
     *
     * <p>BUG FIX: the original implementation returned only -1 or 1 and never 0,
     * so {@code a.compareTo(b)} and {@code b.compareTo(a)} could both return 1
     * when the totals were equal — violating the Comparable contract and risking
     * sort failures. Double.compare is contract-safe; the tie-break on the raw
     * record keeps distinct equal-pay employees from collapsing into one key.
     */
    @Override
    public int compareTo(Employee o) {
        int byTotal = Double.compare(o.sal + o.comm, this.sal + this.comm); // reversed => descending
        if (byTotal != 0) {
            return byTotal;
        }
        return this.hiredate.compareTo(o.hiredate);
    }

    /** Deserialization: read fields in the same order as write(). */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.hiredate = input.readUTF();
        this.sal = input.readDouble();
        this.comm = input.readDouble();
        this.deptno = input.readInt();
    }

    public String getHiredate() {
        return hiredate;
    }

    public void setHiredate(String hiredate) {
        this.hiredate = hiredate;
    }

    public double getSal() {
        return sal;
    }

    public void setSal(Double sal) {
        this.sal = sal;
    }

    public double getComm() {
        return comm;
    }

    public void setComm(double comm) {
        this.comm = comm;
    }

    public int getDeptno() {
        return deptno;
    }

    public void setDeptno(int deptno) {
        this.deptno = deptno;
    }

    @Override
    public String toString() {
        return hiredate;
    }
}