MapReduce之二次排序(练习)

/*
 * Sets the "hadoop.home.dir" system property when the enclosing class is initialized.
 * NOTE(review): the value is a machine-specific local path; presumably only needed
 * for running the job from a Windows development machine — confirm before reuse.
 */
static {
    System.setProperty("hadoop.home.dir", "E:/x3/hadoop-2.9.2");
}

//  map
/**
 * Mapper that converts each tab-separated input line into a composite key.
 *
 * <p>Column 0 becomes the first field of the emitted {@code SecondSortData} key.
 * Column 1 is parsed as an int and is used both as the key's second field and
 * as the emitted value.
 */
public static class MyMapper extends Mapper<LongWritable,Text,SecondSortData,IntWritable>{
    /**
     * Emits one (composite key, number) pair for the given input line.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input, expected to hold at least two tab-separated columns
     * @param context sink for the map output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String[] columns = value.toString().split("\t");
        final String name = columns[0];
        // Parse once and reuse the number for both the key and the value.
        final int number = Integer.parseInt(columns[1]);

        final SecondSortData outKey = new SecondSortData(name, number);
        final IntWritable outValue = new IntWritable(number);
        context.write(outKey, outValue);
    }
}

//自定义分区器
/**
 * Partitioner that routes records by the first field of the composite map output key,
 * so all records sharing that field land in the same partition.
 */
public static class MyPartitioner extends Partitioner<SecondSortData,IntWritable>{

    /**
     * Computes the partition from the hash of the key's first field.
     *
     * @param secondSortData map output key; only its first field is used
     * @param value          map output value (not used here)
     * @param i              total number of partitions
     * @return a partition number in the range {@code [0, i)}
     */
    @Override
    public int getPartition(SecondSortData secondSortData, IntWritable value, int i) {
        // String.hashCode() may be negative, and Java's % keeps the sign of the dividend,
        // which would yield a negative (invalid) partition number. Clearing the sign bit
        // first keeps the result non-negative while leaving non-negative hashes unchanged.
        final int nonNegativeHash = secondSortData.getFirst().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % 127 % i;
    }
}

//reduce 聚合计算之前 进行分组比较
/**
 * Grouping comparator that treats two composite keys as equal when their
 * first fields are equal, ignoring the second field.
 */
public static class MyGroupCompartor extends WritableComparator{
    /** Registers {@code SecondSortData} as the key class and asks the base class to create key instances. */
    protected MyGroupCompartor() {
        super(SecondSortData.class,true);
    }

    /**
     * Compares two keys by their first field only.
     *
     * @param a left key, expected to be a {@code SecondSortData}
     * @param b right key, expected to be a {@code SecondSortData}
     * @return the result of comparing the two first fields as strings
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final SecondSortData left = (SecondSortData) a;
        final SecondSortData right = (SecondSortData) b;

        final String leftFirst = left.getFirst();
        return leftFirst.compareTo(right.getFirst());
    }
}

//  reduce
/**
 * Reducer that writes every value of a group back out, keyed by the first
 * field of the composite key.
 */
public static class MyReduce extends Reducer<SecondSortData,IntWritable,Text,IntWritable>{
    /**
     * Emits one (first field, value) pair per incoming value, in iteration order.
     *
     * @param key     composite key for the current group
     * @param values  values belonging to the group
     * @param context sink for the reduce output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(SecondSortData key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        final Text outKey = new Text();
        for (IntWritable number : values) {
            // Re-read the first field on every iteration, exactly as each record is emitted.
            outKey.set(key.getFirst());
            context.write(outKey, number);
        }
    }
}

/**
 * Configures and runs the "second_sort" job, then prints whether it succeeded.
 *
 * <p>An existing output path is deleted recursively before the job is submitted.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws IOException              if accessing the file system or submitting the job fails
 * @throws ClassNotFoundException   if a job class cannot be found
 * @throws InterruptedException     if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        throw new IllegalArgumentException("Usage: <input path> <output path>");
    }
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);

    // Job setup.
    // NOTE(review): job.setJarByClass(...) is not called; it is presumably required when
    // submitting to a cluster, but the enclosing class name is not visible here — confirm.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "second_sort");

    // Input.
    FileInputFormat.addInputPath(job, inputPath);

    // Map phase.
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(SecondSortData.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Shuffle: custom partitioning and custom grouping, both based on the key's first field.
    job.setPartitionerClass(MyPartitioner.class);
    job.setGroupingComparatorClass(MyGroupCompartor.class);

    // Reduce phase with two reduce tasks.
    job.setNumReduceTasks(2);
    job.setReducerClass(MyReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Resolve the file system from the output path itself, so a fully qualified path on a
    // non-default file system is checked and deleted on the file system it belongs to.
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        // Remove the previous output recursively; the job cannot write to an existing directory.
        fs.delete(outputPath, true);
    }

    // Output.
    FileOutputFormat.setOutputPath(job, outputPath);

    // Submit and wait for completion, reporting progress.
    boolean result = job.waitForCompletion(true);
    System.out.println(result);

}

实体类(SecondSortData:作为 map 输出的复合键)

/**
 * Composite key for secondary sort: a string field plus an integer field.
 *
 * <p>Keys order by {@code first} ascending, then by {@code second} descending.
 * The interface is parameterized with {@code SecondSortData} so that
 * {@link #compareTo(SecondSortData)} actually overrides the interface method.
 */
public class SecondSortData implements WritableComparable<SecondSortData> {

    private String first;
    private Integer second;

    /**
     * Creates a key with both fields set.
     *
     * @param first  primary sort field
     * @param second secondary sort field
     */
    public SecondSortData(String first, Integer second) {
        this.first = first;
        this.second = second;
    }

    /** Creates an empty key; fields are filled in later by {@link #readFields(DataInput)} or the setters. */
    public SecondSortData() {
    }

    public String getFirst() {
        return first;
    }

    public void setFirst(String first) {
        this.first = first;
    }

    public Integer getSecond() {
        return second;
    }

    public void setSecond(Integer second) {
        this.second = second;
    }

    /**
     * Orders by {@code first} ascending, then by {@code second} descending.
     *
     * @param o the key to compare against
     * @return negative, zero, or positive as this key sorts before, with, or after {@code o}
     */
    @Override
    public int compareTo(SecondSortData o) {
        // First column ascending.
        int result = this.getFirst().compareTo(o.getFirst());
        if (result == 0) {
            // Second column descending: swap the operands instead of negating the result.
            result = o.getSecond().compareTo(this.getSecond());
        }
        return result;
    }

    /**
     * Serializes {@code first} as a UTF string followed by {@code second} as an int.
     *
     * @param output destination stream
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeUTF(first);
        output.writeInt(second);
    }

    /**
     * Restores both fields in the same order {@link #write(DataOutput)} emits them.
     *
     * @param input source stream
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.first = input.readUTF();
        this.second = input.readInt();
    }

    /** Two keys are equal when both fields are equal, consistent with {@link #compareTo(SecondSortData)}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof SecondSortData)) {
            return false;
        }
        SecondSortData other = (SecondSortData) obj;
        return java.util.Objects.equals(first, other.first)
                && java.util.Objects.equals(second, other.second);
    }

    /** Hash over both fields, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(first, second);
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值