Java MapReduce：按值进行降序排序（在 Reducer 的 cleanup 方法中输出排序结果）

本文链接：https://blog.csdn.net/2402_84895402/article/details/138583995

题：

编写 MapReduce 程序，实现以下功能：统计学生的成绩，并按成绩从高到低（降序）排序输出。

原始数据：

1001,张三,56
1002,李四,1
1003,王二,5
1004,赵六,200
1005,麻子,58
1006,王五,89

Mapper代码：

    /**
     * Mapper that parses comma-separated records of the form {@code id,name,score}
     * and emits {@code (name, score)} pairs.
     *
     * <p>Lines that have fewer than three fields, or whose third field is not a
     * number, are skipped and counted instead of failing the whole task.
     */
    private static class Mapp extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        /**
         * Processes one input line.
         *
         * @param key     position of the line in the input (unused)
         * @param value   the raw line
         * @param context sink for the emitted {@code (name, score)} pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length < 3) {
                // Blank or truncated line: nothing to emit.
                context.getCounter("Sorted", "MALFORMED_RECORDS").increment(1);
                return;
            }

            double score;
            try {
                score = Double.parseDouble(fields[2]); // fields[2] is the score
            } catch (NumberFormatException e) {
                // Typically a header row or a corrupted value; skip it but keep it visible in the counters.
                context.getCounter("Sorted", "MALFORMED_RECORDS").increment(1);
                return;
            }

            context.write(new Text(fields[1]), new DoubleWritable(score)); // fields[1] is the student name
        }
    }

Reducer代码：

    /**
     * Reducer that ranks names by their highest score, in descending order.
     *
     * <p>{@code reduce} only records each name under its maximum score in an
     * in-memory {@link TreeMap}; nothing is written until {@code cleanup}, which
     * walks the map from the highest score to the lowest. Every name seen by this
     * reducer is therefore held in memory until the task ends, and the output is
     * a global ranking only when the job runs with a single reduce task.
     */
    private static class Red extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

        // score -> names that have this score; the reverse-order comparator makes
        // iteration go from the highest score to the lowest.
        private TreeMap<Double, List<String>> sorted;

        /** Creates the descending-order map before any key is reduced. */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            sorted = new TreeMap<>(Collections.reverseOrder());
        }

        /**
         * Finds the maximum score of one name and files the name under that score.
         *
         * @param key     the student name
         * @param values  all scores emitted for that name
         * @param context unused here; output is deferred to {@code cleanup}
         */
        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            // Track the running maximum instead of buffering every value in a list.
            boolean seen = false;
            double best = 0;
            for (DoubleWritable value : values) {
                double current = value.get();
                best = seen ? Math.max(best, current) : current;
                seen = true;
            }
            if (!seen) {
                return; // no scores for this name, nothing to rank
            }

            // key.toString() takes a copy of the name, so the stored value does not
            // depend on the Text instance passed in.
            sorted.computeIfAbsent(best, score -> new ArrayList<>()).add(key.toString());
        }

        /** Writes every {@code (name, score)} pair, highest score first. */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            for (Map.Entry<Double, List<String>> entry : sorted.entrySet()) {
                DoubleWritable score = new DoubleWritable(entry.getKey());
                // Names that share a score are written in the order reduce() received them.
                for (String name : entry.getValue()) {
                    context.write(new Text(name), score);
                }
            }
        }
    }

Driver代码：

    /**
     * Configures and runs the sorting job.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]}
     *             the output path; when omitted, the built-in local paths are used
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the original hard-coded paths so running without arguments behaves as before.
        String inputPath = args.length > 0 ? args[0] : "D:\\data\\eurasia_mainland.csv";
        String outputPath = args.length > 1 ? args[1] : "d:\\data\\tmp2";

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Sorted.class);  // class used to locate the job jar
        job.setMapperClass(Mapp.class);
        job.setReducerClass(Red.class);

        // Each reducer sorts only the keys it receives, so more than one reduce task
        // would produce several independently sorted files instead of one ranking.
        job.setNumReduceTasks(1);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));

        // Remove a previous output directory, if any, before the job writes to it.
        Path path = new Path(outputPath);
        FileSystem file = path.getFileSystem(conf);
        if (file.exists(path)) {
            file.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        // Submit the job, wait for it to finish, and report success through the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

处理后结果：

赵六   200.0
王五   89.0
麻子   58.0
张三   56.0
王二   5.0
李四   1.0

完整代码：

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.*;

public class Sorted {
    private static class Mapp extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
            String[] split = value.toString().split(","); // 将Text类型转换为String，分隔符为逗号（,）
            double a = Double.parseDouble(split[2]); //split[2] 为成绩
            context.write(new Text(split[1]), new DoubleWritable(a)); // split[1] 为学生姓名
        }
    }

    private static class Red extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

        private TreeMap<Double, List<String>> sorted;
        // 定义一个Tree Map对象 数值为键，姓名为值
        // 将mapper传入的键值进行调换，供后续使用键进行排序处理


        // setup方法用于在每个reducer实例初始化时被调用
        // setup方法用于初始化操作
        @Override
        protected void setup(Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
            sorted = new TreeMap<>(Collections.reverseOrder());
            // 创建的Tree Map对象，并指定了一个反向排序的比较器
            // 根据值进行降序排序（升序排序时可以不填写方法）
        }

        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {

            // 创建一个空列表用于存储每个姓名的数值，并求出最大值
            List<Double> a = new ArrayList<>();
            for (DoubleWritable value : values) {
                // 遍历数值放入空列表
                a.add(value.get());
            }

            // 取出最大值并赋值给 变量s
            double s = Collections.max(a);

            // 将键值放入创建好的Tree Map对象中，便于进行排序
            if (sorted.containsKey(s)) {  // 列表中包含这个键（s 数值）就将key（地区）添加到sorted对应的s列表中
                sorted.get(s).add(key.toString());
            } else {
                List<String> keyl = new ArrayList<>(); // 如果没有包含，就创建一个新的字符串列表（keyl）
                keyl.add(key.toString());
                sorted.put(s, keyl); // 将key放入列表中(keyl) 然后将该值s对应的键列表keyl放入sorted对象中
            }

        }

        // 清理收尾工作
        @Override
        protected void cleanup(Reducer<Text, DoubleWritable, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
            // 遍历并使用Map函数映射Tree Map对象取出需要的键值对
            for (Map.Entry<Double, List<String>> en : sorted.entrySet()) {
                Double value = en.getKey(); // 将需要的值取出
                List<String> keylist = en.getValue(); // 将Tree Map对象中的字符串列表取出赋值给新的列表
                for (String s : keylist) {  //遍历新的字符串列表
                    // 将键值对放入容器中，显示上下文，输出键值对
                    context.write(new Text(s), new DoubleWritable(value)); // 排序完成输出键值
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);  // 创建一个job对象
        job.setJarByClass(Sorted.class);  // 设置Driver驱动类
        job.setMapperClass(Mapp.class);   // 关联mapper类
        job.setReducerClass(Red.class);   // 关联Reducer类

        job.setMapOutputKeyClass(Text.class);   // 设置Mapper输出键类型
        job.setMapOutputValueClass(DoubleWritable.class);   // 设置Mapper输出值类型

        job.setOutputKeyClass(Text.class);   // 设置最终输出键类型
        job.setOutputValueClass(DoubleWritable.class);   // 设置最终输出值类型

        // 设置文件出入路径  打包后可设置为jar包输入的文件路径 arge[0]
        FileInputFormat.setInputPaths(job, new Path("D:\\data\\eurasia_mainland.csv"));

        Path path = new Path("d:\\data\\tmp2");   // 创建一个Path对象，放入文件输出路径
        FileSystem file = path.getFileSystem(conf);   // 调用conf对象用于存储参数
        if (file.exists(path)) {                    // 判断
            file.delete(path, true);      // 如果文件存在则先删除文件
        }
        FileOutputFormat.setOutputPath(job, path);   // 指定文件输出路径

        System.exit(job.waitForCompletion(true) ? 0 : 1);    // 任务结束，提交任务

    }
}