mapreduce（排序）

最新推荐文章于 2024-07-19 17:37:28 发布

苦瓜大哥

最新推荐文章于 2024-07-19 17:37:28 发布

阅读量710

点赞数 22

文章标签： mapreduce java 大数据

本文链接：https://blog.csdn.net/LJT_love/article/details/135229874

版权

MapReduce 在 shuffle 阶段默认按 key 升序（正序）排序；本文通过自定义比较器 KeyCompartor（继承 WritableComparator）实现按 key 降序（倒序）排序。

另外可以发现：由于相同的 key 会被分到同一组交给 reduce 处理，只要在 reduce 中对每组只输出一次 key，就可以对数据进行去重。

sortLaunch

public class SortLaunch {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        //log4j中日志打印
        BasicConfigurator.configure();
        //通过job实例获取job对象
        Job job      = Job.getInstance();
        //设置启动类
        job.setJarByClass(SortLaunch.class);
        //设置名字
        job.setJobName("sort");

        //配置Mapper
        job.setMapperClass(SortMapper.class);
        //配置reduce
        job.setReducerClass(SortReduce.class);
        //配置key比较器
        job.setSortComparatorClass(KeyCompartor.class);

        //配置Mapper（key）输出类型
        job.setMapOutputKeyClass(IntWritable.class);
        //配置Mapper（value）输出类型
        job.setMapOutputValueClass(NullWritable.class);
        //配置key输出类型
        job.setOutputKeyClass(IntWritable.class);
        //配置value输出类型
        job.setOutputValueClass(NullWritable.class);

        //通过job配置获取FileSystem
        FileSystem fs = FileSystem.get(job.getConfiguration());
        //获取文件输出路径
        Path out      = new Path("D:/ideaProjects/hadoop_pro/sort1/output");
        if (fs.exists(out)){
            fs.delete(out,true);
        }

        //获取文件输入路径
        FileInputFormat.addInputPath(job,new Path("D:/ideaProjects/hadoop_pro/sort1/input"));
        //设置文件输出路径
        FileOutputFormat.setOutputPath(job,out);

        //设置reduce数量
        job.setNumReduceTasks(1);
        //提交job
        job.waitForCompletion(true);

    }
}

sortMapper

public class SortMapper extends Mapper<LongWritable, Text, IntWritable, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        IntWritable outKey = new IntWritable();
        String line        = value.toString();
        outKey.set(Integer.parseInt(line));
        context.write(outKey, NullWritable.get());
    }
}

sortReduce

public class SortReduce extends Reducer<IntWritable, NullWritable,IntWritable,NullWritable> {
    @Override
    protected void reduce(IntWritable key, Iterable<NullWritable> values, Reducer<IntWritable, NullWritable, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        for (NullWritable value : values) {
            context.write(key,NullWritable.get());
        }

    }
}

keyCompartor

public class KeyCompartor extends WritableComparator {
    public KeyCompartor(){
        super(IntWritable.class,true);
    }

    /**
     *
     * @param a
     * @param b
     * @return  0 相等   1 大于   -1  小于
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        IntWritable left   = (IntWritable) a;
        IntWritable right  = (IntWritable) b;

        return -(left.compareTo(right));
    }
}

input

output

去重

SortMapper
public class SortMapper extends Mapper<LongWritable, Text,IntWritable, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {
       IntWritable k =new IntWritable();
        String line = value.toString();
        k.set(Integer.parseInt(line));
        context.write(k,NullWritable.get());
    }
}

SortReduce
public class SortReduce extends Reducer<IntWritable, NullWritable,IntWritable,NullWritable> {
    @Override
    protected void reduce(IntWritable key, Iterable<NullWritable> values, Reducer<IntWritable, NullWritable, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException {

            context.write(key,NullWritable.get());


    }
}


SortLaunch
    
public class SortLaunch {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        BasicConfigurator.configure();
        Job job = Job.getInstance();
        job.setJarByClass(SortLaunch.class);
        job.setJobName("sort");

        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReduce.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(NullWritable.class);

        FileSystem fs = FileSystem.get(job.getConfiguration());
        Path out = new Path("D:/ideaProjects/hadoop_pro/quchong/output");
        if (fs.exists(out)){
            fs.delete(out,true);
        }

        FileInputFormat.addInputPath(job,new Path("D:/ideaProjects/hadoop_pro/quchong/input"));
        FileOutputFormat.setOutputPath(job,out);

        job.setNumReduceTasks(1);
        job.waitForCompletion(true);

    }
}