Things to watch out for when enabling a combiner in a MapReduce job

Suppose we want to compute the average score for each subject (Chinese, Math, English). If the job only has a map phase and a reduce phase, there is no need for a combiner. Once a combiner is enabled, however, the combiner phase has to be taken into account: even though the combiner logic is almost identical to the reduce logic, the records that reach the reducer directly from the map side and the records that reach it from the combiner side have a slightly different shape.

If the combiner is enabled but its code is simply a copy of the reduce code, the result will be wrong.

For example:

Data emitted by the map side (subject, student, score):

Math Bob 100

Math Alice 90

English Alice 80

English Bob 105

Chiness Alice 105

Chiness Bob 90

Data emitted by the combiner side, shown as subject, total score, record count (here the count is still computed with count++, which is fine):

Math  190 2

English 185 2

Chiness 195 2 

On the reduce side, the record count must NOT be computed with count++ any more; it has to be accumulated from the count field the combiner already emitted. In the code below the combiner writes its value as count--total, so the reducer uses count += Integer.valueOf(split[0]). If the reducer kept using count++, every combiner record would be counted as a single student: if, say, a second map task also contributed Math scores, the reducer would see two partial records for Math and report a count of 2 regardless of how many students were actually involved.
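
For reference, the mapper below splits every input line on tab characters and expects the fields name, Chinese score, Math score, English score. The actual input file is not shown in the original post, but reconstructing it from the sample map output above, it would look roughly like this (tab-separated, one student per line):

Alice	105	90	80
Bob	90	100	105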

Here is the code without the combiner:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class CollectStuMRAve {
    // for each student line, emit one (subject, "name--score") pair per subject
    public static class avgmap extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            System.out.println("map --> key: " + key + ", value:" + values);
            // input line format: name \t chinese \t math \t english
            String[] value = values.toString().split("\t");
            context.write(new Text("Chiness"), new Text(value[0] + "--" + value[1]));
            context.write(new Text("Math"), new Text(value[0] + "--" + value[2]));
            context.write(new Text("English"), new Text(value[0] + "--" + value[3]));
        }


    }

    public static class avgreduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;

            System.out.println("handing key-->" + key);
            // each value is "name--score" straight from the map side: count students, sum scores
            for (Text value : values) {
                System.out.println("handing --> " + value.toString());
                count++;
                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]);
            }


            context.write(key, new Text(total + "--" + count)); // per-subject "totalScore--studentCount"
        }

    }

    /**
     * $ hdfs dfs -rm -r /user/yong/out
     * $ hadoop jar /home/yong/Desktop/testMR/target/testMR-1.0-SNAPSHOT.jar CollectStuMRAve /user/yong/input/test_data_stu /user/yong/out
     * $ hdfs dfs -cat /user/yong/out/part-r-00000
     */

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("args1: " + otherArgs[0]);
        System.out.println("args2: " + otherArgs[1]);
        Job job = Job.getInstance(conf, "CollectStuMRAve");
        job.setJarByClass(CollectStuMRAve.class);
        job.setMapperClass(CollectStuMRAve.avgmap.class);

        //job.setCombinerClass(CollectStuMRAve.avgreduce.class);
        //job.setPartitionerClass(SubjectPartitioner.class);
        job.setReducerClass(CollectStuMRAve.avgreduce.class);
        //job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}
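
This job emits a per-subject total and count rather than the average itself (the combiner version below does the same). If the final per-subject average is wanted directly, a minimal sketch (not part of the original code) is to divide in the reducer just before writing the result, for example:

    // sketch only: inside the reducer, after the loop has finished
    double avg = (double) total / count;
    context.write(key, new Text(String.valueOf(avg)));

The division must only ever happen in the reducer, never in a combiner, for the same reason count++ breaks there: a combiner only sees the subset of records produced by one map task.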


 

 

Here is the code with the combiner enabled:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class CollectStuMRAve {
    // for each student line, emit one (subject, "name--score") pair per subject
    public static class avgmap extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            System.out.println("map --> key: " + key + ", value:" + values);
            // input line format: name \t chinese \t math \t english
            String[] value = values.toString().split("\t");
            context.write(new Text("Chiness"), new Text(value[0] + "--" + value[1]));
            context.write(new Text("Math"), new Text(value[0] + "--" + value[2]));
            context.write(new Text("English"), new Text(value[0] + "--" + value[3]));
        }


    }

    public static class mycombiner extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;

            System.out.println("handing key-->" + key);
            // the combiner still sees raw map output "name--score", so count++ is correct here
            for (Text value : values) {
                System.out.println("handing --> " + value.toString());

                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]);
                count++;
            }


            context.write(key, new Text(count + "--" + total)); // partial aggregate: "count--total"
        }

    }
    public static class avgreduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;

            System.out.println("handing key-->" + key);
            // each value is now a combiner partial "count--total", not a single student record
            for (Text value : values) {
                System.out.println("handing --> " + value.toString());

                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]);
                count += Integer.valueOf(split[0]); // sum the partial counts; count++ would be wrong here
            }


            context.write(key, new Text(count + "--" + total)); // per-subject "studentCount--totalScore"
        }

    }

    /**
     * $ hdfs dfs -rm -r /user/yong/out
     * $ hadoop jar /home/yong/Desktop/testMR/target/testMR-1.0-SNAPSHOT.jar CollectStuMRAve /user/yong/input/test_data_stu /user/yong/out
     * $ hdfs dfs -cat /user/yong/out/part-r-00000
     */

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("args1: " + otherArgs[0]);
        System.out.println("args2: " + otherArgs[1]);
        Job job = Job.getInstance(conf, "CollectStuMRAve");
        job.setJarByClass(CollectStuMRAve.class);
        job.setMapperClass(CollectStuMRAve.avgmap.class);

        job.setCombinerClass(CollectStuMRAve.mycombiner.class);
        //job.setPartitionerClass(SubjectPartitioner.class);
        job.setReducerClass(CollectStuMRAve.avgreduce.class);
        //job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}
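
The commented-out lines job.setPartitionerClass(SubjectPartitioner.class) and job.setNumReduceTasks(4) point at a custom partitioner that is not included in the post. As a rough sketch of what such a class could look like (the routing below is an assumption, not the author's implementation), it might send each subject to its own reduce task:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class SubjectPartitioner extends Partitioner<Text, Text> {
    @Override
    public int getPartition(Text key, Text value, int numPartitions) {
        // assumed routing: one reduce task per subject, hash fallback for any other key
        switch (key.toString()) {
            case "Chiness": return 0 % numPartitions;
            case "Math":    return 1 % numPartitions;
            case "English": return 2 % numPartitions;
            default:        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }
    }
}

Whether or not a partitioner like this is plugged in, the combiner issue described above stays the same: the reducer must sum the partial counts emitted by the combiner instead of counting records with count++.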


 
