Things to watch out for when enabling a combiner in a MapReduce job

Suppose we want to compute the average score for each subject (Chinese, Math, English). If the job only has a map phase and a reduce phase, there is no need for a combiner. Once a combiner is enabled, however, the combiner phase has to be taken into account: even though the combiner logic is almost identical to the reduce logic, the records that reach the reducer directly from the map side and the records that reach it from the combiner side have a slightly different shape.

If the combiner is enabled but its code is simply a copy of the reduce code, the result will be wrong.

For example:

Data emitted by the map side (subject, student, score):

Math Bob 100

Math Alice 90

English Alice 80

English Bob 105

Chiness Alice 105

Chiness Bob 90

Data emitted by the combiner side, shown as subject, total score, record count (here the count is still computed with count++, which is fine):

Math  190 2

English 185 2

Chiness 195 2 

On the reduce side, the record count must NOT be computed with count++ any more; it has to be accumulated from the count field the combiner already emitted. In the code below the combiner writes its value as count--total, so the reducer uses count += Integer.valueOf(split[0]). If the reducer kept using count++, every combiner record would be counted as a single student: if, say, a second map task also contributed Math scores, the reducer would see two partial records for Math and report a count of 2 regardless of how many students were actually involved.
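
For reference, the mapper below splits every input line on tab characters and expects the fields name, Chinese score, Math score, English score. The actual input file is not shown in the original post, but reconstructing it from the sample map output above, it would look roughly like this (tab-separated, one student per line):

Alice	105	90	80
Bob	90	100	105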

Here is the code without the combiner:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class CollectStuMRAve {
    // for each student line, emit one (subject, "name--score") pair per subject
    public static class avgmap extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            System.out.println("map --> key: " + key + ", value:" + values);
            // input line format: name \t chinese \t math \t english
            String[] value = values.toString().split("\t");
            context.write(new Text("Chiness"), new Text(value[0] + "--" + value[1]));
            context.write(new Text("Math"), new Text(value[0] + "--" + value[2]));
            context.write(new Text("English"), new Text(value[0] + "--" + value[3]));
        }


    }

    public static class avgreduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;

            System.out.println("handing key-->" + key);
            // each value is "name--score" straight from the map side: count students, sum scores
            for (Text value : values) {
                System.out.println("handing --> " + value.toString());
                count++;
                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]);
            }


            context.write(key, new Text(total + "--" + count)); // per-subject "totalScore--studentCount"
        }

    }

    /**
     * $ hdfs dfs -rm -r /user/yong/out
     * $ hadoop jar /home/yong/Desktop/testMR/target/testMR-1.0-SNAPSHOT.jar CollectStuMRAve /user/yong/input/test_data_stu /user/yong/out
     * $ hdfs dfs -cat /user/yong/out/part-r-00000
     */

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("args1: " + otherArgs[0]);
        System.out.println("args2: " + otherArgs[1]);
        Job job = Job.getInstance(conf, "CollectStuMRAve");
        job.setJarByClass(CollectStuMRAve.class);
        job.setMapperClass(CollectStuMRAve.avgmap.class);

        //job.setCombinerClass(CollectStuMRAve.avgreduce.class);
        //job.setPartitionerClass(SubjectPartitioner.class);
        job.setReducerClass(CollectStuMRAve.avgreduce.class);
        //job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}
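
This job emits a per-subject total and count rather than the average itself (the combiner version below does the same). If the final per-subject average is wanted directly, a minimal sketch (not part of the original code) is to divide in the reducer just before writing the result, for example:

    // sketch only: inside the reducer, after the loop has finished
    double avg = (double) total / count;
    context.write(key, new Text(String.valueOf(avg)));

The division must only ever happen in the reducer, never in a combiner, for the same reason count++ breaks there: a combiner only sees the subset of records produced by one map task.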


 

 

Here is the code with the combiner enabled:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class CollectStuMRAve {
    // for each student line, emit one (subject, "name--score") pair per subject
    public static class avgmap extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            System.out.println("map --> key: " + key + ", value:" + values);
            // input line format: name \t chinese \t math \t english
            String[] value = values.toString().split("\t");
            context.write(new Text("Chiness"), new Text(value[0] + "--" + value[1]));
            context.write(new Text("Math"), new Text(value[0] + "--" + value[2]));
            context.write(new Text("English"), new Text(value[0] + "--" + value[3]));
        }


    }

    public static class mycombiner extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;

            System.out.println("handing key-->" + key);
            // the combiner still sees raw map output "name--score", so count++ is correct here
            for (Text value : values) {
                System.out.println("handing --> " + value.toString());

                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]);
                count++;
            }


            context.write(key, new Text(count + "--" + total)); // partial aggregate: "count--total"
        }

    }
    public static class avgreduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;

            System.out.println("handing key-->" + key);
            // each value is now a combiner partial "count--total", not a single student record
            for (Text value : values) {
                System.out.println("handing --> " + value.toString());

                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]);
                count += Integer.valueOf(split[0]); // sum the partial counts; count++ would be wrong here
            }


            context.write(key, new Text(count + "--" + total)); // per-subject "studentCount--totalScore"
        }

    }

    /**
     * $ hdfs dfs -rm -r /user/yong/out
     * $ hadoop jar /home/yong/Desktop/testMR/target/testMR-1.0-SNAPSHOT.jar CollectStuMRAve /user/yong/input/test_data_stu /user/yong/out
     * $ hdfs dfs -cat /user/yong/out/part-r-00000
     */

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("args1: " + otherArgs[0]);
        System.out.println("args2: " + otherArgs[1]);
        Job job = Job.getInstance(conf, "CollectStuMRAve");
        job.setJarByClass(CollectStuMRAve.class);
        job.setMapperClass(CollectStuMRAve.avgmap.class);

        job.setCombinerClass(CollectStuMRAve.mycombiner.class);
        //job.setPartitionerClass(SubjectPartitioner.class);
        job.setReducerClass(CollectStuMRAve.avgreduce.class);
        //job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}
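
The commented-out lines job.setPartitionerClass(SubjectPartitioner.class) and job.setNumReduceTasks(4) point at a custom partitioner that is not included in the post. As a rough sketch of what such a class could look like (the routing below is an assumption, not the author's implementation), it might send each subject to its own reduce task:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class SubjectPartitioner extends Partitioner<Text, Text> {
    @Override
    public int getPartition(Text key, Text value, int numPartitions) {
        // assumed routing: one reduce task per subject, hash fallback for any other key
        switch (key.toString()) {
            case "Chiness": return 0 % numPartitions;
            case "Math":    return 1 % numPartitions;
            case "English": return 2 % numPartitions;
            default:        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }
    }
}

Whether or not a partitioner like this is plugged in, the combiner issue described above stays the same: the reducer must sum the partial counts emitted by the combiner instead of counting records with count++.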


 
