To compute the average score for each subject (Chinese, Math, English): if no combiner is enabled, you only have to reason about the map and reduce phases; once a combiner is enabled, you must reason about the combiner phase as well. Even though the combiner logic is almost identical to the reduce logic, the records the reducer receives straight from the map side and the records it receives from the combiner side have a slightly different shape.
If you enable a combiner that is literally the same class as the reducer, the job produces wrong results: the combiner has already turned raw name--score records into aggregates, so the reducer would parse an aggregate such as 190--2 as if it were a single name--score pair, adding the count field (2) into the total and counting the whole aggregate as one record, emitting 2--1 instead of 190--2.
For example:
Data emitted by the map side (key, value = name--score):
Math	Bob--100
Math	Alice--90
English	Alice--80
English	Bob--105
Chiness	Alice--105
Chiness	Bob--90
Data emitted by the combiner side (key, value = count--total; here it is fine to accumulate the count with count++):
Math	2--190
English	2--185
Chiness	2--195
At the reduce side, the count must no longer be accumulated with count++; it has to be count += Integer.valueOf(split[0]);, because each incoming value now represents an aggregate of several map records.
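For reference, a tab-separated input file that would produce the map output above looks like the following two lines (the column order name, Chinese, Math, English is inferred from the mapper's split indices below, so treat it as an assumption):

Bob	90	100	105
Alice	105	90	80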
Here is the code without the combiner enabled:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
public class CollectStuMRAve {
    // Mapper: for each student line, emit one (subject, name--score) pair per subject
    public static class avgmap extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            System.out.println("map --> key: " + key + ", value: " + values);
            // input line format: name \t chinese \t math \t english
            String[] value = values.toString().split("\t");
            context.write(new Text("Chiness"), new Text(value[0] + "--" + value[1]));
            context.write(new Text("Math"), new Text(value[0] + "--" + value[2]));
            context.write(new Text("English"), new Text(value[0] + "--" + value[3]));
        }
    }
    // Reducer: sum the scores per subject, count the records, emit total--count
    public static class avgreduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;
            System.out.println("handling key --> " + key);
            for (Text value : values) {
                System.out.println("handling --> " + value.toString());
                count++; // each value is exactly one raw map record
                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]); // split[1] is the score
            }
            context.write(key, new Text(total + "--" + count));
        }
    }
    /**
     * $ hdfs dfs -rm -r /user/yong/out
     * $ hadoop jar /home/yong/Desktop/testMR/target/testMR-1.0-SNAPSHOT.jar CollectStuMRAve /user/yong/input/test_data_stu /user/yong/out
     * $ hdfs dfs -cat /user/yong/out/part-r-00000
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("args1: " + otherArgs[0]);
        System.out.println("args2: " + otherArgs[1]);
        Job job = Job.getInstance(conf, "CollectStuMRAve");
        job.setJarByClass(CollectStuMRAve.class);
        //job.setCombinerClass(CollectStuMRAve.avgreduce.class); // do NOT reuse the reducer as a combiner
        job.setMapperClass(CollectStuMRAve.avgmap.class);
        //job.setPartitionerClass(SubjectPartitioner.class);
        job.setReducerClass(CollectStuMRAve.avgreduce.class);
        //job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
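With the two sample students above, the no-combiner job would produce output along these lines in part-r-00000 (subject, then total--count):

Chiness	195--2
English	185--2
Math	190--2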
Here is the code with the combiner enabled:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
public class CollectStuMRAve {
    // Mapper: identical to the no-combiner version, emits (subject, name--score)
    public static class avgmap extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            System.out.println("map --> key: " + key + ", value: " + values);
            String[] value = values.toString().split("\t");
            context.write(new Text("Chiness"), new Text(value[0] + "--" + value[1]));
            context.write(new Text("Math"), new Text(value[0] + "--" + value[2]));
            context.write(new Text("English"), new Text(value[0] + "--" + value[3]));
        }
    }
    // Combiner: consumes raw name--score records, emits a partial aggregate count--total
    public static class mycombiner extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;
            System.out.println("handling key --> " + key);
            for (Text value : values) {
                System.out.println("handling --> " + value.toString());
                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]); // split[1] is the score
                count++;                            // one raw record per value
            }
            context.write(key, new Text(count + "--" + total));
        }
    }
    // Reducer: consumes combiner aggregates count--total, so the count must be summed, not incremented
    public static class avgreduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            int total = 0;
            System.out.println("handling key --> " + key);
            for (Text value : values) {
                System.out.println("handling --> " + value.toString());
                String[] split = value.toString().split("--");
                total += Integer.valueOf(split[1]); // split[1] is the partial total
                count += Integer.valueOf(split[0]); // split[0] is the partial count
            }
            context.write(key, new Text(count + "--" + total));
        }
    }
    /**
     * $ hdfs dfs -rm -r /user/yong/out
     * $ hadoop jar /home/yong/Desktop/testMR/target/testMR-1.0-SNAPSHOT.jar CollectStuMRAve /user/yong/input/test_data_stu /user/yong/out
     * $ hdfs dfs -cat /user/yong/out/part-r-00000
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        System.out.println("args1: " + otherArgs[0]);
        System.out.println("args2: " + otherArgs[1]);
        Job job = Job.getInstance(conf, "CollectStuMRAve");
        job.setJarByClass(CollectStuMRAve.class);
        job.setMapperClass(CollectStuMRAve.avgmap.class);
        job.setCombinerClass(CollectStuMRAve.mycombiner.class); // combiner enabled: use mycombiner, not avgreduce
        //job.setPartitionerClass(SubjectPartitioner.class);
        job.setReducerClass(CollectStuMRAve.avgreduce.class);
        //job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
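With the same input, the combiner job's final output carries count--total instead:

Chiness	2--195
English	2--185
Math	2--190

Two caveats. First, Hadoop treats the combiner as an optimization and may run it zero, one, or several times per key, so a production reducer should be able to parse both raw name--score records and count--total aggregates; the version above assumes the combiner always runs. Second, if the goal is the actual average rather than a total and a count, one hypothetical final tweak to avgreduce (a sketch, not part of the original code) would be to replace its last context.write with:

// sketch: emit the per-subject average instead of count--total
context.write(key, new Text(String.valueOf((double) total / count)));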