Bug的背景
初学 MapReduce(MR)程序,一般都是从 WordCount 开始的。跟着敲了一遍代码之后,却发现 Hadoop 执行 WordCount 后并没有对结果进行累加,这是怎么一回事呢?
Bug源码
package mr;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
public class WordCount {
/**
* 测试wordcount
* @author Biglucky
*/
public static class TokenizerMapper extends Mapper<Object,Text,Text,IntWritable>
{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key,Text value,Context context) throws IOException,InterruptedException
{
StringTokenizer itr = new Stri