importjava.io.IOException;importjava.util.ArrayList;importjava.util.Iterator;importjava.util.List;importjava.util.StringTokenizer;importorg.apache.hadoop.conf.Configuration;importorg.apache.hadoop.conf.Configured;importorg.apache.hadoop.fs.Path;importorg.apache.hadoop.io.IntWritable;importorg.apache.hadoop.io.LongWritable;importorg.apache.hadoop.io.Text;importorg.apache.hadoop.mapred.FileInputFormat;importorg.apache.hadoop.mapred.FileOutputFormat;importorg.apache.hadoop.mapred.JobClient;importorg.apache.hadoop.mapred.JobConf;importorg.apache.hadoop.mapred.MapReduceBase;importorg.apache.hadoop.mapred.Mapper;importorg.apache.hadoop.mapred.OutputCollector;importorg.apache.hadoop.mapred.Reducer;importorg.apache.hadoop.mapred.Reporter;importorg.apache.hadoop.util.Tool;importorg.apache.hadoop.util.ToolRunner;publicclassWordCountextendsConfiguredimplementsTool {publicstaticclassMapClassextendsMapReduceBaseimplementsMapper{privatefinalstaticIntWritable one=newIntWritable(1);privateText word=newText();publicvoidmap(LongWritable key, Text value,
OutputCollectoroutput, Reporter reporter)throwsIOException {
String line=value.toString();
StringTokenizer itr=newStringTokenizer(line);while(itr.hasMoreTokens()) {
word.set(itr.nextToken());
output.collect(word, one);
}
}
}/*** A reducer class that just emits the sum of the input values.*/publicstaticclassReduceextendsMapReduceBaseimplementsReducer{publicvoidreduce(Text key, Iteratorvalues,
OutputCollectoroutput, Reporter reporter)throwsIOException {intsum=0;while(values.hasNext()) {
sum+=values.next().get();
}
output.collect(key,newIntWritable(sum));
}
}staticintprintUsage() {
System.out.println("wordcount [-m ] [-r ] ");
ToolRunner.printGenericCommandUsage(System.out);return-1;
}/*** The main driver for word count map/reduce program. Invoke this method to
* submit the map/reduce job.
*
*@throwsIOException
* When there is communication problems with the job tracker.*/publicintrun(String[] args)throwsException {
JobConf conf=newJobConf(getConf(), WordCount.class);
conf.setJobName("wordcount");//the keys are words (strings)conf.setOutputKeyClass(Text.class);//the values are counts (ints)conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(MapClass.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
Listother_args=newArrayList();for(inti=0; i
conf.setNumMapTasks(Integer.parseInt(args[++i]));
}elseif("-r".equals(args[i])) {
conf.setNumReduceTasks(Integer.parseInt(args[++i]));
}else{
other_args.add(args[i]);
}
}catch(NumberFormatException except) {
System.out.println("ERROR: Integer expected instead of"+args[i]);returnprintUsage();
}catch(ArrayIndexOutOfBoundsException except) {
System.out.println("ERROR: Required parameter missing from"+args[i-1]);returnprintUsage();
}
}//Make sure there are exactly 2 parameters left.if(other_args.size()!=2) {
System.out.println("ERROR: Wrong number of parameters:"+other_args.size()+"instead of 2.");returnprintUsage();
}
FileInputFormat.setInputPaths(conf, other_args.get(0));
FileOutputFormat.setOutputPath(conf,newPath(other_args.get(1)));
JobClient.runJob(conf);return0;
}publicstaticvoidmain(String[] args)throwsException {intres=ToolRunner.run(newConfiguration(),newWordCount(), args);
System.exit(res);
}
}