WordCount(单词计数)例子
- public class WordCount {
- public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
- private final static IntWritable one = new IntWritable(1);
- private Text word = new Text();
- public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
- String line = valuetoString();
- StringTokenizer tokenizer = new StringTokenizer(line);
- while (tokenizerhasMoreTokens()) {
- wordset(tokenizernextToken());
- outputcollect(word, one);
- }
- }
- }
- public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
- public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
- int sum = 0;
- while (valueshasNext()) {
- sum += valuesnext()get();
- }
- outputcollect(key, new IntWritable(sum));
- }
- }
- public static void main(String[] args) throws Exception {
- JobConf conf = new JobConf(WordCountclass);
- confsetJobName("wordcount");
- confsetOutputKeyClass(Textclass);
- confsetOutputValueClass(IntWritableclass);
- confsetMapperClass(Mapclass);
- confsetCombinerClass(Reduceclass);
- confsetReducerClass(Reduceclass);
- confsetInputFormat(TextInputFormatclass);
- confsetOutputFormat(TextOutputFormatclass);
- FileInputFormatsetInputPaths(conf, new Path(args[0]));
- FileOutputFormatsetOutputPath(conf, new Path(args[1]));
- JobClientrunJob(conf);
- }
- }
运行命令:
hadoop jar /usr/local/hadoop/hadoop-examples-1.1.2.jar wordcount dedup_in dedup_out
/usr/local/hadoop/hadoop-examples-1.1.2.jar 是jar包在linux系统的绝对路径
wordcount 是要运行的主类名,一般要写包名在内的全限定类名。
dedup_in 是输入文件的路径,是hdfs上的绝对路径。
dedup_out 是输出结果文件的路径,同样是hdfs上的路径。