Hadoop Java 示例:WordCount 程序一步一步运行

本文详细介绍了一个使用Apache Hadoop进行单词计数的基本MapReduce程序WordCount,通过Mapper和Reducer实现文本中单词及其出现次数的统计。讲解了配置、输入输出格式及命令行参数的设置。
摘要由CSDN通过智能技术生成

importjava.io.IOException;importjava.util.ArrayList;importjava.util.Iterator;importjava.util.List;importjava.util.StringTokenizer;importorg.apache.hadoop.conf.Configuration;importorg.apache.hadoop.conf.Configured;importorg.apache.hadoop.fs.Path;importorg.apache.hadoop.io.IntWritable;importorg.apache.hadoop.io.LongWritable;importorg.apache.hadoop.io.Text;importorg.apache.hadoop.mapred.FileInputFormat;importorg.apache.hadoop.mapred.FileOutputFormat;importorg.apache.hadoop.mapred.JobClient;importorg.apache.hadoop.mapred.JobConf;importorg.apache.hadoop.mapred.MapReduceBase;importorg.apache.hadoop.mapred.Mapper;importorg.apache.hadoop.mapred.OutputCollector;importorg.apache.hadoop.mapred.Reducer;importorg.apache.hadoop.mapred.Reporter;importorg.apache.hadoop.util.Tool;importorg.apache.hadoop.util.ToolRunner;publicclassWordCountextendsConfiguredimplementsTool {publicstaticclassMapClassextendsMapReduceBaseimplementsMapper{privatefinalstaticIntWritable one=newIntWritable(1);privateText word=newText();publicvoidmap(LongWritable key, Text value,

OutputCollectoroutput, Reporter reporter)throwsIOException {

String line=value.toString();

StringTokenizer itr=newStringTokenizer(line);while(itr.hasMoreTokens()) {

word.set(itr.nextToken());

output.collect(word, one);

}

}

}/*** A reducer class that just emits the sum of the input values.*/publicstaticclassReduceextendsMapReduceBaseimplementsReducer{publicvoidreduce(Text key, Iteratorvalues,

OutputCollectoroutput, Reporter reporter)throwsIOException {intsum=0;while(values.hasNext()) {

sum+=values.next().get();

}

output.collect(key,newIntWritable(sum));

}

}staticintprintUsage() {

System.out.println("wordcount [-m ] [-r ] ");

ToolRunner.printGenericCommandUsage(System.out);return-1;

}/*** The main driver for word count map/reduce program. Invoke this method to

* submit the map/reduce job.

*

*@throwsIOException

* When there is communication problems with the job tracker.*/publicintrun(String[] args)throwsException {

JobConf conf=newJobConf(getConf(), WordCount.class);

conf.setJobName("wordcount");//the keys are words (strings)conf.setOutputKeyClass(Text.class);//the values are counts (ints)conf.setOutputValueClass(IntWritable.class);

conf.setMapperClass(MapClass.class);

conf.setCombinerClass(Reduce.class);

conf.setReducerClass(Reduce.class);

Listother_args=newArrayList();for(inti=0; i

conf.setNumMapTasks(Integer.parseInt(args[++i]));

}elseif("-r".equals(args[i])) {

conf.setNumReduceTasks(Integer.parseInt(args[++i]));

}else{

other_args.add(args[i]);

}

}catch(NumberFormatException except) {

System.out.println("ERROR: Integer expected instead of"+args[i]);returnprintUsage();

}catch(ArrayIndexOutOfBoundsException except) {

System.out.println("ERROR: Required parameter missing from"+args[i-1]);returnprintUsage();

}

}//Make sure there are exactly 2 parameters left.if(other_args.size()!=2) {

System.out.println("ERROR: Wrong number of parameters:"+other_args.size()+"instead of 2.");returnprintUsage();

}

FileInputFormat.setInputPaths(conf, other_args.get(0));

FileOutputFormat.setOutputPath(conf,newPath(other_args.get(1)));

JobClient.runJob(conf);return0;

}publicstaticvoidmain(String[] args)throwsException {intres=ToolRunner.run(newConfiguration(),newWordCount(), args);

System.exit(res);

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值