在 Eclipse 中配置好 Hadoop 插件后，编写 WordCount 程序如下：
package hadoop01;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountMapReduce {
// step 1: Mapper
/**
 * Map stage of the word count job: splits each input record into tokens and
 * emits one {@code (token, 1)} pair per token.
 */
public static class WordCountMapper extends
        Mapper<LongWritable, Text, Text, IntWritable> {

    // Output key holder; overwritten with the current token before each write.
    private Text mapOutputKey = new Text();
    // Output value; constructed with 1 and never modified in this class.
    private IntWritable mapOutputValue = new IntWritable(1);

    /**
     * Tokenizes one input record and writes every token with a count of 1.
     *
     * @param key     input key of the record; not read by this method
     * @param value   input record text to be tokenized
     * @param context sink that receives each {@code (token, 1)} pair
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The single-argument StringTokenizer constructor splits on its
        // default delimiter set (whitespace characters).
        for (StringTokenizer words = new StringTokenizer(value.toString());
                words.hasMoreTokens();) {
            mapOutputKey.set(words.nextToken());
            context.write(mapOutputKey, mapOutputValue);
        }
    }
}
// step 2: Reducer