// Mapper class: implements the map function.
public static class MyMapper extends Mapper<Object, Text, Text, IntWritable>{
    // Constant count of 1 emitted for every token; immutable, so safe to share statically.
    private static final IntWritable one = new IntWritable(1);
    // Reusable output key. Made a per-instance (non-static) field: a static
    // mutable Text is clobbered between set() and write() if more than one
    // mapper instance runs in the same JVM (e.g. MultithreadedMapper).
    private final Text word = new Text();

    /**
     * Tokenizes the input line on whitespace and emits (token, 1) for each token.
     *
     * @param key     byte offset of the line in the input split (unused)
     * @param value   one line of input text
     * @param context output collector for (word, 1) pairs
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, one);
        }
    }
}
// Reducer class: implements the reduce function.
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
    // Reusable output value. Made a per-instance (non-static) field: a static
    // mutable IntWritable is clobbered between set() and write() if more than
    // one reducer instance runs in the same JVM.
    private final IntWritable result = new IntWritable();

    /**
     * Sums all counts for a word and emits (word, totalCount).
     * Also safe to use as a combiner, since integer summation is
     * commutative and associative.
     *
     * @param key     the word
     * @param values  partial counts for the word (1s from the mapper, or partial sums from a combiner)
     * @param context output collector for (word, totalCount) pairs
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
// Driver method: configures and launches the MapReduce job.
/**
 * Configures and submits the word-count job.
 * Usage: wordcount &lt;input path&gt; [&lt;input path&gt; ...] &lt;output path&gt;
 * The last argument is the output directory; all preceding arguments are
 * input paths (backward compatible with the original two-argument form).
 * Exits 0 on success, 1 on job failure, 2 on bad arguments.
 */
public static void main(String[] args) throws Exception {
    // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: wordcount <input path> [<input path> ...] <output path>");
        System.exit(2);
    }
    Configuration hadoopConf = new Configuration();
    Job job = Job.getInstance(hadoopConf, "wordcount");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(MyMapper.class);
    // The reducer doubles as the combiner: summation is commutative and associative.
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Every argument except the last is an input path.
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}