1.WordCount的实现
Map 端:一行一行地读取文件,程序将每一行转换为中间的KEY/VALUE对
例如:两行
My name is liu gang ->My 1 name 1 is 1 liu 1 gang 1
What is your name -> What 1 is 1 your 1 name 1
Reduce端:
相同的KEY一定会被分到一起,经过Reduce方法处理后形成最终的key/value
My 1 name 2 is 2 liu 1 gang 1 What 1 your 1
下面是WordCount的实现
map类
package com.hadoop.mapreduce.wordcount;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WordCountMap extends Mapper<Object, Text, Text, IntWritable>{
    /** Reusable count value: every token emitted carries a count of 1. */
    private final IntWritable one = new IntWritable(1);
    /** Reusable output key, mutated per token to avoid per-record allocation. */
    private final Text word = new Text();

    /**
     * Splits one line of input on single spaces and emits (token, 1) for
     * every token, producing the intermediate key/value pairs for the
     * word-count job.
     *
     * @param key     input key from the record reader (unused here —
     *                presumably the line's byte offset; not read by this code)
     * @param value   one line of input text
     * @param context Hadoop context used to emit intermediate pairs
     * @throws IOException          if the framework write fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Tokenize on single spaces, matching the original delimiter choice.
        StringTokenizer tokens = new StringTokenizer(value.toString(), " ");
        // hasMoreTokens() is the idiomatic pair for nextToken()
        // (the original used the legacy Enumeration-style hasMoreElements()).
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, one);
        }
    }
}
Reduce类
package com.hadoop.mapreduce.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
    /** Reusable output value holding the summed count for the current key. */
    private final IntWritable result = new IntWritable(0);

    /**
     * Sums every count associated with one word and emits (word, total).
     *
     * Bug fix: the original called {@code arg1.iterator()} afresh both in the
     * loop condition and inside the loop body. For any standard
     * {@link Iterable} that restarts iteration on each call and loops forever;
     * it only appeared to work because Hadoop's value iterable happens to hand
     * back the same underlying iterator. A for-each loop obtains the iterator
     * exactly once and is correct for every Iterable.
     *
     * @param key     the word being reduced
     * @param values  all counts emitted by the mappers for this word
     * @param context Hadoop context used to emit the final pair
     * @throws IOException          if the framework write fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
main
package com.hadoop.mapreduce.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool{
    /**
     * Entry point: runs the WordCount job through ToolRunner and exits with
     * the job's status code.
     *
     * Bug fix: the original always called {@code System.exit(0)}, discarding
     * the value returned by {@code ToolRunner.run}, so a failed job still
     * reported success to the calling shell.
     *
     * @param args command-line arguments; args[0] = input path,
     *             args[1] = output path (generic Hadoop options are consumed
     *             by ToolRunner first)
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and submits the MapReduce job, blocking until it finishes.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job configuration or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        // Guard against missing paths instead of throwing
        // ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            return 1;
        }
        Configuration conf = getConf();
        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "WordCount");
        job.setJarByClass(WordCount.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(WordCountMap.class);
        // Summing is associative and commutative, so the reducer can also act
        // as a combiner to reduce shuffle traffic.
        job.setCombinerClass(WordCountReduce.class);
        job.setReducerClass(WordCountReduce.class);
        // waitForCompletion returns the job's success flag directly; no need
        // for a separate isSuccessful() call afterwards.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}