首先,完成mapper类
package sinc.hadoops.mr;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Word-count mapper: splits each input line into whitespace-separated words
 * and emits a {@code (word, 1)} pair for every word found.
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** Constant count emitted for every single word occurrence. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Output key, reused across records to avoid one allocation per word. */
    private final Text word = new Text();

    /**
     * Tokenizes one line of input and writes {@code (word, 1)} for each token.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split on runs of any whitespace (spaces, tabs, ...) rather than on a
        // single space, so repeated separators do not produce empty "words".
        for (String token : value.toString().split("\\s+")) {
            // A blank line or leading whitespace still yields one empty token.
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
        }
    }
}
再完成reducer类
package sinc.hadoops.mr;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Word-count reducer: adds up all counts received for a word and emits a
 * single {@code (word, total)} pair.
 */
public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    /**
     * Writes the key together with the sum of all of its values.
     *
     * @param key     the word being reduced
     * @param values  the counts emitted for that word
     * @param context sink for the resulting {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, new LongWritable(sum(values)));
    }

    /** Returns the sum of the wrapped long values. */
    private static long sum(Iterable<LongWritable> counts) {
        long total = 0;
        for (LongWritable count : counts) {
            total += count.get();
        }
        return total;
    }
}
最后完成主类main方法:
package sinc.hadoops.mr;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the word-count job: wires {@link WCMapper} and {@link WCReducer}
 * into a MapReduce job, submits it and waits for it to finish.
 */
public class WordCount {

    /**
     * Configures and runs the job, then exits the JVM with a status that
     * reflects the outcome: 0 on success, 1 if the job failed, -1 if the
     * required arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if interrupted while waiting for the job
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(-1);
        }

        Job job = Job.getInstance();
        // Important: the class that contains the main method, used to locate the job jar.
        job.setJarByClass(WordCount.class);

        // Mapper settings.
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // Reducer settings.
        job.setReducerClass(WCReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate the job result so that callers (shell scripts, schedulers)
        // can detect a failed job instead of always seeing exit status 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
至此,编写完成。
在HDFS上放入文件/tmp/test,内容如下:
hello zhang san hello zhang si hello nihao
hello zhang wu
执行:hadoop jar wordcount.jar /tmp/test /tmp/201701181347
查看:hadoop fs -ls /tmp/201701181347
-rw-r--r-- 2 root supergroup 0 2017-01-18 13:53 /tmp/201701181347/_SUCCESS
-rw-r--r-- 2 root supergroup 40 2017-01-18 13:53 /tmp/201701181347/part-r-00000
查看结果:hadoop fs -cat /tmp/201701181347/part-r-00000
hello 4
nihao 1
san 1
si 1
wu 1
zhang 3
至此,测试结束。