MapReduce 实现 WordCount
MapReduce 的实现主要通过继承 Mapper 和 Reducer 类来完成。在 map 阶段,每个节点对其负责的邮件资源进行单词计数,并以键值对(单词,计数)的形式输出;在 reduce 阶段,接收这些键值对,并对键相同的键值对进行合并,即可将各个节点的统计信息加以汇总。
将文件上传到 HDFS 之后,就可以进行 WordCount 统计了。下面是代码:
import java.io.IOException;
import java.util.Iterator;
import
java.util.StringTokenizer;
import
org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import
org.apache.hadoop.io.IntWritable;
import
org.apache.hadoop.io.Text;
import
org.apache.hadoop.mapreduce.Job;
import
org.apache.hadoop.mapreduce.Mapper;
import
org.apache.hadoop.mapreduce.Reducer;
import
org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import
org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {
public WordCount()