实验环境:Hadoop + Java JDK + Ubuntu
准备数据文件
新建一个叫test的文本文件
注:gedit 是一款文本编辑器,非常好用;如果没有安装,可以改用 vi 或 vim
内容随便输
a b d aaa
das fs aa
ddd fssf
fsa aa
www werf
faa
编写代码
同样地,分别新建 WordCountMapper.java、WordCountReducer.java、WordCount.java 三个文件
并将以下代码复制进去
WordCountMapper.java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.StringTokenizer;
public class WordCountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//得到输入的每一行数据
String line=value.toString();
StringTokenizer st=new StringTokenizer(line);
while (st.hasMoreTokens()){
String word= st.