只需创建 3 个类即可:
1、Mapper 类(WordMapper.java)
import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Word-count mapper: emits a {@code (word, 1)} pair for every word found in an input record.
 *
 * <p>The first two type parameters are the input key/value types of {@code map}; the last two
 * are its output key/value types. The input value is the text to tokenize. The input key is
 * not used by this mapper (the original note describes it as a line number; with Hadoop's
 * default text input it is presumably the record's byte offset -- confirm against the job
 * configuration).
 */
public class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /**
     * Punctuation characters stripped from the text before it is split into words.
     * A character class is used instead of an alternation so that no branch can match the
     * empty string and shadow the characters listed after it.
     */
    private static final Pattern PUNCTUATION = Pattern.compile("[,\".?!:]");

    /** Word separator: one or more whitespace characters (spaces, tabs, ...). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Output key; the same instance is refilled for every emitted pair. */
    private final Text word = new Text();

    /** Output value; always 1, one occurrence per emitted word. */
    private final IntWritable count = new IntWritable(1);

    /**
     * Removes punctuation from the record, splits it on whitespace and writes
     * {@code (word, 1)} to the context for each non-empty token.
     *
     * @param key     input key of the record; ignored
     * @param value   text of the record to tokenize
     * @param context sink that receives the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Punctuation is removed outright (not replaced by a space), so "end." becomes "end".
        String cleaned = PUNCTUATION.matcher(value.toString()).replaceAll("");

        for (String token : WHITESPACE.split(cleaned)) {
            // Leading whitespace or a blank record produces an empty token; never emit it as a word.
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, count);
        }
    }
}
2、RE