(一) JAVA-API 简单实现MapReduce统计单词
这是要处理的文件（原文此处应有示例文件的截图，已丢失）
1.导入maven依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.8.1</version>
</dependency>
2.创建Mapper类，继承 org.apache.hadoop.mapreduce.Mapper
package org.zsw.hadoop;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* @description:
* @author: zsw
* @date: Created in 2019/12/31 20:30
* @version: 1
* @modified By:
*/
public class WordCountMap extends Mapper<LongWritable, Text,Text, LongWritable> {
// 由于map方法的调用频率是按行调用,所以其参数表示的意义基本和行相关
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// key一般来说都是LongWritable类型,表示当前行的起始偏移量,从0开始
// value 值得是这一行具体内容
// context指的是上下文对象,上承接框架底层输入,下启shuffle阶段的输入(通常会说作为reduce阶段的输入)。
String line = value.toString();
String[] vals = line.split(" ");
for (String val : vals) {
context.write(new Text(val