导语:Flink 是一款优秀的大数据计算引擎,同时支持批处理和流处理。本文将通过 Flink 的 Java API 实现批处理版的 WordCount。
环境准备:IntelliJ IDEA、Maven
实验:
1、Maven 依赖(pom.xml,各 Flink 模块的版本号应保持一致):
<!-- flink-->
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-core -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-core</artifactId>
    <version>1.13.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.12</artifactId>
    <version>1.13.2</version>
    <!-- <scope>provided</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.13.2</version>
</dependency>
2、文件准备
在 src/main/resources 目录下新建 hello.txt(纯文本文件,每行包含若干以空格分隔的单词)。
3、代码内容
package flinkTest;
// @Time 2021/9/8
// @Author HaiRu,WU
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Batch word-count experiment built on Flink's DataSet API.
 *
 * <p>Reads a text file line by line, splits each line on single spaces, and prints the number
 * of occurrences of every word.
 */
public class WordCount {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "D:\\IdeaProjects\\Demo01\\src\\main\\resources\\hello.txt";

    /**
     * Runs the word-count job and prints the resulting (word, count) pairs.
     *
     * @param args optional; when present, {@code args[0]} is the path of the text file to read,
     *     otherwise {@link #DEFAULT_INPUT_PATH} is used
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment.
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        // Read the input file; the path may be overridden from the command line.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        DataSource<String> inputDataSet = environment.readTextFile(inputPath);

        // Expand every line into (word, 1) tuples, then aggregate per word.
        DataSet<Tuple2<String, Integer>> resultSet = inputDataSet
                .flatMap(new MyFlatMapper())
                // Group by tuple field 0 (the word).
                .groupBy(0)
                // Sum tuple field 1 (the per-occurrence count).
                .sum(1);

        // Print the result. No explicit execute() call follows: in the DataSet API,
        // print() also triggers execution of the job.
        resultSet.print();
    }

    /** Splits a line on spaces and emits one {@code (word, 1)} tuple per non-empty word. */
    public static class MyFlatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {

        /**
         * Tokenizes one input line.
         *
         * @param value the line to split
         * @param out collector receiving a {@code (word, 1)} tuple for each word
         */
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Split on single spaces.
            String[] words = value.split(" ");
            for (String word : words) {
                // Blank lines and leading/repeated spaces produce empty tokens; skip them
                // so the empty string is not counted as a word.
                if (word.isEmpty()) {
                    continue;
                }
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
4、结果展示