Flink快速上手
1 pom依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>1.10.1</version>
</dependency>
2 任务-统计文件中每个单词出现的次数
hello world
execute
hello china beijing
hello china shanghai
2.1 批处理 wordcount (一次性处理)
/**
 * Batch word count: reads {@code hello.txt} in one pass and prints how many
 * times each word occurs.
 */
public class WorkCount {
    public static void main(String[] args) throws Exception {
        // 1. Create the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // 2. Read the input file; each element of the DataSet is one line of text.
        String inputPath = "hello.txt";
        DataSet<String> inputDataSet = env.readTextFile(inputPath);

        // 3. Turn every line into (word, 1) pairs, group by the word (tuple field 0)
        //    and sum the counts (tuple field 1).
        DataSet<Tuple2<String, Integer>> wordCountDataSet = inputDataSet
                .flatMap(new MyFlatMap())
                .groupBy(0)
                .sum(1);

        // 4. Print the result to the console.
        //    NOTE: there is no explicit env.execute() here; for DataSet programs
        //    print() is the call that triggers execution.
        wordCountDataSet.print();
    }

    /**
     * Splits a line into words and emits one {@code (word, 1)} pair per word.
     *
     * <p>Type parameters of {@code FlatMapFunction<String, Tuple2<String, Integer>>}:
     * {@code String} is the input type (one line of text) and
     * {@code Tuple2<String, Integer>} is the output type produced by the transformation.
     */
    public static class MyFlatMap implements FlatMapFunction<String, Tuple2<String, Integer>> {
        /**
         * @param s   one line of input text
         * @param out collector receiving a {@code (word, 1)} tuple for every non-empty word
         */
        public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Split on single spaces.
            String[] words = s.split(" ");
            for (String word : words) {
                // split(" ") yields empty tokens for blank lines, leading spaces and
                // repeated spaces; skip them so "" is never counted as a word.
                if (word.isEmpty()) {
                    continue;
                }
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
- 结果
2.2 流处理 wordcount (接收一条数据处理一条)
hello world
hello java
hello flink
use flink
/**
 * Streaming word count: reads lines from a local socket and prints a running
 * count per word, updating the result as each line arrives.
 */
public class StreamWordCount {
    public static void main(String[] args) throws Exception {
        // 1. Create the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Connect to a text socket on the local machine (127.0.0.1:60000);
        //    a server must already be listening on that port.
        DataStream<String> dataStream = env.socketTextStream("127.0.0.1", 60000);

        // 3. Turn every line into (word, 1) pairs, key by the word (tuple field 0)
        //    and keep a running sum of the counts (tuple field 1).
        DataStream<Tuple2<String, Integer>> wordCountDataStream = dataStream
                .flatMap(new MyFlatMap())
                .keyBy(0)
                .sum(1);

        // 4. Print with a parallelism of 1 so the output is written by a single task.
        wordCountDataStream.print().setParallelism(1);

        // The calls above only describe the pipeline; execute() starts the job.
        env.execute();
    }

    /**
     * Splits a line into words and emits one {@code (word, 1)} pair per word.
     */
    public static class MyFlatMap implements FlatMapFunction<String, Tuple2<String, Integer>> {
        /**
         * @param s   one line of input text
         * @param out collector receiving a {@code (word, 1)} tuple for every non-empty word
         */
        public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
            String[] words = s.split(" ");
            for (String word : words) {
                // split(" ") yields empty tokens for blank lines, leading spaces and
                // repeated spaces; skip them so "" is never counted as a word.
                if (word.isEmpty()) {
                    continue;
                }
                // Diamond instead of the raw Tuple2 type, so the element type is checked.
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
- 输出