目录
一、依赖引入
<!-- Flink依赖:流处理API与Java API;artifactId后缀_2.12为Scala版本号 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.10.1</version>
</dependency>
二、words.txt内容
I never saw a Moor
I never saw the Sea
Yet know I how the Heather looks
And what a Billow be
I never spoke with God
Nor visited in Heaven
Yet certain am I of the spot
As if the Checks were given
三、Flink批处理实现word count
1. 代码
package com.jiam.demo.flink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
// flink批处理实现word count
// Flink batch word count: reads a local text file, splits each line on spaces,
// and prints the total occurrence count of every word.
public class WordCountBatch {
    public static void main(String[] args) throws Exception {
        // 1. Create the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // 2. Read the input file line by line.
        String inputPath = "D:\\IDEA\\git_store\\flink_demo\\src\\main\\resources\\words.txt";
        DataSet<String> dataSet = env.readTextFile(inputPath);
        // 3. Tokenize and count.
        // 3.1 Flat-map each line into (word, 1) pairs, e.g. <hello, 1>.
        //     Instantiate MyFlatMapper directly — the original anonymous subclass
        //     `new MyFlatMapper(){}` added nothing and was inconsistent with the
        //     streaming variants of this example.
        DataSet<Tuple2<String, Integer>> resultSet = dataSet.flatMap(new MyFlatMapper())
                // 3.2 Group by tuple position 0 (the word).
                .groupBy(0)
                // 3.3 Sum tuple position 1 (the count).
                .sum(1);
        // 4. Print the aggregated counts.
        resultSet.print();
    }

    // FlatMapFunction<T, O>: T is the input type, O the output type.
    // Splits a line on single spaces and emits one (word, 1) tuple per token.
    private static class MyFlatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            // 1. Split the line on spaces.
            String[] words = value.split(" ");
            // 2. Emit each word as a (word, 1) two-tuple via the collector.
            for (String word : words) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
2. 结果
可以看到输出的是最终统计结果。
(As,1)
(certain,1)
(what,1)
(with,1)
(Heaven,1)
(Nor,1)
(a,2)
(am,1)
(in,1)
(looks,1)
(Billow,1)
(Yet,2)
(never,3)
(saw,2)
(the,4)
(visited,1)
(God,1)
(Heather,1)
(be,1)
(know,1)
(of,1)
(I,5)
(spot,1)
(And,1)
(Checks,1)
(Moor,1)
(Sea,1)
(given,1)
(spoke,1)
(were,1)
(how,1)
(if,1)
四、Flink流处理实现word count
1. 代码
package com.jiam.demo.flink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
// flink流处理执行word count
// Flink streaming word count: reads a text file through the DataStream API and
// prints a running per-word count as each (word, 1) record is processed.
public class WordCountStream {
    public static void main(String[] args) throws Exception {
        // 1. Obtain the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Source: the input file, consumed line by line.
        String inputPath = "D:\\IDEA\\git_store\\flink_demo\\src\\main\\resources\\words.txt";
        DataStream<String> lines = env.readTextFile(inputPath);

        // 3. Transform: split lines into (word, 1) pairs, key by the word
        //    (tuple position 0 — keyBy acts like a streaming GROUP BY), and
        //    keep a running sum over tuple position 1.
        DataStream<Tuple2<String, Integer>> counts =
                lines.flatMap(new MyFlatMapper())
                     .keyBy(0)
                     .sum(1);

        // 4. Sink: print every updated count.
        counts.print();

        // 5. Launch the streaming job.
        env.execute();
    }

    // Splits each incoming line on spaces and emits one (word, 1) tuple per token.
    private static class MyFlatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String token : value.split(" ")) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}
2. 结果
流处理会存储计算状态,输出结果中单词的出现频次是变化的。
1> (God,1)
5> (the,1)
1> (spot,1)
5> (looks,1)
6> (how,1)
6> (As,1)
6> (a,1)
6> (be,1)
3> (I,1)
3> (spoke,1)
8> (Heather,1)
8> (And,1)
2> (never,1)
4> (what,1)
7> (with,1)
7> (were,1)
7> (certain,1)
7> (of,1)
6> (a,2)
7> (visited,1)
7> (in,1)
3> (Yet,1)
3> (know,1)
3> (I,2)
3> (Checks,1)
3> (given,1)
3> (Yet,2)
3> (I,3)
3> (I,4)
3> (I,5)
5> (if,1)
5> (the,2)
5> (the,3)
5> (Billow,1)
2> (am,1)
8> (Nor,1)
5> (Heaven,1)
5> (Moor,1)
5> (the,4)
2> (never,2)
8> (saw,1)
2> (never,3)
8> (saw,2)
2> (Sea,1)
五、基于Socket的word count
Tips:先启动Socket程序,再启动Flink程序。
1. Socket代码
这里编写Socket程序时要注意加上writer.newLine()(即换行),否则一是会导致只有输入"#"后消息才能被word count程序接收,二是会产生TCP粘包问题。
package com.jiam.demo.flink;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.*;
import java.util.Scanner;
// 编写word count
// Plain TCP server that forwards console input, one line per message, to the
// connected Flink word-count client. Type "#" to end the session.
public class SocketForWordCount {
    public static void main(String[] args) throws IOException {
        // TCP server listening on port 8888. try-with-resources guarantees the
        // server socket, the accepted connection, and the writer are all closed
        // (the original leaked the ServerSocket and the BufferedWriter).
        try (ServerSocket serverSocket = new ServerSocket(8888)) {
            System.out.println("开始等待连接...");
            try (Socket accept = serverSocket.accept();
                 BufferedWriter writer = new BufferedWriter(
                         new OutputStreamWriter(accept.getOutputStream()))) {
                System.out.println("连接成功...");
                // Read lines from the console and push each over the TCP connection.
                Scanner sc = new Scanner(System.in);
                System.out.println("请输入单词串,各单词以空格分割.");
                String line;
                while (!"#".equals(line = sc.nextLine())) {
                    writer.write(line);
                    // newLine() is essential: without the line terminator the
                    // client would only see data after "#" is entered, and
                    // messages would run together (TCP has no message boundaries).
                    writer.newLine();
                    writer.flush();
                    System.out.println("发送:" + line);
                }
            } catch (Exception e) {
                // Best-effort demo server: report the failure and fall through so
                // the resources above are still closed.
                e.printStackTrace();
            }
        }
    }
}
2. Word Count代码
package com.jiam.demo.flink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
// 获取Socket传过来的数据进行word count
// Flink streaming word count fed by a TCP socket: counts words in each line
// pushed by the companion SocketForWordCount server and prints running totals.
public class SocketWordCountStream {
    public static void main(String[] args) throws Exception {
        // 1. Obtain the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Source: lines arriving over the TCP socket.
        DataStream<String> socketLines = env.socketTextStream("localhost", 8888);

        // 3. Transform: explode lines into (word, 1) pairs, key by the word
        //    (tuple position 0 — keyBy acts like a streaming GROUP BY), and
        //    keep a running sum over tuple position 1.
        DataStream<Tuple2<String, Integer>> counts =
                socketLines.flatMap(new MyFlatMapper())
                           .keyBy(0)
                           .sum(1);

        // 4. Sink: print every updated count.
        counts.print();

        // 5. Launch the streaming job.
        env.execute();
    }

    // Splits each incoming line on spaces and emits one (word, 1) tuple per token.
    private static class MyFlatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String token : value.split(" ")) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}
3. 结果
消息发送服务端,输入后回车即会将该条消息发送到客户端。
消息客户端,word count程序会在每次接受到来自消息发送端的单词串时进行统计。