目录
主类:
package practice;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* @author pangsl
* @date 2021/5/25 10:05
* @Description flink使用DataStream API处理无界数据流
*/
public class StreamWordCount {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Tuple2<String,Integer>> dataStream = env.addSource(new MySource()).flatMap(new Splitter()).keyBy(0).sum(1);
dataStream.print();
env.execute("WordCount");
}
public static class Splitter implements FlatMapFunction<String, Tuple2<String,Integer>> {
//@Override
public void flatMap(String sentence, Collector<Tuple2<String, Integer>> out) throws Exception {
//使用空格分割单词
for (String word: sentence.split(" ")){
out.collect(new Tuple2<String, Integer>(word,1));
}
}
}
}
MySource类(生成数据端)
package practice;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* @author pangsl
* @date 2021/5/25 10:19
* @Description
*/
public class MySource implements SourceFunction<String> {
private long count = 1L;
private boolean isRunning = true;
//通过循环产生数据
public void run(SourceContext<String> ctx) throws Exception {
while (isRunning){
//单词流
List<String> stringList = new ArrayList<String>();
stringList.add("world");
stringList.add("flink");
stringList.add("stream");
stringList.add("batch");
stringList.add("table");
stringList.add("sql");
stringList.add("hello");
int size = stringList.size();
int i = new Random().nextInt(size);
ctx.collect(stringList.get(i));
//每秒产生一条数据
Thread.sleep(1000);
}
}
//取消执行
public void cancel() {
isRunning = false;
}
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>flinkdemo</artifactId>
        <groupId>com.flink.pangsl</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>Flink_shizhanpai</artifactId>
    <!--
      NOTE: every Flink dependency below uses the "provided" scope, i.e. the jars are expected
      to be supplied by the runtime environment. When launching from an IDE, make sure
      provided-scope dependencies are included on the run classpath.
    -->
    <dependencies>
        <!-- Flink core dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>1.12.0</version>
            <scope>provided</scope>
        </dependency>
        <!-- Flink stream and batch processing dependency.
             The Scala suffix must match the other Flink artifacts (here _2.12); mixing _2.11
             and _2.12 pulls two Scala builds of Flink onto the classpath. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.12.0</version>
            <scope>provided</scope>
        </dependency>
        <!-- Flink clients, required to execute jobs locally -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>1.12.0</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>
输出结果:
添加时间窗口
修改上面主类,在keyBy(0)后面添加时间窗口
.timeWindow(Time.seconds(10))
package practice;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
* @author pangsl
* @date 2021/5/25 10:05
* @Description flink使用DataStream API处理无界数据流
*/
public class StreamWordCount {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Tuple2<String,Integer>> dataStream = env.addSource(new MySource())
.flatMap(new Splitter())
.keyBy(0)
.timeWindow(Time.seconds(10))
.sum(1);
dataStream.print();
env.execute("WordCount");
}
public static class Splitter implements FlatMapFunction<String, Tuple2<String,Integer>> {
//@Override
public void flatMap(String sentence, Collector<Tuple2<String, Integer>> out) throws Exception {
//使用空格分割单词
for (String word: sentence.split(" ")){
out.collect(new Tuple2<String, Integer>(word,1));
}
}
}
}
结果输出
注意:
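上文的 .timeWindow(Time.seconds(10)) 写法在 Flink 1.11.0 中可以正常运行。
从 Flink 1.12.0 开始，timeWindow 已被标记为过时（deprecated），并且默认时间语义改为事件时间（EventTime）；本例没有分配时间戳和水位线，因此运行时会报错。在 1.12.0 中应改用 .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))。报错信息如下: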