以案例驱动对join的认知
一、前提:
- 要想对两个数据流进行 Join,必须先为两个流划分相同的窗口,只有落在同一个窗口中的数据才会进行 Join 连接。
- 这里使用EventTime,划分滚动窗口
- Flink 只支持等值 Join,即只有 key 相等的数据才会被连接。
二、数据源
1、StreamDataSourceA
import java.util.Arrays;
import java.util.List;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
/**
 * Demo source that emits a fixed sequence of {@code (key, value, timestamp)}
 * tuples, pausing one second after each emission.
 *
 * <p>The third tuple field is a {@code Long}; it is presumably the event-time
 * timestamp in epoch milliseconds used for windowing -- confirm against the
 * timestamp/watermark assigner of the job that consumes this source.
 *
 * <p>NOTE(review): this extends a <em>parallel</em> source function, so every
 * parallel instance presumably runs {@link #run} and emits the whole sequence;
 * confirm the job uses parallelism 1 for this source if duplicates are unwanted.
 */
public class StreamDataSourceA extends RichParallelSourceFunction<Tuple3<String, String, Long>> {

    /** Pause after each emitted element, in milliseconds. */
    private static final long EMIT_INTERVAL_MS = 1000L;

    /**
     * Stays {@code true} until {@link #cancel()} is called. Declared
     * {@code volatile} because it is written by {@code cancel()} and read by
     * the emit loop in {@code run()}.
     */
    private volatile boolean flag = true;

    /**
     * Emits the fixed elements in order, stopping early once the source has
     * been cancelled.
     *
     * @param ctx context used to hand each element to the runtime
     * @throws Exception if the pause between elements is interrupted or
     *                   emitting an element fails
     */
    @Override
    public void run(SourceContext<Tuple3<String, String, Long>> ctx) throws Exception {
        // A typed list instead of a raw Tuple3[]: no unchecked casts are needed
        // and each element can be emitted as-is.
        List<Tuple3<String, String, Long>> elements = Arrays.asList(
                Tuple3.of("a", "1", 1000000050000L),
                Tuple3.of("a", "2", 1000000054000L),
                Tuple3.of("a", "3", 1000000079900L),
                Tuple3.of("a", "4", 1000000115000L),
                Tuple3.of("b", "5", 1000000100000L),
                Tuple3.of("b", "6", 1000000108000L)
        );

        for (Tuple3<String, String, Long> element : elements) {
            // Re-check before every emission so cancel() takes effect promptly.
            if (!flag) {
                break;
            }
            ctx.collect(element);
            Thread.sleep(EMIT_INTERVAL_MS);
        }
    }

    /** Requests that the emit loop in {@link #run} stop before its next element. */
    @Override
    public void cancel() {
        flag = false;
    }
}
2、StreamDataSourceB
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
public class StreamDataSourceB extends RichParallelSourceFunction<Tuple3<String, String, Long>> {
private volatile boolean flag = true;
@Override
public void run(SourceContext<Tuple3<String, String, Long>> ctx) throws Exception {
Tuple3[] elements = new Tuple3[]{
Tuple3.of("a", "hangzhou", 1000000059000L),
Tuple3.of("b", "beijing", 1000000105000L),
};
int count = 0;
while (flag && count < elements.length) {
ctx.collect(new Tuple3<>((String) elements[count].f0,
(String) elements[count].f1, (long) elements