1、概述
窗口算子的水位线取两条输入流水位线中的较小值;
某条流中的元素如果在同一窗口内没有与另一条流中相同 key 的元素匹配(inner join 语义),它就不会被输出;
2、代码示例
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import java.time.Duration;
/**
* 注意:
* <p>
* 窗口中的水位线取的是两条流中的最小值;
* 一个流中的元素如果没有与另一个流中的元素组合起来,它就不会被输出;
*/
public class _02_WindowSlidingEventJoin {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 测试时限制了分区数,生产中需要设置空闲数据源
env.setParallelism(2);
env.disableOperatorChaining();
DataStreamSource<String> inputLeft = env.socketTextStream("localhost", 8888);
// 事件时间需要设置水位线策略和时间戳
SingleOutputStreamOperator<Tuple2<String, Long>> mapLeft = inputLeft.map(new MapFunction<String, Tuple2<String, Long>>() {
@Override
public Tuple2<String, Long> map(String input) throws Exception {
String[] fields = input.split(",");
return new Tuple2<>(fields[0], Long.parseLong(fields[1]));
}
});
SingleOutputStreamOperator<Tuple2<String, Long>> watermarkLeft = mapLeft.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
.withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
@Override
public long extractTimestamp(Tuple2<String, Long> input, long l) {
return input.f1;
}
}));
DataStreamSource<String> inputRight = env.socketTextStream("localhost", 9999);
// 事件时间需要设置水位线策略和时间戳
SingleOutputStreamOperator<Tuple2<String, Long>> mapRight = inputRight.map(new MapFunction<String, Tuple2<String, Long>>() {
@Override
public Tuple2<String, Long> map(String input) throws Exception {
String[] fields = input.split(",");
return new Tuple2<>(fields[0], Long.parseLong(fields[1]));
}
});
SingleOutputStreamOperator<Tuple2<String, Long>> watermarkRight = mapRight.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
.withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
@Override
public long extractTimestamp(Tuple2<String, Long> input, long l) {
return input.f1;
}
}));
watermarkLeft.join(watermarkRight)
.where(e -> e.f0)
.equalTo(e -> e.f0)
.window(SlidingEventTimeWindows.of(Duration.ofSeconds(6), Duration.ofSeconds(3)))
.apply(new JoinFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple3<String, Long, Long>>() {
@Override
public Tuple3<String, Long, Long> join(Tuple2<String, Long> t1, Tuple2<String, Long> t2) throws Exception {
return new Tuple3<>(t1.f0, t1.f1, t2.f1);
}
})
.print();
env.execute();
}
}
3、测试用例
left-1
a,1718089200000
b,1718089200000
c,1718089200000
left-watermark=1718089199999
window-watermark=no_watermark
right-2
a,1718089201000
b,1718089201000
c,1718089201000
right-watermark=1718089200999
window-watermark=1718089199999[两条流中最小的]
left-3
a,1718089204000
b,1718089204000
c,1718089204000
left-watermark=1718089203999
window-watermark=1718089200999
right-4
a,1718089205000
b,1718089205000
c,1718089205000
right-watermark=1718089204999
window-watermark=1718089203999
res:[1718089197000~1718089203000) (窗口左闭右开)
2> (a,1718089200000,1718089201000)
1> (b,1718089200000,1718089201000)
1> (c,1718089200000,1718089201000)
left-5
a,1718089209000
b,1718089209000
c,1718089209000
left-watermark=1718089208999
window-watermark=1718089204999
right-6
a,1718089209000
b,1718089209000
c,1718089209000
right-watermark=1718089208999
window-watermark=1718089208999
res:[1718089200000~1718089206000) (窗口左闭右开)
2> (a,1718089200000,1718089201000)
2> (a,1718089200000,1718089205000)
2> (a,1718089204000,1718089201000)
2> (a,1718089204000,1718089205000)
1> (b,1718089200000,1718089201000)
1> (b,1718089200000,1718089205000)
1> (b,1718089204000,1718089201000)
1> (b,1718089204000,1718089205000)
1> (c,1718089200000,1718089201000)
1> (c,1718089200000,1718089205000)
1> (c,1718089204000,1718089201000)
1> (c,1718089204000,1718089205000)
res:[1718089203000~1718089209000) (窗口左闭右开)
2> (a,1718089204000,1718089205000)
1> (b,1718089204000,1718089205000)
1> (c,1718089204000,1718089205000)