1、概述
原理:窗口操作输出的元素会携带一个 event-time 或 processing-time 时间戳(对事件时间窗口而言,即该窗口的最大时间戳 end - 1),因此下游可以直接基于这个时间戳继续做窗口操作。
示例:第一个操作中时间窗口 [0, 5) 的结果会落入下一个窗口操作的 [0, 5) 窗口中,即先在一个窗口内按 key 求和,再在下一个操作中找出这个窗口中 top-k 的元素。
2、代码示例
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
 * Demonstrates two consecutive event-time window operations.
 * <p>
 * Principle: after a window operation, each emitted element carries an event-time or
 * processing-time timestamp, so a downstream window operation can be applied directly.
 * <p>
 * Example: the results of the time window {@code [0, 5)} in the first operation land in the
 * {@code [0, 5)} window of the next window operation. Here the first window sums values per key,
 * and the second (non-keyed) window picks the top-k elements of that same window.
 * <p>
 * Input is read from a socket on {@code localhost:8888}, one record per line in the form
 * {@code key,value,timestampMillis}.
 */
public class _16_ContinusKeyByWindowAllOperator {

    /** Number of highest-sum records emitted for each global window. */
    private static final int TOP_K = 1;

    /**
     * Builds and runs the job: parse lines, assign timestamps/watermarks, sum per key in
     * 5-second tumbling windows, then emit the top-k sums of each 5-second global window.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        DataStreamSource<String> source = env.socketTextStream("localhost", 8888);

        // Parse "key,value,timestampMillis" into a Tuple3.
        SingleOutputStreamOperator<Tuple3<String, Integer, Long>> map = source.map(new MapFunction<String, Tuple3<String, Integer, Long>>() {
            @Override
            public Tuple3<String, Integer, Long> map(String input) throws Exception {
                String[] fields = input.split(",");
                // Trim the numeric fields so stray spaces around the commas do not break parsing.
                return new Tuple3<>(fields[0], Integer.parseInt(fields[1].trim()), Long.parseLong(fields[2].trim()));
            }
        });

        // Event time requires a watermark strategy and a timestamp assigner.
        SingleOutputStreamOperator<Tuple3<String, Integer, Long>> watermarks = map.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Tuple3<String, Integer, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, Integer, Long>>() {
                            @Override
                            public long extractTimestamp(Tuple3<String, Integer, Long> input, long l) {
                                // The third field is the event timestamp in milliseconds.
                                return input.f2;
                            }
                        }));

        // First window: sum the values of each key within a 5-second tumbling window.
        SingleOutputStreamOperator<Tuple3<String, Integer, Long>> resultsPerKey = watermarks.keyBy(e -> e.f0)
                .window(TumblingEventTimeWindows.of(Duration.ofSeconds(5)))
                .reduce(new ReduceFunction<Tuple3<String, Integer, Long>>() {
                    @Override
                    public Tuple3<String, Integer, Long> reduce(Tuple3<String, Integer, Long> tuple3, Tuple3<String, Integer, Long> t1) throws Exception {
                        // f2 of the running sum is the wall-clock time of the reduce call; it is
                        // only informational and is not the event timestamp from the input.
                        Tuple3<String, Integer, Long> tupleRes = new Tuple3<>(tuple3.f0, tuple3.f1 + t1.f1, System.currentTimeMillis());
                        System.out.println("ReduceRes=>" + tupleRes);
                        return tupleRes;
                    }
                });

        // Second window: across all keys, emit the TOP_K largest sums of the same 5-second window.
        resultsPerKey
                .windowAll(TumblingEventTimeWindows.of(Duration.ofSeconds(5)))
                .process(new ProcessAllWindowFunction<Tuple3<String, Integer, Long>, Tuple3<String, Integer, Long>, TimeWindow>() {
                    @Override
                    public void process(ProcessAllWindowFunction<Tuple3<String, Integer, Long>, Tuple3<String, Integer, Long>, TimeWindow>.Context context, Iterable<Tuple3<String, Integer, Long>> iterable, Collector<Tuple3<String, Integer, Long>> collector) throws Exception {
                        List<Tuple3<String, Integer, Long>> actualList = new ArrayList<>();
                        iterable.iterator().forEachRemaining(actualList::add);

                        // Sort by summed value, descending. Integer.compare avoids the overflow
                        // that subtracting two ints can cause.
                        Comparator<Tuple3<String, Integer, Long>> bySumDescending =
                                (left, right) -> Integer.compare(right.f1, left.f1);
                        actualList.sort(bySumDescending);

                        System.out.println("WindowAll=>" + context.window().getStart() + "-" + context.window().getEnd());

                        // Never read past the end of the list when the window holds fewer than TOP_K records.
                        int limit = Math.min(TOP_K, actualList.size());
                        for (int i = 0; i < limit; i++) {
                            Tuple3<String, Integer, Long> entry = actualList.get(i);
                            collector.collect(new Tuple3<>(entry.f0, entry.f1, context.window().maxTimestamp()));
                        }
                    }
                })
                .print();

        env.execute();
    }
}
3、测试用例(形如 a,1,1718157600000 的行是通过 socket 输入的数据;以 ReduceRes=>、WindowAll=> 开头或形如 2> (...) 的行是程序的控制台输出)
a,1,1718157600000
b,2,1718157600000
c,3,1718157600000
a,1,1718157602000
b,2,1718157602000
c,3,1718157602000
ReduceRes=>(b,4,1718181290845)
ReduceRes=>(a,2,1718181290845)
ReduceRes=>(c,6,1718181291259)
a,1,1718157604000
b,2,1718157604000
c,3,1718157604000
ReduceRes=>(b,6,1718181306696)
ReduceRes=>(a,3,1718181306696)
ReduceRes=>(c,9,1718181306901)
a,1,1718157605001
b,2,1718157605001
WindowAll=>1718157600000-1718157605000
2> (c,9,1718157604999)
c,3,1718157605001