1、概述
原理:在窗口操作之后,元素会携带一个 event-time 或 processing-time timestamp;
示例:第一个操作中时间窗口 `[0, 5)` 的结果会出现在下一个窗口操作的 `[0, 5)` 窗口中,即先在一个窗口内按 key 求和,再在下一个操作中拼接输出对应的元素。(下面的代码示例中,第一个窗口为 3 秒,第二个窗口为 6 秒。)
2、代码示例
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
 * Demonstrates two consecutive keyed window operations on one event-time stream.
 *
 * <p>Principle: after a window operation, every emitted element carries an event-time or
 * processing-time timestamp, so a downstream window operator can assign it to a window again.
 *
 * <p>Pipeline built by {@link #main(String[])}:
 * <ol>
 *   <li>read text lines of the form {@code key,value,timestampMillis} from a socket;</li>
 *   <li>sum {@code value} per key in 3-second tumbling event-time windows;</li>
 *   <li>re-key the partial sums and, per key, concatenate them in 6-second tumbling
 *       event-time windows, printing the result.</li>
 * </ol>
 */
public class _17_ContinusKeyByTwoOperator {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        DataStreamSource<String> source = env.socketTextStream("localhost", 8888);

        // Parse "key,value,timestampMillis" into (key, value, timestamp).
        SingleOutputStreamOperator<Tuple3<String, Integer, Long>> parsed = source.map(new MapFunction<String, Tuple3<String, Integer, Long>>() {
            @Override
            public Tuple3<String, Integer, Long> map(String input) throws Exception {
                String[] fields = input.split(",");
                if (fields.length < 3) {
                    // Fail with the offending line instead of a bare ArrayIndexOutOfBoundsException.
                    throw new IllegalArgumentException(
                            "Expected 'key,value,timestampMillis' but got: " + input);
                }
                // Trim each field so stray spaces or a trailing '\r' from the socket client
                // do not break numeric parsing.
                return new Tuple3<>(
                        fields[0].trim(),
                        Integer.parseInt(fields[1].trim()),
                        Long.parseLong(fields[2].trim()));
            }
        });

        // Event time requires a watermark strategy and a timestamp assigner.
        SingleOutputStreamOperator<Tuple3<String, Integer, Long>> withWatermarks = parsed.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Tuple3<String, Integer, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, Integer, Long>>() {
                            @Override
                            public long extractTimestamp(Tuple3<String, Integer, Long> input, long recordTimestamp) {
                                // The third field of the tuple is the event timestamp.
                                return input.f2;
                            }
                        }));

        // First window stage: per-key sum of the value field in 3-second tumbling windows.
        SingleOutputStreamOperator<Tuple3<String, Integer, Long>> resultsPerKey = withWatermarks
                .keyBy(e -> e.f0)
                .window(TumblingEventTimeWindows.of(Duration.ofSeconds(3)))
                .reduce(new ReduceFunction<Tuple3<String, Integer, Long>>() {
                    @Override
                    public Tuple3<String, Integer, Long> reduce(Tuple3<String, Integer, Long> left, Tuple3<String, Integer, Long> right) throws Exception {
                        // The third field becomes the wall-clock time of the merge; the second
                        // stage below does not read it.
                        return new Tuple3<>(left.f0, left.f1 + right.f1, System.currentTimeMillis());
                    }
                });

        // Second window stage: per key, concatenate the partial sums that fall into the same
        // 6-second tumbling window.
        resultsPerKey
                .keyBy(e -> e.f0)
                .window(TumblingEventTimeWindows.of(Duration.ofSeconds(6)))
                .apply(new WindowFunction<Tuple3<String, Integer, Long>, Tuple3<String, String, Long>, String, TimeWindow>() {
                    @Override
                    public void apply(String key, TimeWindow timeWindow, Iterable<Tuple3<String, Integer, Long>> partialSums, Collector<Tuple3<String, String, Long>> collector) throws Exception {
                        System.out.println("Window=>" + timeWindow.getStart() + "-" + timeWindow.getEnd());

                        // Each partial sum is prefixed with '-', e.g. "-2-1".
                        StringBuilder joined = new StringBuilder();
                        for (Tuple3<String, Integer, Long> partial : partialSums) {
                            joined.append('-').append(partial.f1);
                        }

                        // The stream is keyed by f0, so the key argument equals every element's f0.
                        collector.collect(new Tuple3<>(key, joined.toString(), System.currentTimeMillis()));
                    }
                })
                .print();

        env.execute();
    }
}
3、测试用例(向 localhost:8888 的 socket 依次输入下列 `key,value,timestamp` 数据;以 `Window=>` 开头以及带 `1>`/`2>` 前缀的行是程序的输出)
a,1,1718157600000
b,2,1718157600000
c,3,1718157600000
a,1,1718157602000
b,2,1718157602000
c,3,1718157602000
a,1,1718157604000
b,2,1718157604000
c,3,1718157604000
a,1,1718157606000
b,2,1718157606000
Window=>1718157600000-1718157606000
2> (a,-2-1,1718183727132)
Window=>1718157600000-1718157606000
1> (b,-4-2,1718183727132)
Window=>1718157600000-1718157606000
1> (c,-6-3,1718183727133)
c,3,1718157606000