问题:由于数据不连续,导致某个时间段最后一个窗口一直不会触发,一直等到新数据流入才会触发。
解决:通过自定义WatermarkStrategy(实现其createWatermarkGenerator方法,返回自定义的WatermarkGenerator)来控制watermark的发送。
思路:每来一条新数据都会触发一次onEvent方法;在不做额外配置的情况下,onPeriodicEmit会按默认的水印发送间隔(200ms,可通过env.getConfig().setAutoWatermarkInterval调整)周期性触发。因此可以在onPeriodicEmit中把最后一条数据进来的时间与当前系统时间做比较,一旦超过x秒没有新数据进来,就发送一个足以关闭当前窗口的watermark;
/**
 * Demo job: closing the last event-time window of a burst when the input goes quiet.
 *
 * <p>Reads JSON-encoded {@link Order} lines from a socket, keys them by name, sums {@code id}
 * over tumbling event-time windows, and prints both the window results and the late records
 * routed to {@link #outTag}. Watermarks come from {@link WatermarkDemoFunction}, which pushes
 * the watermark past the current window once no record has arrived for a while, so the last
 * window does not have to wait for the next record to fire.
 */
@Slf4j
public class WatermarkStrategyFunction {

    /** Size of the tumbling window; also how far the quiet-period flush jumps the watermark. */
    private static final long WINDOW_SIZE_MS = 5_000L;

    /** How far behind the highest timestamp seen a record may arrive and still be on time. */
    private static final long OUT_OF_ORDERNESS_MS = 3_000L;

    /** Wall-clock time without any record after which the current window is force-closed. */
    private static final long IDLE_FLUSH_TIMEOUT_MS = 10_000L;

    /** Side-output tag that receives records arriving after the allowed lateness. */
    public static OutputTag<Order> outTag = new OutputTag<Order>("outTag"){};

    /**
     * Builds and runs the demo pipeline.
     *
     * @param args unused
     * @throws Exception if the job cannot be built or executed
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(50000L);
        DataStreamSource<String> inputStream = env.socketTextStream("10.10.14.151", 7788);

        // Parse once and assign timestamps/watermarks once: a second map whose result is
        // discarded, or a second watermark assignment stacked on the first, only adds work.
        //
        // The idle timeout must exceed IDLE_FLUSH_TIMEOUT_MS: the idleness wrapper installed by
        // withIdleness stops calling the wrapped generator's onPeriodicEmit once the stream is
        // marked idle, so a shorter timeout (previously 1 s) would pre-empt the flush.
        // NOTE(review): based on Flink's WatermarksWithIdleness; confirm for the version in use.
        SingleOutputStreamOperator<Order> result = inputStream
                .map(message -> JSON.parseObject(message, Order.class))
                .assignTimestampsAndWatermarks(new WatermarkDemoFunction()
                        .withTimestampAssigner((event, recordTimestamp) -> event.getAmount())
                        .withIdleness(Duration.ofMillis(2 * IDLE_FLUSH_TIMEOUT_MS)))
                .keyBy(Order::getName)
                .window(TumblingEventTimeWindows.of(Time.milliseconds(WINDOW_SIZE_MS)))
                .allowedLateness(Time.minutes(1))
                .sideOutputLateData(outTag)
                .sum("id")
                .setParallelism(2);

        result.getSideOutput(outTag).print();
        result.print("result");
        env.execute();
    }

    /**
     * Watermark strategy that behaves like a bounded-out-of-orderness strategy while records
     * keep arriving, and force-closes the current window after a quiet period.
     */
    public static class WatermarkDemoFunction implements WatermarkStrategy<Order> {

        /**
         * Creates a fresh generator; all mutable state lives in the generator, so generators
         * created from the same strategy instance never share it.
         */
        @Override
        public WatermarkGenerator<Order> createWatermarkGenerator(WatermarkGeneratorSupplier.Context context) {
            return new IdleFlushingGenerator();
        }
    }

    /** Periodic generator implementing the quiet-period flush described on the class. */
    private static final class IdleFlushingGenerator implements WatermarkGenerator<Order> {

        /** Highest {@code amount} (the field the job uses as event time) seen so far. */
        private long maxTimestamp;

        /** Wall-clock time at which the most recent record arrived. */
        private long lastEventWallClockMs;

        /** True until the first record, and again after a flush until the next record. */
        private boolean flushed = true;

        /** Tracks the highest timestamp and re-arms the quiet-period flush. */
        @Override
        public void onEvent(Order event, long eventTimestamp, WatermarkOutput output) {
            maxTimestamp = Math.max(maxTimestamp, event.getAmount());
            System.out.println("maxWatermark is " + maxTimestamp);
            lastEventWallClockMs = System.currentTimeMillis();
            flushed = false;
        }

        /**
         * Emits the regular watermark while records are flowing; once nothing has arrived for
         * {@code IDLE_FLUSH_TIMEOUT_MS}, emits a single watermark one window size beyond the
         * highest timestamp and then stays silent until the next record.
         */
        @Override
        public void onPeriodicEmit(WatermarkOutput output) {
            // Nothing to emit before the first usable record, and nothing more to emit after a
            // flush: a regular watermark would be lower than the flushed one.
            if (flushed || maxTimestamp - OUT_OF_ORDERNESS_MS <= 0) {
                return;
            }

            long quietForMs = System.currentTimeMillis() - lastEventWallClockMs;
            if (quietForMs >= IDLE_FLUSH_TIMEOUT_MS) {
                // Jump a full window ahead so the window holding maxTimestamp is sure to close.
                output.emitWatermark(new Watermark(maxTimestamp + WINDOW_SIZE_MS));
                flushed = true;
                System.out.println("触发窗口");
            } else {
                System.out.println("正常发送水印");
                // A watermark t means "no more records with timestamp <= t", so subtract one
                // more to keep a record exactly OUT_OF_ORDERNESS_MS behind the maximum on time.
                output.emitWatermark(new Watermark(maxTimestamp - OUT_OF_ORDERNESS_MS - 1));
            }
        }
    }
}
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Event record used by the watermark demo job; instances are created from JSON text via
 * {@code JSON.parseObject(message, Order.class)}.
 *
 * <p>Lombok supplies the accessors, {@code equals}/{@code hashCode}/{@code toString}
 * ({@code @Data}), an all-arguments constructor whose parameters follow the field order
 * below, and a no-arguments constructor.
 *
 * @date 2021/11/25
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Order {
// Field aggregated per window by the demo job (sum("id")).
private int id;
// Key the demo job groups records by (keyBy(Order::getName)).
private String name;
// Despite its name, the demo job uses this value as the record's event-time timestamp.
// NOTE(review): presumably epoch milliseconds, and a null value would fail when unboxed
// by the timestamp assigner — confirm against the data producer.
private Long amount;
}