上代码:
import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.common.eventtime.WatermarkGenerator;
import org.apache.flink.api.common.eventtime.WatermarkOutput;
import org.apache.flink.util.Preconditions;
import java.time.Duration;
/**
 * A bounded-out-of-orderness watermark generator that keeps advancing the watermark while no
 * events arrive.
 *
 * <p>While events are flowing, the emitted watermark is
 * {@code maxTimestamp - outOfOrderness - 1}. Every call to {@link #onPeriodicEmit} also advances
 * an internal logical clock by {@code autoWatermarkInterval}; once that clock is more than
 * {@code waitTimeInMillsToEmitWatermark} ahead of the timestamp recorded at the last event, the
 * watermark is derived from the logical clock instead, so that pending windows can fire even
 * though no further record arrives.
 *
 * @param <T> type of the stream elements
 */
public class BoundedOutOfOrdernessWatermarksOnEventTime<T> implements WatermarkGenerator<T> {

    /**
     * The maximum timestamp encountered so far. Once the idle timeout has fired this is moved
     * forward to the logical clock, so it may be ahead of any real event timestamp.
     */
    private long maxTimestamp;

    /** The maximum out-of-orderness that this watermark generator assumes, in milliseconds. */
    private final long outOfOrdernessMillis;

    /**
     * Value of {@link #maxTimestamp} at the moment the most recent event was processed.
     * This is an event-time value, not a processing-time value.
     */
    private long lastEventTimestamp;

    /**
     * How far (in milliseconds) the logical clock may run ahead of {@link #lastEventTimestamp}
     * before the watermark is driven by the logical clock instead of by events.
     */
    private final long waitTimeMillis;

    /**
     * Step, in milliseconds, by which the logical clock advances on every periodic emit.
     * For the logical clock to track wall-clock time this should equal the interval configured
     * via {@code ExecutionConfig.setAutoWatermarkInterval(...)}.
     */
    private final long autoWatermarkInterval;

    /** The current logical event time, in milliseconds. */
    private long currentLogicalEventTimeMills;

    /**
     * Creates a new watermark generator with the given out-of-orderness bound.
     *
     * @param outOfOrderness The bound for the out-of-orderness of the event timestamps;
     *     must be non-null and non-negative.
     * @param waitTimeInMillsToEmitWatermark How long the generator waits for the next element
     *     before it starts emitting watermarks from the logical clock; must be non-null and
     *     non-negative.
     * @param autoWatermarkInterval Step in milliseconds added to the logical clock on each
     *     periodic emit; must not be negative.
     * @throws NullPointerException if either duration is {@code null}
     * @throws IllegalArgumentException if any argument is negative
     */
    public BoundedOutOfOrdernessWatermarksOnEventTime(Duration outOfOrderness, Duration waitTimeInMillsToEmitWatermark, long autoWatermarkInterval) {
        // Validate everything up front so that a bad configuration fails when the job is built,
        // not later inside onPeriodicEmit.
        Preconditions.checkNotNull(outOfOrderness, "outOfOrderness");
        Preconditions.checkArgument(!outOfOrderness.isNegative(), "outOfOrderness cannot be negative");
        Preconditions.checkNotNull(waitTimeInMillsToEmitWatermark, "waitTimeInMillsToEmitWatermark");
        Preconditions.checkArgument(!waitTimeInMillsToEmitWatermark.isNegative(), "waitTimeInMillsToEmitWatermark cannot be negative");
        Preconditions.checkArgument(autoWatermarkInterval >= 0, "autoWatermarkInterval cannot be negative");

        this.outOfOrdernessMillis = outOfOrderness.toMillis();
        this.waitTimeMillis = waitTimeInMillsToEmitWatermark.toMillis();
        this.autoWatermarkInterval = autoWatermarkInterval;

        // Start so that our lowest watermark would be Long.MIN_VALUE.
        final long initialTimestamp = Long.MIN_VALUE + outOfOrdernessMillis + 1;
        this.maxTimestamp = initialTimestamp;
        this.lastEventTimestamp = initialTimestamp;
        this.currentLogicalEventTimeMills = initialTimestamp;
    }

    /**
     * Creates a new watermark generator that advances its logical clock by 200 ms per periodic
     * emit.
     *
     * @param outOfOrderness The bound for the out-of-orderness of the event timestamps.
     * @param waitTimeInMillsToEmitWatermark How long the generator waits for the next element
     *     before it starts emitting watermarks from the logical clock.
     */
    public BoundedOutOfOrdernessWatermarksOnEventTime(Duration outOfOrderness, Duration waitTimeInMillsToEmitWatermark) {
        this(outOfOrderness, waitTimeInMillsToEmitWatermark, 200L);
    }

    /**
     * Called for every record; records the new maximum timestamp and resets the logical clock
     * to it. No watermark is emitted here.
     *
     * @param event element in stream
     * @param eventTimestamp the time an event happened
     * @param output An output for watermarks (unused by this method)
     */
    @Override
    public void onEvent(T event, long eventTimestamp, WatermarkOutput output) {
        maxTimestamp = Math.max(maxTimestamp, eventTimestamp);
        // Both the idle reference point and the logical clock restart from the current maximum,
        // which ends any idle period that was in progress.
        lastEventTimestamp = maxTimestamp;
        currentLogicalEventTimeMills = maxTimestamp;
    }

    /**
     * Called periodically; the interval is defined by
     * {@code ExecutionConfig.setAutoWatermarkInterval(...)}, for example:
     * <pre>{@code env.getConfig().setAutoWatermarkInterval(400L);}
     * </pre>
     *
     * <p>Each call advances the logical clock by {@code autoWatermarkInterval}. If the logical
     * clock is more than the configured wait time ahead of the last event, the watermark is
     * emitted from the logical clock so that the last window can still be computed; otherwise
     * the watermark is emitted from the maximum timestamp seen so far.
     *
     * @param output An output for watermarks. The output accepts watermarks and idleness
     *     (inactivity) status
     */
    @Override
    public void onPeriodicEmit(WatermarkOutput output) {
        currentLogicalEventTimeMills = currentLogicalEventTimeMills + autoWatermarkInterval;
        if ((currentLogicalEventTimeMills - lastEventTimestamp) > waitTimeMillis) {
            // Idle: drive the watermark from the logical clock.
            output.emitWatermark(new Watermark(currentLogicalEventTimeMills - outOfOrdernessMillis - 1));
            // Remember the advanced time so a later, older event cannot move the watermark back.
            maxTimestamp = currentLogicalEventTimeMills;
        } else {
            // Events are still recent enough: standard bounded-out-of-orderness watermark.
            output.emitWatermark(new Watermark(maxTimestamp - outOfOrdernessMillis - 1));
        }
    }
}
代码核心作用
这是针对 Apache Flink 低吞吐量场景下窗口延迟触发问题设计的混合型水印生成器,通过事件驱动+时间驱动双模式确保小数据量场景的窗口及时提交。相较于标准 BoundedOutOfOrdernessWatermarks,该方案解决以下痛点:
1. 数据稀疏场景的窗口停滞问题
- **传统水印缺陷**:标准策略(如 BoundedOutOfOrdernessWatermarks)完全依赖事件时间推进水印,当数据间隔超过窗口长度时(如每5分钟仅1条数据),水印无法更新,导致窗口在下一条数据到达之前一直挂起;若之后再无数据到达,最后一个窗口将永远不会触发
- **本方案创新点**:引入 currentLogicalEventTimeMills 逻辑时钟,以 autoWatermarkInterval(默认200ms)为步长持续推进时间轴。通过 waitTimeInMillsToEmitWatermark 阈值(如设定1分钟)判断数据是否中断,自动切换为时间驱动模式
2. 混合触发机制设计
// 关键逻辑判断
if ((currentLogicalEventTimeMills - lastEventTimestamp) > waitTimeThreshold) {
// 时间驱动模式:强制推进水印
output.emitWatermark(new Watermark(logicalTime - outOfOrderness -1));
} else {
// 事件驱动模式:按最大事件时间生成水印
output.emitWatermark(new Watermark(maxTimestamp - outOfOrderness -1));
}
3. 参数协同控制
| 参数名 | 类型 | 典型值 | 作用 |
|---|---|---|---|
| outOfOrderness | Duration | 2秒 | 允许事件乱序的最大时差 |
| waitTimeInMillsToEmitWatermark | Duration | 1分钟 | 无数据等待超时阈值 |
| autoWatermarkInterval | long | 200ms | 逻辑时钟每次推进的步长(毫秒),应与 Flink 的 setAutoWatermarkInterval 配置保持一致 |
4. 监控指标设计
# Metrics示例
flink_task_watermark_latency{operator="WindowOperator"} // 水印与事件时间差值
flink_task_idle_time_ms{task="Source"} // 数据源空闲时间