flink中通用的watermark设置代码模板,
watermark生成策略通常有四种,常用的为第一、二种
- 一、WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofMillis(3000)) 允许乱序的watermark // Duration.ofMillis(3000) 设置容忍时间为3000ms
- 二、WatermarkStrategy.forMonotonousTimestamps() 完全不容忍乱序,紧跟最大事件时间;等价于第一种的括号内填0,底层调用的就是第一种的API
- 三、WatermarkStrategy.noWatermarks() 不使用watermark
- 四、WatermarkStrategy.forGenerator() 自定义watermark
通常会在source直接设置watermark,下游算子会沿用并继续向后传递source生成的watermark
source设置watermark代码模板示例:
package com.syxStudy;
import java.time.Duration;
/**
 * Template: attach a watermark strategy directly to the source stream.
 *
 * <p>Reads comma-separated text lines from a socket, uses the first field of each line as the
 * event-time timestamp, and tolerates up to 3000 ms of out-of-orderness.
 *
 * <p>This is a template only: it defines no sink and never calls {@code env.execute()}.
 */
public class WaterMarkAPI {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: socket text stream; each line is assumed to look like "1684243149,syx,18".
        DataStreamSource<String> sourceStream = env.socketTextStream("192.168.xx.xxx", 9999);

        // Build the watermark strategy: bounded out-of-orderness plus a timestamp assigner.
        WatermarkStrategy<String> stringWatermarkStrategy =
                WatermarkStrategy.<String>forBoundedOutOfOrderness(Duration.ofMillis(3000))
                        .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                            @Override
                            public long extractTimestamp(String s, long l) {
                                // The first comma-separated field carries the event time.
                                // NOTE(review): Flink event timestamps are epoch milliseconds,
                                // while the sample value above looks like epoch seconds.
                                // Confirm the unit of the real data and convert if needed.
                                return Long.parseLong(s.split(",")[0]);
                            }
                        });

        // assignTimestampsAndWatermarks does not change sourceStream in place. It returns a
        // new stream that carries the timestamps and watermarks, so keep the result and build
        // every downstream operator on it. Watermarks can also be assigned after a later
        // operator, but assigning them at the source is the usual choice.
        SingleOutputStreamOperator<String> watermarkedStream =
                sourceStream.assignTimestampsAndWatermarks(stringWatermarkStrategy);
    }
}
在map算子中生成watermark(不常用)
package com.syxStudy;
import java.time.Duration;
/**
 * Template: assign watermarks after a map operator instead of at the source (less common).
 *
 * <p>Parses each socket line into an {@code EventBean}, then attaches a monotonous-timestamp
 * watermark strategy that reads the event time from the bean.
 *
 * <p>This is a template only: it defines no sink and never calls {@code env.execute()}.
 */
public class WaterMarkAPI {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: socket text stream; each line is assumed to look like "1684243149,syx,18".
        DataStreamSource<String> sourceStream = env.socketTextStream("192.168.xx.xxx", 9999);

        // Map step: split each line into timestamp, name and age.
        SingleOutputStreamOperator<EventBean> eventStream = sourceStream.map(new MapFunction<String, EventBean>() {
            @Override
            public EventBean map(String s) throws Exception {
                String[] fields = s.split(",");
                // Field 0 = timestamp, field 1 = name, field 2 = age.
                // The age previously came from field 0, which put the timestamp into age.
                return new EventBean(Long.parseLong(fields[0]), fields[1], Integer.parseInt(fields[2]));
            }
        }).returns(EventBean.class); // explicit output type hint for the map operator

        // Attach the watermark strategy to the mapped stream. Downstream operators must be
        // built on the returned stream, not on eventStream.
        SingleOutputStreamOperator<EventBean> watermarkedEvents = eventStream.assignTimestampsAndWatermarks(
                WatermarkStrategy.<EventBean>forMonotonousTimestamps()
                        .withTimestampAssigner(new SerializableTimestampAssigner<EventBean>() {
                            @Override
                            public long extractTimestamp(EventBean eventBean, long l) {
                                // NOTE(review): Flink event timestamps are epoch milliseconds,
                                // while the sample value above looks like epoch seconds.
                                // Confirm the unit of the real data and convert if needed.
                                return eventBean.getTimestamp();
                            }
                        })
        );
    }
}
/**
 * Simple data holder for one parsed event: a timestamp, a name and an age.
 *
 * <p>The map example builds it with a three-argument constructor and reads it through
 * {@code getTimestamp()}. Neither is written out here, so they are expected to come from the
 * annotations below (presumably Lombok; the imports are not shown, so confirm).
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
class EventBean{
// Event time of the record; the watermark example uses it as the event-time timestamp.
private Long timestamp;
// Name field of the record.
private String name;
// Age field of the record.
private int age;
}