先上代码
- 添加Watermark来解决一定程度上的数据延迟和数据乱序问题。
- 使用OutputTag（侧输出流）+allowedLateness解决迟到数据丢失的问题。
package com.daidai.watermarks;
import com.daidai.source.mocksource.domain.Order;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.OutputTag;
import java.time.Duration;
import java.util.Random;
import java.util.UUID;
/**
 * Flink demo job that shows how an event-time window deals with out-of-order and late records.
 *
 * <p>The pipeline built by {@link #main(String[])}:
 * <ol>
 *   <li>reads randomly generated {@link Order}s whose create time lies 0-14 seconds in the past;</li>
 *   <li>assigns event-time timestamps from {@code Order.getCreateTime()} with a bounded
 *       out-of-orderness watermark of 3 seconds;</li>
 *   <li>keys by user id and sums the {@code money} field in 5-second tumbling event-time windows
 *       with an allowed lateness of 5 seconds;</li>
 *   <li>prints the window results, and separately prints the records that the window operator
 *       routed to the {@code "later"} side output.</li>
 * </ol>
 */
public class AllowedLateness {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Order> orderDataStreamSource = env.addSource(new RandomOrderSource());

        // Side-output tag for records the window operator considers too late to process.
        OutputTag<Order> later = new OutputTag<>("later", TypeInformation.of(Order.class));

        // The first lambda argument is the stream element itself; the second is the timestamp
        // previously attached to the record, which is ignored here.
        SingleOutputStreamOperator<Order> timestampsAndWatermarks = orderDataStreamSource
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                .<Order>forBoundedOutOfOrderness(Duration.ofSeconds(3))
                                .withTimestampAssigner(
                                        (order, recordTimestamp) -> order.getCreateTime()));

        SingleOutputStreamOperator<Order> sum = timestampsAndWatermarks
                .keyBy(Order::getUserId)
                .window(TumblingEventTimeWindows.of(Time.seconds(5)))
                // Keep window state for 5 more seconds of event time so late records can still
                // update an already-fired window ...
                .allowedLateness(Time.seconds(5))
                // ... and send anything arriving after that to the side output instead of dropping it.
                .sideOutputLateData(later)
                .sum("money");

        sum.print("正常数据");

        DataStream<Order> laterDS = sum.getSideOutput(later);
        laterDS.print("迟到严重数据");

        env.execute();
    }

    /**
     * Source that emits random {@link Order}s in a tight loop (no throttling) until cancelled.
     *
     * <p>Each order gets a random UUID as id, a user id in {@code [0, 3)}, a money amount in
     * {@code [0, 100)} and a create time between 0 and 14 whole seconds before the current
     * wall-clock time, which produces out-of-order and late events downstream.
     */
    private static final class RandomOrderSource implements SourceFunction<Order> {

        private static final long serialVersionUID = 1L;

        /**
         * Loop guard. It is {@code volatile} because {@link #cancel()} is invoked from a different
         * thread than the one executing {@link #run(SourceContext)}.
         */
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<Order> ctx) throws Exception {
            // One generator for the whole run instead of a new instance per record.
            Random random = new Random();
            while (running) {
                Order order = new Order();
                order.setId(UUID.randomUUID().toString());
                order.setUserId(random.nextInt(3));
                order.setCreateTime(System.currentTimeMillis() - random.nextInt(15) * 1000);
                order.setMoney(random.nextInt(100));
                ctx.collect(order);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }
}