53、Flink Interval Join 代码示例

1、概述

Interval Join 会根据 keyBy 指定的 key 对两条流进行关联,其语义为 Inner Join(只有左右两侧都匹配到的数据才会输出);

interval Join 算子的水位线会取两条流中水位线的最小值;

Interval Join 迟到数据的判定以 Interval Join 算子的水位线为基准:事件时间戳小于算子当前水位线的数据会被判定为迟到数据;

Interval Join 可以通过 sideOutputLeftLateData 和 sideOutputRightLateData 将左、右两条流中的迟到数据分别输出到各自的侧输出流。

2、代码示例

import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.time.Duration;

/**
 * interval Join 默认会根据 keyBy 的条件进行 Join 此时为 Inner Join
 * interval Join 算子的水位线会取两条流中水位线的最小值;
 * interval Join 迟到数据的判定是以 interval Join 算子的水位线为基准;
 * interval Join 可以分别输出两条流中迟到的数据-[sideOutputLeftLateData,sideOutputRightLateData];
 */
public class _04_IntervalInnerJoin {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 测试时限制了分区数,生产中需要设置空闲数据源
        env.setParallelism(2);
        env.disableOperatorChaining();

        DataStreamSource<String> inputLeft = env.socketTextStream("localhost", 8888);

        // 事件时间需要设置水位线策略和时间戳
        SingleOutputStreamOperator<Tuple2<String, Long>> mapLeft = inputLeft.map(new MapFunction<String, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(String input) throws Exception {
                String[] fields = input.split(",");
                return new Tuple2<>(fields[0], Long.parseLong(fields[1]));
            }
        });

        SingleOutputStreamOperator<Tuple2<String, Long>> watermarkLeft = mapLeft.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple2<String, Long> input, long l) {
                        return input.f1;
                    }
                }));

        DataStreamSource<String> inputRight = env.socketTextStream("localhost", 9999);

        OutputTag<Tuple2<String, Long>> leftLateTag = new OutputTag<Tuple2<String, Long>>("left-late") {};
        OutputTag<Tuple2<String, Long>> rightLateTag = new OutputTag<Tuple2<String, Long>>("right-late") {};

        // 事件时间需要设置水位线策略和时间戳
        SingleOutputStreamOperator<Tuple2<String, Long>> mapRight = inputRight.map(new MapFunction<String, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(String input) throws Exception {
                String[] fields = input.split(",");
                return new Tuple2<>(fields[0], Long.parseLong(fields[1]));
            }
        });

        SingleOutputStreamOperator<Tuple2<String, Long>> watermarkRight = mapRight.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple2<String, Long> input, long l) {
                        return input.f1;
                    }
                }));

        /**
         * left-1
         *
         * a,1718089200000
         * b,1718089200000
         * c,1718089200000
         *
         * interval_join_watermark=No Watermark
         *
         * right-2
         *
         * a,1718089201000
         * b,1718089201000
         * c,1718089201000
         *
         * interval_join_watermark=1718089199999
         *
         * res=:2> (a,1718089200000,1718089201000)
         * res=:1> (b,1718089200000,1718089201000)
         * res=:1> (c,1718089200000,1718089201000)
         *
         * left-3
         *
         * a,1718089203000
         * b,1718089203000
         * c,1718089203000
         *
         * interval_join_watermark=1718089200999
         *
         * right-4
         *
         * a,1718089204000
         * b,1718089204000
         * c,1718089204000
         *
         * interval_join_watermark=1718089202999
         *
         * res=:2> (a,1718089203000,1718089204000)
         * res=:1> (b,1718089203000,1718089204000)
         * res=:1> (c,1718089203000,1718089204000)
         *
         * left-right-5
         *
         * a,1718089202000
         * b,1718089202000
         * c,1718089202000
         *
         * left-late=:1> (b,1718089202000)
         * left-late=:2> (a,1718089202000)
         * left-late=:1> (c,1718089202000)
         * right-late=:1> (b,1718089202000)
         * right-late=:2> (a,1718089202000)
         * right-late=:1> (c,1718089202000)
         */
        SingleOutputStreamOperator<Tuple3<String, Long, Long>> resStream = watermarkLeft.keyBy(e -> e.f0)
                .intervalJoin(watermarkRight.keyBy(e -> e.f0))
                .between(Duration.ofSeconds(-1), Duration.ofSeconds(1))
                .sideOutputLeftLateData(leftLateTag)
                .sideOutputRightLateData(rightLateTag)
                .process(new ProcessJoinFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple3<String, Long, Long>>() {
                    @Override
                    public void processElement(Tuple2<String, Long> t1, Tuple2<String, Long> t2, ProcessJoinFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple3<String, Long, Long>>.Context context, Collector<Tuple3<String, Long, Long>> collector) throws Exception {
                        collector.collect(new Tuple3<>(t1.f0, t1.f1, t2.f1));
                    }
                });

        resStream.print("res=");
        resStream.getSideOutput(leftLateTag).print("left-late=");
        resStream.getSideOutput(rightLateTag).print("right-late=");

        env.execute();
    }
}

3、测试用例

		  left-1
         
          a,1718089200000
          b,1718089200000
          c,1718089200000
         
          interval_join_watermark=No Watermark
         
          right-2
         
          a,1718089201000
          b,1718089201000
          c,1718089201000
         
          interval_join_watermark=1718089199999
         
          res=:2> (a,1718089200000,1718089201000)
          res=:1> (b,1718089200000,1718089201000)
          res=:1> (c,1718089200000,1718089201000)
         
          left-3
         
          a,1718089203000
          b,1718089203000
          c,1718089203000
         
          interval_join_watermark=1718089200999
         
          right-4
         
          a,1718089204000
          b,1718089204000
          c,1718089204000
         
          interval_join_watermark=1718089202999
         
          res=:2> (a,1718089203000,1718089204000)
          res=:1> (b,1718089203000,1718089204000)
          res=:1> (c,1718089203000,1718089204000)
         
          left-right-5
         
          a,1718089202000
          b,1718089202000
          c,1718089202000
         
          left-late=:1> (b,1718089202000)
          left-late=:2> (a,1718089202000)
          left-late=:1> (c,1718089202000)
          right-late=:1> (b,1718089202000)
          right-late=:2> (a,1718089202000)
          right-late=:1> (c,1718089202000)
  • 5
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

猫猫爱吃小鱼粮

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值