56、Flink 的 KeyedCoProcessFunction 代码示例

1、代码示例

import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
import org.apache.flink.util.Collector;

import java.time.Duration;

/**
 * Demo job: an event-time "left join" of two socket text streams built on
 * {@link KeyedCoProcessFunction}.
 *
 * <p>Each input line has the form {@code key,value,timestampMillis}. Both streams are keyed by
 * the first field. Every record is buffered in keyed list state and registers an event-time
 * timer; when a timer fires, all buffered left records of that key are matched against all
 * buffered right records of that key on the second field. Left records without a match are
 * emitted with an empty right value and a right timestamp of {@code 0}.
 */
public class _02_CoProcessFunction {
    /** Offset, in milliseconds, added to a record's timestamp to obtain its timer time. */
    private static final long TIMER_DELAY_MS = 5000L;

    /**
     * Builds and runs the job: left stream from localhost:8888, right stream from localhost:9999.
     *
     * @param args unused
     * @throws Exception if the job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Parallelism is kept small for testing; in production, idle sources need an idleness setting.
        env.setParallelism(2);
        env.disableOperatorChaining();

        SingleOutputStreamOperator<Tuple3<String, String, Long>> watermarkLeft = readEventTimeStream(env, 8888);
        SingleOutputStreamOperator<Tuple3<String, String, Long>> watermarkRight = readEventTimeStream(env, 9999);

        /*
         * Sample session. Each step lists the records typed into a socket, the console lines
         * printed by the job (quoted verbatim), and the "Co-Keyed-Process-Watermark" value
         * recorded after the step (presumably the watermark of the co-process operator).
         *
         * left-1
         *
         * a,1,1718089200000
         * b,2,1718089200000
         * c,3,1718089200000
         *
         * left 流数据创建的定时器时间为=>1718089205000
         * left 流数据创建的定时器时间为=>1718089205000
         * left 流数据创建的定时器时间为=>1718089205000
         *
         * Co-Keyed-Process-Watermark:No Watermark
         *
         * right-2
         *
         * a,1,1718089201000
         * b,2,1718089201000
         * c,3,1718089201000
         *
         * right 流数据创建的定时器时间为=>1718089206000
         * right 流数据创建的定时器时间为=>1718089206000
         * right 流数据创建的定时器时间为=>1718089206000
         *
         * Co-Keyed-Process-Watermark:1718089199999
         *
         * left-3
         *
         * a,4,1718089202000
         * b,5,1718089202000
         * c,6,1718089202000
         *
         * left 流数据创建的定时器时间为=>1718089207000
         * left 流数据创建的定时器时间为=>1718089207000
         * left 流数据创建的定时器时间为=>1718089207000
         *
         * Co-Keyed-Process-Watermark:1718089200999
         *
         * right-4
         *
         * a,7,1718089203000
         * b,8,1718089203000
         * c,9,1718089203000
         *
         * right 流数据创建的定时器时间为=>1718089208000
         * right 流数据创建的定时器时间为=>1718089208000
         * right 流数据创建的定时器时间为=>1718089208000
         *
         * Co-Keyed-Process-Watermark:1718089201999
         *
         * left-right-5 (the same three records are sent to both sockets)
         *
         * a,1,1718089205001 [timer not yet fired - record is in state and takes part in the join]
         * b,2,1718089205001 [timer fires - record is in state and takes part in the join]
         * c,3,1718089205001 [the timer had already fired when the right-stream c-3 arrived, so it did not take part]
         *
         * left 流数据创建的定时器时间为=>1718089210000
         * left 流数据创建的定时器时间为=>1718089210000
         * left 流数据创建的定时器时间为=>1718089210000
         *
         * right 流数据创建的定时器时间为=>1718089210000
         * right 流数据创建的定时器时间为=>1718089210000
         * right 流数据创建的定时器时间为=>1718089210000
         *
         * 定时器触发时,当前的 Key=>a,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
         * 定时器触发时,当前的 Key=>b,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
         * 定时器触发时,当前的 Key=>c,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
         *
         * 2> (a,1,1,1718089200000,1718089201000)
         * 2> (a,1,1,1718089205001,1718089201000)
         * 2> (a,1,1,1718089200000,1718089205001)
         * 2> (a,1,1,1718089205001,1718089205001)
         * 2> (a,4,,1718089202000,0)
         *
         * 1> (b,2,2,1718089200000,1718089201000)
         * 1> (b,2,2,1718089205001,1718089201000)
         * 1> (b,2,2,1718089200000,1718089205001)
         * 1> (b,2,2,1718089205001,1718089205001)
         * 1> (b,5,,1718089202000,0)
         *
         * 1> (c,3,3,1718089200000,1718089201000)
         * 1> (c,3,3,1718089205001,1718089201000)
         * 1> (c,6,,1718089202000,0)
         *
         * Co-Keyed-Process-Watermark:1718089205000
         */
        watermarkLeft.keyBy(e -> e.f0)
                .connect(watermarkRight.keyBy(e -> e.f0))
                .process(new LeftJoinFunction())
                .print();

        env.execute();
    }

    /**
     * Reads text lines from {@code localhost:port}, parses them into
     * {@code (key, value, timestampMillis)} tuples and assigns event-time timestamps and
     * watermarks (zero allowed out-of-orderness) from the third field.
     *
     * @param env  execution environment the source is added to
     * @param port TCP port on localhost to read lines from
     * @return the parsed stream with timestamps and watermarks assigned
     */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> readEventTimeStream(StreamExecutionEnvironment env, int port) {
        DataStreamSource<String> lines = env.socketTextStream("localhost", port);

        SingleOutputStreamOperator<Tuple3<String, String, Long>> parsed = lines.map(new MapFunction<String, Tuple3<String, String, Long>>() {
            @Override
            public Tuple3<String, String, Long> map(String input) throws Exception {
                String[] fields = input.split(",");
                if (fields.length < 3) {
                    throw new IllegalArgumentException("Expected 'key,value,timestampMillis' but got: " + input);
                }
                // trim() tolerates a trailing '\r' (CRLF clients) or stray spaces around the number.
                return new Tuple3<>(fields[0], fields[1], Long.parseLong(fields[2].trim()));
            }
        });

        // Event time needs both a watermark strategy and a timestamp assigner.
        return parsed.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple3<String, String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple3<String, String, Long> input, long l) {
                        return input.f2;
                    }
                }));
    }

    /**
     * Buffers left and right records per key and, on each event-time timer, emits
     * {@code (key, leftValue, rightValue, leftTimestamp, rightTimestamp)} for every buffered
     * left/right pair whose second fields are equal; unmatched left records are emitted as
     * {@code (key, leftValue, "", leftTimestamp, 0)}.
     */
    private static final class LeftJoinFunction extends KeyedCoProcessFunction<String, Tuple3<String, String, Long>, Tuple3<String, String, Long>, Tuple5<String, String, String, Long, Long>> {
        private transient ListState<Tuple3<String, String, Long>> leftListState;
        private transient ListState<Tuple3<String, String, Long>> rightListState;

        /**
         * Registers the two keyed list states.
         *
         * <p>NOTE(review): no state TTL is configured. A TTL (StateTtlConfig plus
         * {@code enableTimeToLive} on each descriptor) would keep state from growing without
         * bound; TTL is based on processing time only.
         *
         * <p>There is deliberately no {@code close()} override that clears these states: keyed
         * state is scoped to the key of the element or timer being processed, and
         * {@code close()} is not invoked for any particular key.
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            TypeInformation<Tuple3<String, String, Long>> recordType = TypeInformation.of(new TypeHint<Tuple3<String, String, Long>>() {
            });

            leftListState = getRuntimeContext().getListState(new ListStateDescriptor<>("left", recordType));
            rightListState = getRuntimeContext().getListState(new ListStateDescriptor<>("right", recordType));
        }

        /** Buffers a left record and registers its event-time timer. */
        @Override
        public void processElement1(Tuple3<String, String, Long> left, Context context, Collector<Tuple5<String, String, String, Long, Long>> collector) throws Exception {
            leftListState.add(left);
            long timerTime = timerTimeFor(context.timestamp());
            System.out.println("left 流数据创建的定时器时间为=>" + timerTime);
            context.timerService().registerEventTimeTimer(timerTime);
        }

        /** Buffers a right record and registers its event-time timer. */
        @Override
        public void processElement2(Tuple3<String, String, Long> right, Context context, Collector<Tuple5<String, String, String, Long, Long>> collector) throws Exception {
            rightListState.add(right);
            long timerTime = timerTimeFor(context.timestamp());
            System.out.println("right 流数据创建的定时器时间为=>" + timerTime);
            context.timerService().registerEventTimeTimer(timerTime);
        }

        /**
         * Joins everything currently buffered for the timer's key and emits the result.
         *
         * <p>NOTE(review): the buffered lists are not cleared here, so each later timer for the
         * same key emits the pairs from all records still in state again.
         */
        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple5<String, String, String, Long, Long>> out) throws Exception {
            String currentKey = ctx.getCurrentKey();
            long currentWatermark = ctx.timerService().currentWatermark();
            System.out.println("定时器触发时,当前的 Key=>" + currentKey + ",当前的 Watermark=>" + currentWatermark + ",当前的 timestamp=>" + timestamp);

            // Read the right side once; it does not change while this timer is being handled.
            Iterable<Tuple3<String, String, Long>> rightTuples = rightListState.get();

            for (Tuple3<String, String, Long> leftTuple : leftListState.get()) {
                boolean isJoin = false;

                for (Tuple3<String, String, Long> rightTuple : rightTuples) {
                    if (leftTuple.f1.equals(rightTuple.f1)) {
                        isJoin = true;
                        out.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, rightTuple.f1, leftTuple.f2, rightTuple.f2));
                    }
                }

                // Emulate a left join: emit the left record alone when nothing matched.
                if (!isJoin) {
                    out.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, "", leftTuple.f2, 0L));
                }
            }
        }

        /**
         * Returns the element timestamp plus {@link #TIMER_DELAY_MS}, with the sub-second part
         * dropped (integer division by 1000, then multiplication by 1000).
         */
        private static long timerTimeFor(long elementTimestamp) {
            return (elementTimestamp + TIMER_DELAY_MS) / 1000 * 1000;
        }
    }
}

2、测试用例

          left-1
         
          a,1,1718089200000
          b,2,1718089200000
          c,3,1718089200000
         
          left 流数据创建的定时器时间为=>1718089205000
          left 流数据创建的定时器时间为=>1718089205000
          left 流数据创建的定时器时间为=>1718089205000
         
          Co-Keyed-Process-Watermark:No Watermark
         
          right-2
         
          a,1,1718089201000
          b,2,1718089201000
          c,3,1718089201000
         
          right 流数据创建的定时器时间为=>1718089206000
          right 流数据创建的定时器时间为=>1718089206000
          right 流数据创建的定时器时间为=>1718089206000
         
          Co-Keyed-Process-Watermark:1718089199999
         
          left-3
         
          a,4,1718089202000
          b,5,1718089202000
          c,6,1718089202000
         
          left 流数据创建的定时器时间为=>1718089207000
          left 流数据创建的定时器时间为=>1718089207000
          left 流数据创建的定时器时间为=>1718089207000
         
          Co-Keyed-Process-Watermark:1718089200999
         
          right-4
         
          a,7,1718089203000
          b,8,1718089203000
          c,9,1718089203000
         
          right 流数据创建的定时器时间为=>1718089208000
          right 流数据创建的定时器时间为=>1718089208000
          right 流数据创建的定时器时间为=>1718089208000
         
          Co-Keyed-Process-Watermark:1718089201999
         
          left-right-5
         
          a,1,1718089205001[定时器还未触发-在状态中参与计算]
          b,2,1718089205001[定时器触发-在状态中参与计算]
          c,3,1718089205001[right-stream 的 c-3 到达时,定时器已经触发了,所以没有参与计算]
         
          left 流数据创建的定时器时间为=>1718089210000
          left 流数据创建的定时器时间为=>1718089210000
          left 流数据创建的定时器时间为=>1718089210000
         
          right 流数据创建的定时器时间为=>1718089210000
          right 流数据创建的定时器时间为=>1718089210000
          right 流数据创建的定时器时间为=>1718089210000
         
          定时器触发时,当前的 Key=>a,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
          定时器触发时,当前的 Key=>b,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
          定时器触发时,当前的 Key=>c,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
         
          2> (a,1,1,1718089200000,1718089201000)
          2> (a,1,1,1718089205001,1718089201000)
          2> (a,1,1,1718089200000,1718089205001)
          2> (a,1,1,1718089205001,1718089205001)
          2> (a,4,,1718089202000,0)
         
          1> (b,2,2,1718089200000,1718089201000)
          1> (b,2,2,1718089205001,1718089201000)
          1> (b,2,2,1718089200000,1718089205001)
          1> (b,2,2,1718089205001,1718089205001)
          1> (b,5,,1718089202000,0)
         
          1> (c,3,3,1718089200000,1718089201000)
          1> (c,3,3,1718089205001,1718089201000)
          1> (c,6,,1718089202000,0)
         
          Co-Keyed-Process-Watermark:1718089205000
  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

猫猫爱吃小鱼粮

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值