Flink API - 多流操作


Flink 合并多条流时采用 FIFO(先进先出)的方式:先到达的元素先被处理。
union
多条流的元素类型必须一样
可以合并多条流:stream1.union(stream2, stream3)
connect
只能合并两条流
两条流的元素的类型可以不一样
intervalJoin
DataStream API
CoMapFunction<IN1, IN2, OUT>
map1
map2
CoFlatMapFunction<IN1, IN2, OUT>
flatMap1:来自第一条流的事件进入CoFlatMapFunction,触发调用。
flatMap2:来自第二条流的事件进入CoFlatMapFunction,触发调用。

底层API
	CoProcessFunction<IN1, IN2, OUT>
	processElement1
	processElement2
	BroadcastProcessFunction<IN1, IN2, OUT>
	processElement
	processBroadcastElement 处理广播流数据
	KeyedBroadcastProcessFunction
	ProcessJoinFunction<IN1, IN2, OUT>
	JoinFunction

union

/**
 * Demonstrates {@code union}: three integer streams are merged into one stream and printed.
 */
public class UnionDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<Integer> first = env.fromElements(1, 2);
        DataStreamSource<Integer> second = env.fromElements(3, 4);
        DataStreamSource<Integer> third = env.fromElements(5, 6);

        // union:
        //   1. merges any number of streams in a single call
        //   2. every participating stream must carry the same element type
        //   3. elements are handled first come, first served
        first.union(second, third).print();

        env.execute();
    }
}

connect

CoFlatMapFunction & CoMapFunction & connect

public class CoFlatMapFunctionDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Event> clickStream = env.addSource(new ClickSource());

        /*
        nc -lk 9092
            Mary
            Alice

         */
        DataStreamSource<String> ruleStream = env.socketTextStream("hadoop102", 9092);

        // connect
        // 只能合并两条流
        // 两条流的元素的类型可以不一样
        ConnectedStreams<Event, String> connectedStreams = clickStream
                .connect(ruleStream);

        connectedStreams
                .flatMap(
                        //CoMapFunction
                        new CoFlatMapFunction<Event, String, Event>() {

                            private String rule;

                            @Override
                            public void flatMap1(Event value, Collector<Event> out) throws Exception {

                                // 按规则匹配 点击流

                                if (StringUtils.equals(rule,value.user)) out.collect(value);
                            }

                            @Override
                            public void flatMap2(String value, Collector<Event> out) throws Exception {
                                // 处理 connect 中的 规则流
                                rule = value;
                            }
                        }
                )
                .print();


        env.execute();

/*
1> Event{user='Mary', url='./cart', timestamp=2023-03-01 21:46:14.381}
1> Event{user='Mary', url='./prod?id=2', timestamp=2023-03-01 21:46:24.27}
1> Event{user='Mary', url='./prod?id=2', timestamp=2023-03-01 21:46:29.159}
1> Event{user='Mary', url='./prod?id=1', timestamp=2023-03-01 21:46:47.629}
1> Event{user='Mary', url='./prod?id=2', timestamp=2023-03-01 21:46:54.159}
1> Event{user='Mary', url='./cart', timestamp=2023-03-01 21:47:00.694}
1> Event{user='Mary', url='./prod?id=2', timestamp=2023-03-01 21:47:03.958}
2> Event{user='Alice', url='./cart', timestamp=2023-03-01 21:47:08.952}
1> Event{user='Mary', url='./prod?id=2', timestamp=2023-03-01 21:47:12.115}
2> Event{user='Alice', url='./fav', timestamp=2023-03-01 21:47:13.849}
2> Event{user='Alice', url='./home', timestamp=2023-03-01 21:47:17.104}
1> Event{user='Mary', url='./fav', timestamp=2023-03-01 21:47:18.63}
1> Event{user='Mary', url='./prod?id=1', timestamp=2023-03-01 21:47:23.528}
2> Event{user='Alice', url='./fav', timestamp=2023-03-01 21:47:28.53}
1> Event{user='Mary', url='./home', timestamp=2023-03-01 21:47:31.692}
 */
    }

CoProcessFunction & connect

/**
 * Demonstrates a keyed {@code connect} with a {@code CoProcessFunction}: a switch stream opens a
 * per-URL gate for a limited processing-time span, and click events are emitted only while the
 * gate for their URL is open.
 */
public class CoProcessFunctionDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Optional pre-filter, currently disabled:
        //     .filter(r -> "./home".equals(r.url) || "./fav".equals(r.url))
        SingleOutputStreamOperator<Event> clickStream = env.addSource(new ClickSource());

        // Switch records: (url, how long the gate stays open in milliseconds)
        //   ./home -> 5 s
        //   ./fav  -> 15 s
        DataStreamSource<Tuple2<String, Long>> switchStream = env.fromElements(
                Tuple2.of("./home", 5 * 1000L),
                Tuple2.of("./fav", 15 * 1000L)
        );

        clickStream
                .connect(switchStream)
                // Key both inputs by URL so a click and its switch record share the same state.
                .keyBy(click -> click.url, gate -> gate.f0)
                .process(
                        new CoProcessFunction<Event, Tuple2<String, Long>, Event>() {

                            // TRUE while the gate for the current key is open; empty otherwise.
                            private ValueState<Boolean> gateOpen;

                            @Override
                            public void open(Configuration parameters) throws Exception {
                                ValueStateDescriptor<Boolean> descriptor =
                                        new ValueStateDescriptor<Boolean>("enableThroughTs", Types.BOOLEAN);
                                gateOpen = getRuntimeContext().getState(descriptor);
                            }

                            /** Click input: forward the event only while the gate is open. */
                            @Override
                            public void processElement1(Event value, Context ctx, Collector<Event> out) throws Exception {
                                if (!Boolean.TRUE.equals(gateOpen.value())) {
                                    return;
                                }
                                out.collect(value);
                            }

                            /** Switch input: open the gate and schedule a timer that closes it. */
                            @Override
                            public void processElement2(Tuple2<String, Long> value, Context ctx, Collector<Event> out) throws Exception {
                                gateOpen.update(true);

                                long closeAt = ctx.timerService().currentProcessingTime() + value.f1;
                                ctx.timerService().registerProcessingTimeTimer(closeAt);
                            }

                            /** Timer callback: close the gate by clearing the state. */
                            @Override
                            public void onTimer(long timestamp, OnTimerContext ctx, Collector<Event> out) throws Exception {
                                gateOpen.clear();
                            }
                        }
                )
                .print();

        env.execute();
    }

}

ProcessJoinFunction & intervalJoin

/**
 * Demonstrates {@code intervalJoin} with a {@code ProcessJoinFunction}: each click is paired
 * with the browse events of the same user whose timestamps lie inside an interval around the
 * click's timestamp.
 */
public class ProcessJoinFunctionDemo {
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Left input: one click at minute 12.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> clicks = env
                .fromElements(
                        Tuple3.of("user-1", "click", 12 * 60 * 1000L)
                )
                .assignTimestampsAndWatermarks(eventTimeFromThirdField());

        // Right input: browse events at minutes 1, 7, 10, 11 and 20.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> browses = env
                .fromElements(
                        Tuple3.of("user-1", "browse", 1 * 60 * 1000L),
                        Tuple3.of("user-1", "browse", 7 * 60 * 1000L),
                        Tuple3.of("user-1", "browse", 10 * 60 * 1000L),
                        Tuple3.of("user-1", "browse", 11 * 60 * 1000L),
                        Tuple3.of("user-1", "browse", 20 * 60 * 1000L)
                )
                .assignTimestampsAndWatermarks(eventTimeFromThirdField());

        clicks
                .keyBy(click -> click.f0)
                .intervalJoin(browses.keyBy(browse -> browse.f0))
                // A browse event joins a click when
                //     clickTs - 10 min <= browseTs <= clickTs + 5 min
                // For the click at minute 12 that is 2 <= browseTs <= 17 (minutes),
                // which covers the browse events at minutes 7, 10 and 11.
                .between(Time.minutes(-10), Time.minutes(5))
                .process(
                        new ProcessJoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
                            /** Emits one line per matched (click, browse) pair. */
                            @Override
                            public void processElement(Tuple3<String, String, Long> left, Tuple3<String, String, Long> right, Context ctx, Collector<String> out) throws Exception {
                                out.collect(left + " -> " + right);
                            }
                        }
                )
                .print();

        env.execute();
    }

    /** Builds a zero-delay watermark strategy that uses field {@code f2} as the event timestamp. */
    private static WatermarkStrategy<Tuple3<String, String, Long>> eventTimeFromThirdField() {
        return WatermarkStrategy
                .<Tuple3<String, String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0L))
                .withTimestampAssigner(
                        (Tuple3<String, String, Long> element, long recordTimestamp) -> element.f2);
    }
}

BroadcastProcessFunction & connect

/**
 * Demonstrates {@code connect} with a broadcast stream and a {@code BroadcastProcessFunction}:
 * user info records are stored in broadcast state, and every click event is printed together
 * with the info stored for its user.
 */
public class BroadcastProcessFunctionDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Click stream
        DataStreamSource<Event> clickStream = env.addSource(new ClickSource());

        // Stream that will be broadcast: (user, value)
        DataStreamSource<Tuple2<String, Integer>> infoStream = env
                .fromElements(
                        Tuple2.of("Mary", 100),
                        Tuple2.of("Bob", 100),
                        Tuple2.of("Alice", 100)
                );

        // Descriptor of the broadcast state; the same instance is used to create the broadcast
        // stream and to look the state up inside the function, instead of rebuilding an
        // identical descriptor for every processed element.
        MapStateDescriptor<String, Tuple2<String, Integer>> broadCastDescriptor = new MapStateDescriptor<String, Tuple2<String, Integer>>("broad-test", Types.STRING, Types.TUPLE(Types.STRING, Types.INT));
        BroadcastStream<Tuple2<String, Integer>> broadcastStream = infoStream.broadcast(broadCastDescriptor);


        clickStream
                .connect(broadcastStream)
                .process(
                        new BroadcastProcessFunction<Event, Tuple2<String, Integer>, String>() {

                            /**
                             * Click input: emit the event followed by the broadcast entry stored
                             * under its user (the entry prints as "null" when none is stored).
                             */
                            @Override
                            public void processElement(Event event, ReadOnlyContext ctx, Collector<String> out) throws Exception {

                                ReadOnlyBroadcastState<String, Tuple2<String, Integer>> broadcastState = ctx.getBroadcastState(broadCastDescriptor);

                                out.collect(event + " -> " + broadcastState.get(event.user));

                            }

                            /** Broadcast input: store the record in broadcast state under its user name. */
                            @Override
                            public void processBroadcastElement(Tuple2<String, Integer> value, Context ctx, Collector<String> out) throws Exception {

                                BroadcastState<String, Tuple2<String, Integer>> broadcastState = ctx.getBroadcastState(broadCastDescriptor);

                                broadcastState.put(value.f0, value);

                            }
                        }
                )
                .print();


        env.execute();


    }
}

KeyedBroadcastProcessFunction & connect

/**
 * Demonstrates a keyed stream connected to a broadcast stream and processed by a
 * {@code KeyedBroadcastProcessFunction}: user info records go into broadcast state, and each
 * click event is printed together with the info stored for its user.
 */
public class KeyedBroadcastProcessFunctionDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Click stream
        DataStreamSource<Event> clicks = env.addSource(new ClickSource());

        // Stream that will be broadcast: (user, value)
        DataStreamSource<Tuple2<String, Integer>> userInfo = env.fromElements(
                Tuple2.of("Mary", 100),
                Tuple2.of("Bob", 100),
                Tuple2.of("Alice", 100)
        );

        // Broadcast state descriptor, shared by the broadcast stream and the function below.
        MapStateDescriptor<String, Tuple2<String, Integer>> userInfoDescriptor =
                new MapStateDescriptor<String, Tuple2<String, Integer>>(
                        "broad-test", Types.STRING, Types.TUPLE(Types.STRING, Types.INT));
        BroadcastStream<Tuple2<String, Integer>> broadcastInfo = userInfo.broadcast(userInfoDescriptor);

        clicks
                // Constant key: every click event is assigned to the same key.
                .keyBy(click -> true)
                .connect(broadcastInfo)
                .process(new KeyedBroadcastProcessFunction<Object, Event, Tuple2<String, Integer>, String>() {

                    /** Click input: emit the event followed by the entry stored for its user. */
                    @Override
                    public void processElement(Event event, ReadOnlyContext ctx, Collector<String> out) throws Exception {
                        Tuple2<String, Integer> info = ctx.getBroadcastState(userInfoDescriptor).get(event.user);
                        out.collect(event + " -> " + info);
                    }

                    /** Broadcast input: store the record in broadcast state under its user name. */
                    @Override
                    public void processBroadcastElement(Tuple2<String, Integer> value, Context ctx, Collector<String> out) throws Exception {
                        ctx.getBroadcastState(userInfoDescriptor).put(value.f0, value);
                    }
                })
                .print();

        env.execute();
    }

}

JoinFunction

/**
 * Demonstrates a window join with a {@code JoinFunction}: elements of two streams that share
 * the same {@code f0} and fall into the same 5-second tumbling event-time window are paired.
 */
public class JoinWindowDemo {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        SingleOutputStreamOperator<Tuple2<String, Long>> left = env
                .fromElements(
                        Tuple2.of("a", 1L),
                        Tuple2.of("b", 1L)
                )
                .assignTimestampsAndWatermarks(secondsFieldAsEventTime());

        SingleOutputStreamOperator<Tuple2<String, Long>> right = env
                .fromElements(
                        Tuple2.of("a", 1L),
                        Tuple2.of("a", 2L),
                        Tuple2.of("a", 5L),
                        Tuple2.of("a", 7L),
                        Tuple2.of("b", 4L),
                        Tuple2.of("b", 6L)
                )
                .assignTimestampsAndWatermarks(secondsFieldAsEventTime());

        left
                .join(right)
                // Join condition: left.f0 == right.f0
                .where(l -> l.f0)
                .equalTo(r -> r.f0)
                .window(TumblingEventTimeWindows.of(Time.seconds(5L)))
                .apply(new JoinFunction<Tuple2<String, Long>, Tuple2<String, Long>, String>() {
                    /** Formats one matched pair as "first -> second". */
                    @Override
                    public String join(Tuple2<String, Long> first, Tuple2<String, Long> second) throws Exception {
                        return first + " -> " + second;
                    }
                })
                .print();

        env.execute();
    }

    /**
     * Builds a monotonous-timestamp watermark strategy whose event timestamp is field
     * {@code f1} multiplied by 1000.
     */
    private static WatermarkStrategy<Tuple2<String, Long>> secondsFieldAsEventTime() {
        return WatermarkStrategy
                .<Tuple2<String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner((Tuple2<String, Long> elem, long ts) -> elem.f1 * 1000L);
    }

}

参考资料
https://nightlies.apache.org/flink/flink-docs-release-1.16/docs/dev/datastream/operators/joining/

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值