Flink之多流操作


一、分流

1.1、filter过滤:不推荐(每条分支都要对整条流完整过滤一遍,分支越多重复处理越多)

package com.hpsk.flink.moreStream;

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Splits one stream into per-channel streams by running an independent
 * {@code filter} over the same input for every channel of interest.
 */
public class CutDataSteam1 {
    public static void main(String[] args) throws Exception {
        // Execution environment, single parallel instance.
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        // Input: (id, channel, timestamp) tuples built from in-memory elements.
        final SingleOutputStreamOperator<Tuple3<String, String, Long>> events = environment.fromElements(
                Tuple3.of("001", "app", 1000L),
                Tuple3.of("002", "web", 2000L),
                Tuple3.of("003", "html5", 2000L)
        );

        // One filtered branch per channel; records of any other channel are not printed.
        final SingleOutputStreamOperator<Tuple3<String, String, Long>> appOnly = selectChannel(events, "app");
        final SingleOutputStreamOperator<Tuple3<String, String, Long>> webOnly = selectChannel(events, "web");

        // Print each branch with its own prefix.
        appOnly.print("app ");
        webOnly.print("web ");

        // Run the job.
        environment.execute();
    }

    /**
     * Keeps only the tuples whose channel field ({@code f1}) equals {@code channel}.
     *
     * @param source  stream of (id, channel, timestamp) tuples
     * @param channel channel name to keep
     * @return the filtered stream
     */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> selectChannel(
            SingleOutputStreamOperator<Tuple3<String, String, Long>> source, String channel) {
        return source.filter(tuple -> tuple.f1.equals(channel));
    }
}

1.2、OutputTag侧输出流:推荐

package com.hpsk.flink.moreStream;

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

/**
 * Splits one stream with side outputs: "app" and "web" records are routed to
 * their own tagged side outputs, everything else stays on the main output.
 */
public class OutputTagDataSteam {
    // Side-output tags, declared as anonymous subclasses (the trailing {}).
    private static final OutputTag<String> APP_TAG = new OutputTag<String>("app") {};
    private static final OutputTag<String> WEB_TAG = new OutputTag<String>("web") {};

    public static void main(String[] args) throws Exception {
        // Execution environment, single parallel instance.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        // Input: (id, channel, timestamp) tuples built from in-memory elements.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> events = environment.fromElements(
                Tuple3.of("001", "app", 1000L),
                Tuple3.of("002", "web", 2000L),
                Tuple3.of("003", "html5", 2000L)
        );

        // Route every record exactly once; the main output carries the remainder.
        SingleOutputStreamOperator<String> remainder = events.process(new ChannelRouter());

        // Side outputs are read back from the operator that emitted them.
        DataStream<String> appRecords = remainder.getSideOutput(APP_TAG);
        DataStream<String> webRecords = remainder.getSideOutput(WEB_TAG);

        // Print each branch with its own prefix.
        appRecords.print("app ");
        webRecords.print("web ");
        remainder.print("other ");

        // Run the job.
        environment.execute();
    }

    /** Sends each tuple, rendered as a string, to the output matching its channel field ({@code f1}). */
    private static class ChannelRouter extends ProcessFunction<Tuple3<String, String, Long>, String> {
        @Override
        public void processElement(Tuple3<String, String, Long> value, Context ctx, Collector<String> out) throws Exception {
            switch (value.f1) {
                case "app":
                    ctx.output(APP_TAG, value.toString());
                    break;
                case "web":
                    ctx.output(WEB_TAG, value.toString());
                    break;
                default:
                    out.collect(value.toString());
                    break;
            }
        }
    }
}

二、合流

2.1、union

package com.hpsk.flink.moreStream;

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Merges two streams of the same element type into one with {@code union}
 * and prints the merged stream.
 */
public class UnionDataSteam {
    public static void main(String[] args) throws Exception {
        // Execution environment, single parallel instance.
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        // First input: ids 001-003.
        final SingleOutputStreamOperator<Tuple3<String, String, Long>> firstBatch = environment.fromElements(
                Tuple3.of("001", "app", 1000L),
                Tuple3.of("002", "web", 2000L),
                Tuple3.of("003", "html5", 2000L)
        );

        // Second input: ids 005-007.
        final SingleOutputStreamOperator<Tuple3<String, String, Long>> secondBatch = environment.fromElements(
                Tuple3.of("005", "app", 1000L),
                Tuple3.of("006", "web", 2000L),
                Tuple3.of("007", "html5", 2000L)
        );

        // union only accepts streams whose element types are identical.
        final DataStream<Tuple3<String, String, Long>> merged = firstBatch.union(secondBatch);

        // Print the merged stream.
        merged.print("union ");

        // Run the job.
        environment.execute();
    }
}

2.2、connect

package com.hpsk.flink.function;

import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ReadOnlyBroadcastState;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.BroadcastConnectedStream;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.util.Collector;


/**
 * Simulates dynamically configured creation of dimension tables in a real-time
 * data warehouse: a configuration stream is broadcast and connected to the main
 * business stream, and each business record is handled according to whether its
 * table appears in the broadcast configuration.
 */
public class BroadcastProcessFunctionDS {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Configuration stream: "tableName,value" entries for the dimension tables.
        SingleOutputStreamOperator<String> tableConfigStream = env.fromElements(
                "table1,createTable",
                "table2,createTable",
                "table3,createTable");

        // 3. Main stream: "tableName,data" lines read from a socket.
        //    Lines arrive from outside the job, so a line without a comma is skipped
        //    instead of failing the whole job with ArrayIndexOutOfBoundsException.
        SingleOutputStreamOperator<Tuple2<String, String>> mySqlTableStream = env.socketTextStream("hadoop102", 8888)
                .flatMap((String line, Collector<Tuple2<String, String>> out) -> {
                    String[] fields = line.split(",");
                    if (fields.length >= 2) {
                        out.collect(Tuple2.of(fields[0], fields[1]));
                    }
                })
                // The lambda's generic output type is erased, so it is declared explicitly.
                .returns(Types.TUPLE(Types.STRING, Types.STRING));

        // 4. Turn the configuration stream into a broadcast stream backed by a map state.
        MapStateDescriptor<String, String> mapStateDescriptor = new MapStateDescriptor<>("map-state", String.class, String.class);
        BroadcastStream<String> broadcast = tableConfigStream.broadcast(mapStateDescriptor);

        // 5. Connect the main stream with the broadcast stream and process the main
        //    stream records according to the broadcast configuration.
        BroadcastConnectedStream<Tuple2<String, String>, String> connectedStream = mySqlTableStream.connect(broadcast);
        SingleOutputStreamOperator<String> result = connectedStream.process(new MyBroadcastProcessFunction(mapStateDescriptor));

        // 6. Print the result.
        result.print("output ");

        // 7. Run the job.
        env.execute();
    }

    /**
     * Stores broadcast configuration entries in broadcast state and uses them to
     * classify main-stream records as configured tables or plain business tables.
     */
    public static class MyBroadcastProcessFunction extends BroadcastProcessFunction<Tuple2<String, String>, String, String>{
        private final MapStateDescriptor<String, String> mapStateDescriptor;

        /**
         * @param mapStateDescriptor descriptor of the broadcast state; the same
         *                           descriptor is used in {@code main} to create the broadcast stream
         */
        public MyBroadcastProcessFunction(MapStateDescriptor<String, String> mapStateDescriptor) {
            this.mapStateDescriptor = mapStateDescriptor;
        }

        /**
         * Records one "tableName,value" configuration entry in the broadcast state.
         * Entries with fewer than two comma-separated fields are ignored.
         */
        @Override
        public void processBroadcastElement(String value, Context ctx, Collector<String> out) throws Exception {
            String[] split = value.split(",");
            if (split.length < 2) {
                // Malformed configuration entry: nothing usable to store.
                return;
            }
            BroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
            broadcastState.put(split[0].trim(), split[1].trim());
        }

        /**
         * Looks up the record's table name ({@code f0}) in the broadcast state and
         * emits a message describing whether the table is configured.
         */
        @Override
        public void processElement(Tuple2<String, String> value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
            ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
            String table = value.f0;
            String create = broadcastState.get(table);
            if (create != null) {
                out.collect(value.f0 + "为配置表,需要在phoenix中建表 -> 建表语句:" + create + ", 数据为:" + value.f1);
            } else {
                out.collect(value.f0 + "业务表, 跳过建表");
            }
        }
    }
}

2.3、join

package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;
/**
 * Interval join example: every order is joined with the clicks of the same user
 * whose timestamps fall between 5 seconds before and 10 seconds after the order.
 */
public class ProcessJoinFunctionDS {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 2. Stream one: (user, orderId, timestamp). Timestamps never decrease here,
        //    so a monotonous watermark strategy is appropriate.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> orderStream = env.fromElements(
                Tuple3.of("Mary", "order-1", 5000L),
                Tuple3.of("Alice", "order-2", 5000L),
                Tuple3.of("Bob", "order-3", 20000L),
                Tuple3.of("Alice", "order-4", 20000L),
                Tuple3.of("Cary", "order-5", 51000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple3<String,
                String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple3<String, String, Long> element, long recordTimestamp) {
                        return element.f2;
                    }
                })
        );

        // 3. Stream two: click events. Their timestamps are NOT in ascending order
        //    (e.g. 36000 is followed by 30000 and 23000), so a bounded-out-of-orderness
        //    strategy is used; with a monotonous strategy, elements behind an already
        //    emitted watermark would be late and could be dropped by the join.
        //    15 s covers the largest gap in the sample data (36000 -> 23000 = 13 s).
        SingleOutputStreamOperator<Event> clickStream = env.fromElements(
                new Event("Bob", "./cart", 2000L),
                new Event("Alice", "./prod?id=100", 3000L),
                new Event("Alice", "./prod?id=200", 3500L),
                new Event("Bob", "./prod?id=2", 2500L),
                new Event("Alice", "./prod?id=300", 36000L),
                new Event("Bob", "./home", 30000L),
                new Event("Bob", "./prod?id=1", 23000L),
                new Event("Bob", "./prod?id=3", 33000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forBoundedOutOfOrderness(java.time.Duration.ofSeconds(15))
                .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                    @Override
                    public long extractTimestamp(Event element, long recordTimestamp) {
                        return element.timestamp;
                    }
                })
        );

        // 4. Interval-join stream one with stream two, keyed by user.
        SingleOutputStreamOperator<String> result = orderStream
                .keyBy(t -> t.f0)
                .intervalJoin(clickStream.keyBy(t -> t.user))
                // a.timestamp + lowerBound <= b.timestamp <= a.timestamp + upperBound
                .between(Time.seconds(-5), Time.seconds(10))
                .process(new ProcessJoinFunction<Tuple3<String, String, Long>, Event, String>() {
                    @Override
                    public void processElement(Tuple3<String, String, Long> left, Event right, Context ctx, Collector<String> out) throws Exception {
                        // Emit "click => {user, orderId, orderTime}" for every matching pair.
                        out.collect(right + " => {" + left.f0 + ", " + left.f1 + ", " + new Timestamp(left.f2) + "}");
                    }
                });

        // 5. Print the result.
        result.print("output ");

        // 6. Run the job.
        env.execute();
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值