记一次mysql-cdc读多表分流后幂等写入回流mysql

本次是一次实验性操作,Flink版本是1.13.3,mysql-cdc版本是2.0.2,mysql版本是8.0.25。

第一步:使用cdc读取库中的两张表。这里有一个细节:反序列化器使用的是自定义实现(MyCustomerDeserialization),如何自定义反序列化器可以参考cdc官网文档:MySQL CDC Connector — CDC Connectors for Apache Flink® documentation

// Streaming environment; parallelism is pinned to 1 for this experiment.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

// Table environment built on top of the streaming environment.
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

// MySQL CDC source. Every change record is handed to the custom deserializer,
// which produces one String per record (consumed downstream as JSON).
DebeziumSourceFunction<String> cdcSource = MySqlSource.<String>builder()
        // connection
        .hostname("127.0.0.1")
        .port(3306)
        .username("root")
        .password("root")
        // which databases / tables to capture (defined in ConnConfig)
        .databaseList(ConnConfig.DATABASE)
        .tableList(ConnConfig.TABLE_LIST)
        // record format and startup mode
        .deserializer(new MyCustomerDeserialization())
        .startupOptions(StartupOptions.initial())
        .build();

// Attach the CDC source to the job.
DataStreamSource<String> mysqlDS = env.addSource(cdcSource);

第二步:根据mysql中的表结构定义对应的Pojo类(Employee、Department)。类中必须提供公共的空参构造器,否则Flink无法将其识别为POJO,后续把流转换为表时会出错。

第三步:定义侧输出流并分流

// One side-output tag per captured table. Each tag is declared as an anonymous
// subclass (the trailing "{}") so the element type is still available to Flink
// at runtime despite generic type erasure.
OutputTag<Department> departmentTag = new OutputTag<Department>("department") {};
OutputTag<Employee> employeeTag = new OutputTag<Employee>("employee") {};

// Split the CDC stream by source table: each record is turned into its POJO and
// emitted on the matching side output. Nothing is written to the main output;
// mainDataStream only serves as the handle for getSideOutput(...).
SingleOutputStreamOperator<Object> mainDataStream = mysqlDS.process(new ProcessFunction<String, Object>() {
    /**
     * Routes one CDC record to the side output of its table.
     *
     * @param value JSON text produced by the custom deserializer; the fields read
     *              here are "table" and "after"
     * @param ctx   context used to emit to the side outputs
     * @param out   main-output collector (unused)
     */
    @Override
    public void processElement(String value, ProcessFunction<String, Object>.Context ctx, Collector<Object> out) throws Exception {
        JSONObject jsonObject = JSONObject.parseObject(value);
        if (jsonObject == null) {
            // Blank payload: nothing to route.
            return;
        }
        String table = jsonObject.getString("table");
        JSONObject after = jsonObject.getJSONObject("after");
        // A record without a post-change row image (presumably a delete event --
        // confirm against MyCustomerDeserialization) has nothing to build a POJO
        // from. Skipping it avoids a NullPointerException or an all-null row.
        if (after == null || after.isEmpty()) {
            return;
        }
        if ("employee".equals(table)) {
            Integer id = after.getInteger("id");
            String name = after.getString("name");
            Integer age = after.getInteger("age");
            Integer dept_id = after.getInteger("dept_id");
            ctx.output(employeeTag, new Employee(id, name, age, dept_id, Long.parseLong(after.getString("create_time"))));
        } else if ("department".equals(table)) {
            Department department = new Department(after.getInteger("id"), after.getString("name"), after.getLong("create_time"));
            ctx.output(departmentTag, department);
        }
        // Records from any other table are dropped.
    }
});
// Employee side output. Event time comes from create_time (presumably epoch
// milliseconds -- confirm against the POJO), with zero allowed out-of-orderness.
WatermarkStrategy<Employee> employeeWatermarks = WatermarkStrategy
        .<Employee>forBoundedOutOfOrderness(Duration.ofSeconds(0))
        .withTimestampAssigner((element, recordTimestamp) -> element.getCreate_time());
DataStream<Employee> employeeStream = mainDataStream
        .getSideOutput(employeeTag)
        .assignTimestampsAndWatermarks(employeeWatermarks);

// Department side output, configured the same way.
WatermarkStrategy<Department> departmentWatermarks = WatermarkStrategy
        .<Department>forBoundedOutOfOrderness(Duration.ofSeconds(0))
        .withTimestampAssigner((element, recordTimestamp) -> element.getCreate_time());
DataStream<Department> departmentStream = mainDataStream
        .getSideOutput(departmentTag)
        .assignTimestampsAndWatermarks(departmentWatermarks);

第四步:注册虚拟表环境并编写SQL

// Expose both streams to SQL. The POJOs need a no-argument constructor for this
// conversion; create_time is declared as the rowtime (event-time) attribute.
Table empTable = tEnv.fromDataStream(
        employeeStream,
        $("id"), $("name"), $("age"), $("dept_id"),
        $("create_time").rowtime());
tEnv.createTemporaryView("employee", empTable);

Table depTable = tEnv.fromDataStream(
        departmentStream,
        $("id"), $("name"),
        $("create_time").rowtime());
tEnv.createTemporaryView("department", depTable);

        /*
         * Author's note: the two tables cannot be joined first and then windowed
         * together; each has to be windowed on its own and joined afterwards.
         * Windowing requires the tables derived from a DataStream. Mixing one
         * DataStream-derived table with one plain SQL table also works, but the
         * plain SQL table cannot be windowed.
         */

// Number of distinct employee names per department name.
String perDepartmentCountSql = "select b.name,count(distinct a.name) as num " +
        "from employee a left join department b on a.dept_id=b.id " +
        "group by b.name";
Table table = tEnv.sqlQuery(perDepartmentCountSql);
// Explicit schema, left disabled by the author:
//Schema schema = Schema.newBuilder().column("name", DataTypes.STRING()).column("num", DataTypes.BIGINT()).build();

// Convert the query result into a changelog stream of Rows.
DataStream<Row> rowDataStream = tEnv.toChangelogStream(table);

第五步:由于数据是实时流入的,聚合结果会不断更新,因此changelog流中必然包含撤回旧结果和重新下发新结果的记录。这里过滤掉撤回记录和含空值的记录,只保留INSERT和UPDATE_AFTER两类记录,保证每次写入下游的都是最新的结果数据。

// Keep only rows that carry a fresh result (INSERT / UPDATE_AFTER); every other
// row kind, and any row whose "name" or "num" field is null, is dropped.
SingleOutputStreamOperator<Row> resultStream = rowDataStream.filter(row -> {
    RowKind rowKind = row.getKind();
    if (rowKind != RowKind.INSERT && rowKind != RowKind.UPDATE_AFTER) {
        return false;
    }
    return row.getField("name") != null && row.getField("num") != null;
});

//mysql幂等性写入
resultStream.addSink(JdbcSink.sink(
                "INSERT INTO result " +
                        "VALUES" +
                        "(?,?) ON DUPLICATE KEY UPDATE name=?" +
                        ",num=?",
                (ps, t) -> {
                    ps.setString(1, Objects.requireNonNull(t.getField(0)).toString());
                    ps.setDouble(2, Double.parseDouble(Objects.requireNonNull(t.getField(1)).toString()));
                    ps.setString(3, Objects.requireNonNull(t.getField(0)).toString());
                    ps.setDouble(4, Double.parseDouble(Objects.requireNonNull(t.getField(1)).toString()));
                },
                new JdbcExecutionOptions.Builder()
                        .withBatchSize(1)
                        .build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:mysql://127.0.0.1:3306/spider_base?useSSL=false")
                        .withUsername("root")
                        .withPassword("root")
                        .withDriverName(Driver.class.getName())
                        .build()
        ));
env.execute();

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值