Flink之CDC


一、开启Binlog日志

编辑:vi /etc/my.cnf
在 [mysqld] 配置段下追加如下内容(修改后需重启 MySQL 服务才能生效)
#数据库id
server-id = 1
#启动binlog,该参数的值会作为binlog的文件名
log-bin=mysql-bin
#binlog类型,Flink CDC(基于Debezium)与maxwell都要求为row类型
binlog_format=row
#启用binlog的数据库,需根据实际情况作出修改
binlog-do-db=gmall
binlog-do-db=gmall-config

二、DataStream

package com.hpsk.flink.cdc;

import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;


/**
 * DataStream example that reads change events from MySQL through the Flink CDC
 * {@link MySqlSource} and prints every event as a JSON string.
 */
public class Flink_CDC {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // 2. Flink CDC keeps the binlog read position as state inside checkpoints. To resume from
        //    where the job stopped, the program must be started from a checkpoint or a savepoint.
        // 2.1 Enable checkpointing, one checkpoint every 5 seconds.
        //env.enableCheckpointing(5000L);
        // 2.2 Choose the checkpoint consistency semantics.
        //env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // 2.3 Retain the last checkpoint when the job is cancelled.
        //env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // 2.4 Configure the automatic restart strategy.
        //env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));
        // 2.5 Configure the state backend.
        //env.setStateBackend(new FsStateBackend("hdfs://ip:8020/flinkCDC"));
        // 2.6 Set the user name used to access HDFS.
        //System.setProperty("HADOOP_USER_NAME", "atguigu");
        // 3. Build the Flink MySQL CDC source.
        MySqlSource<String> mySqlSource = MySqlSource.<String>builder()
                .hostname("ip")
                .port(3306)
                .username("用户名")
                .password("密码")
                .databaseList("数据库")
                // Optional: when omitted, every table of the databases listed above is read.
                // Table names must be given in "db.table" form.
                .tableList("数据库.表名")
                .startupOptions(StartupOptions.initial())
                // Custom deserializer that defines the output record format.
                .deserializer(new CustomerDeserializationSchema())
                .build();
        // 4. Read from MySQL through the CDC source.
        DataStreamSource<String> mysqlDS = env.fromSource(mySqlSource,
                WatermarkStrategy.noWatermarks(),
                "mySqlSource");
        // 5. Print the records.
        mysqlDS.print();
        // 6. Start the job.
        env.execute("FlinkCDCDataStream");
    }

    /**
     * Custom deserializer that turns each change record into a JSON string with the keys
     * {@code database}, {@code table}, {@code operation}, {@code before} and {@code after}.
     */
    public static class CustomerDeserializationSchema implements DebeziumDeserializationSchema<String>{
        /**
         * Converts one change record to JSON and emits it downstream.
         *
         * @param sourceRecord the change record; its topic is split on '.' and the second and
         *                     third segments are used as database and table name
         *                     (e.g. {@code mysql_binlog_source.gmall.activity_info})
         * @param collector    receives the resulting JSON string
         */
        @Override
        public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
            // The topic carries the database and table name.
            String topic = sourceRecord.topic();
            String[] arr = topic.split("\\.");
            String db = arr[1];
            String tableName = arr[2];
            // Operation type, e.g. READ, CREATE, UPDATE, DELETE.
            Envelope.Operation operation = Envelope.operationFor(sourceRecord);
            // The record value holds the row images.
            Struct value = (Struct) sourceRecord.value();
            // Row image before the change; absent for inserts and snapshot reads.
            JSONObject beforeJson = structToJson(value.getStruct("before"));
            // Row image after the change; absent for deletes, so it needs the same
            // null handling as "before" to avoid a NullPointerException.
            JSONObject afterJson = structToJson(value.getStruct("after"));
            // Assemble the final result object.
            JSONObject result = new JSONObject();
            result.put("database", db);
            result.put("table", tableName);
            result.put("operation", operation.toString().toLowerCase());
            result.put("before", beforeJson);
            result.put("after", afterJson);
            // Emit downstream.
            collector.collect(result.toJSONString());
        }

        /**
         * Copies every field of {@code struct} into a new JSON object.
         *
         * @param struct the row image, may be {@code null}
         * @return a JSON object with one entry per field; empty when {@code struct} is {@code null}
         */
        private static JSONObject structToJson(Struct struct) {
            JSONObject json = new JSONObject();
            if (struct != null) {
                for (Field field : struct.schema().fields()) {
                    json.put(field.name(), struct.get(field));
                }
            }
            return json;
        }

        @Override
        public TypeInformation<String> getProducedType() {
            return TypeInformation.of(String.class);
        }
    }
}

三、SQL

package com.hpsk.flink.cdc;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

/**
 * Flink SQL example that declares a {@code mysql-cdc} table and prints its change stream.
 */
public class FlinkSQL_CDC {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // 2. Declare the Flink MySQL CDC source table.
        //    Flink does not own the data, so a primary key must be declared NOT ENFORCED;
        //    a plain PRIMARY KEY is rejected when the DDL is validated.
        tableEnv.executeSql("CREATE TABLE activity_info (" +
                " `id` INT," +
                " `activity_name` STRING," +
                " `activity_type` STRING," +
                " `activity_desc` STRING," +
                " `start_time` STRING," +
                " `end_time` STRING," +
                " `create_time` STRING," +
                " PRIMARY KEY (`id`) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'mysql-cdc'," +
                " 'hostname' = 'ip地址'," +
                " 'port' = '3306'," +
                " 'username' = '用户名'," +
                " 'password' = '密码'," +
                " 'database-name' = '数据库'," +
                " 'table-name' = '表名'" +
                ")");
        // 3. executeSql submits the query as its own job and print() consumes its results.
        //    No env.execute() afterwards: no DataStream operators are defined on env, so it
        //    would fail with "No operators defined in streaming topology".
        tableEnv.executeSql("select * from activity_info").print();
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值