最全Flink CDC实时同步PG数据库_flink cdc pg

– 设置发布为true
update pg_publication set puballtables=true where pubname is not null;
– 把所有表进行发布
CREATE PUBLICATION dbz_publication FOR ALL TABLES;
– 查询哪些表已经发布
select * from pg_publication_tables;

4、更改表的复制标识包含更新和删除的值

– 更改复制标识包含更新和删除之前值
ALTER TABLE xxxxxx REPLICA IDENTITY FULL;
– 查看复制标识(为f标识说明设置成功)
select relreplident from pg_class where relname=‘xxxxxx’;

二、Flink读取PG数据

1、加载依赖
<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-postgres-cdc</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.postgresql</groupId>
    <artifactId>postgresql</artifactId>
    <version>42.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_${scala.version}</artifactId>
    <version>${flink.version}</version>
    <exclusions>
        <exclusion>
            <artifactId>kafka-clients</artifactId>
            <groupId>org.apache.kafka</groupId>
        </exclusion>
    </exclusions>
</dependency>

注意:如果依赖中有flink-connector-kafka,可能会有冲突,需要手动排除冲突

2、使用Flink CDC创建pg的source
import com.ververica.cdc.connectors.postgres.PostgreSQLSource;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

public static SourceFunction getPGSource(String database, String schemaList,String tableList, String slotName) {
        Properties properties = new Properties();
        properties.setProperty("snapshot.mode", "always"); //always:全量+增量  never:增量
        properties.setProperty("debezium.slot.name", "pg_cdc");
        //在作业停止后自动清理 slot
        properties.setProperty("debezium.slot.drop.on.stop", "true");
        properties.setProperty("include.schema.changes", "true");
        // PostGres 数据库
        SourceFunction<String> sourceFunction = PostgreSQLSource.<String>builder()
                .hostname("localhost")
                .port(5432)
                .database(database) // monitor postgres database
                .schemaList(schemaList)  // monitor inventory schema
                .tableList(tableList) // monitor products table 支持正则表达式
                .username("postgres")
                .password("postgres")
                .decodingPluginName("pgoutput")
                //Flink CDC 默认一张表占用一个 slot。多个未指定 slot.name 的连接会产生冲突。
                .slotName(slotName)
                .deserializer(new MyDebezium()) // 自定义序列化器,解决pg数据库日期格式的数据问题和时区问题
                //.deserializer(new JsonDebeziumDeserializationSchema()) // 
                .debeziumProperties(properties)
                .build();
        return sourceFunction;
    }
3、自定义序列化器
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.ververica.cdc.debezium.utils.TemporalConversions;
import io.debezium.time.*;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;

public class MyDebezium implements DebeziumDeserializationSchema<String> {

    // 日期格式转换时区
    private static String serverTimeZone = "Asia/Shanghai";

    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
        // 1. 创建一个JSONObject用来存放最终封装好的数据
        JSONObject result = new JSONObject();

        // 2. 解析主键
        Struct  key = (Struct)sourceRecord.key();
        JSONObject keyJs = parseStruct(key);

        // 3. 解析值
        Struct value = (Struct) sourceRecord.value();
        Struct source = value.getStruct("source");

        JSONObject beforeJson = parseStruct(value.getStruct("before"));
        JSONObject afterJson = parseStruct(value.getStruct("after"));

        //将数据封装到JSONObject中
        result.put("db", source.get("db").toString().toLowerCase());
        //result.put("schema", source.get("schema").toString().toLowerCase()); 架构名 看是否需要
        result.put("table", source.get("table").toString().toLowerCase());
        result.put("key", keyJs);
        result.put("op", value.get("op").toString());
        result.put("op_ts", LocalDateTime.ofInstant(Instant.ofEpochMilli(source.getInt64("ts_ms")), ZoneId.of(serverTimeZone)));
        result.put("current_ts", LocalDateTime.ofInstant(Instant.ofEpochMilli(value.getInt64("ts_ms")), ZoneId.of(serverTimeZone)));
        result.put("before", beforeJson);
        result.put("after", afterJson);


        //将数据发送至下游
        collector.collect(result.toJSONString());
    }

    private JSONObject parseStruct(Struct valueStruct) {
        if (valueStruct == null) return null;

        JSONObject dataJson = new JSONObject();
        for (Field field : valueStruct.schema().fields()) {
            Object v = valueStruct.get(field);
            String type = field.schema().name();
            Object val = null;

            if (v instanceof Long) {
                long vl = (Long) v;
                val = convertLongToTime(vl, type);
            } else if (v instanceof Integer){
                int iv = (Integer) v;
                val = convertIntToDate(iv, type);
            } else if (v == null) {
                val = null;
            } else {
                val = convertObjToTime(v, type);
            }
            dataJson.put(field.name().toLowerCase(), val);
        }
        return dataJson;
    }

    private Object convertObjToTime(Object obj, String type) {
        Object val = obj;
        if (Time.SCHEMA_NAME.equals(type) || MicroTime.SCHEMA_NAME.equals(type) || NanoTime.SCHEMA_NAME.equals(type)) {
            val = java.sql.Time.valueOf(TemporalConversions.toLocalTime(obj)).toString();
        } else if (Timestamp.SCHEMA_NAME.equals(type) || MicroTimestamp.SCHEMA_NAME.equals(type) || NanoTimestamp.SCHEMA_NAME.equals(type) || ZonedTimestamp.SCHEMA_NAME.equals(type)) {
            val = java.sql.Timestamp.valueOf(TemporalConversions.toLocalDateTime(obj, ZoneId.of(serverTimeZone))).toString();
        }
        return val;
    }

    private Object convertIntToDate(int obj, String type) {
        SchemaBuilder date_schema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Date");
        Object val = obj;
        if (Date.SCHEMA_NAME.equals(type)) {
            val = org.apache.kafka.connect.data.Date.toLogical(date_schema, obj).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDate().toString();
        }
        return val;
    }

    private Object convertLongToTime(long obj, String type) {
        SchemaBuilder time_schema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Time");
        SchemaBuilder date_schema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Date");
        SchemaBuilder timestamp_schema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Timestamp");
        Object val = obj;
        if (Time.SCHEMA_NAME.equals(type)) {
            val = org.apache.kafka.connect.data.Time.toLogical(time_schema, (int)obj).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalTime().toString();
        } else if (MicroTime.SCHEMA_NAME.equals(type)) {
            val = org.apache.kafka.connect.data.Time.toLogical(time_schema, (int)(obj / 1000)).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalTime().toString();
        } else if (NanoTime.SCHEMA_NAME.equals(type)) {
            val = org.apache.kafka.connect.data.Time.toLogical(time_schema, (int)(obj / 1000 / 1000)).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalTime().toString();
        } else if (Timestamp.SCHEMA_NAME.equals(type)) {
            LocalDateTime t = org.apache.kafka.connect.data.Timestamp.toLogical(timestamp_schema, obj).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDateTime();
            val = java.sql.Timestamp.valueOf(t).toString();
        } else if (MicroTimestamp.SCHEMA_NAME.equals(type)) {
            LocalDateTime t = org.apache.kafka.connect.data.Timestamp.toLogical(timestamp_schema, obj / 1000).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDateTime();
            val = java.sql.Timestamp.valueOf(t).toString();
        } else if (NanoTimestamp.SCHEMA_NAME.equals(type)) {
            LocalDateTime t = org.apache.kafka.connect.data.Timestamp.toLogical(timestamp_schema, obj / 1000 / 1000).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDateTime();
            val = java.sql.Timestamp.valueOf(t).toString();
        } else if (Date.SCHEMA_NAME.equals(type)) {
            val = org.apache.kafka.connect.data.Date.toLogical(date_schema, (int)obj).toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDate().toString();
        }
        return val;
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}

注:这段代码来自于https://huaweicloud.csdn.net/63356ef0d3efff3090b56c01.html中的scala版本

img
img
img

既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上大数据知识点,真正体系化!

由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新

需要这份系统化资料的朋友,可以戳这里获取

…(img-ACfgUsvq-1714804619947)]

既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上大数据知识点,真正体系化!

由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新

需要这份系统化资料的朋友,可以戳这里获取

  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值