-- Set puballtables to true for any existing publication
update pg_publication set puballtables=true where pubname is not null;
-- Create a publication covering all tables
CREATE PUBLICATION dbz_publication FOR ALL TABLES;
-- Check which tables have been published
select * from pg_publication_tables;
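If the publication was created successfully, each published table appears as one row in pg_publication_tables; a narrower check against the publication created above could be:

select pubname, schemaname, tablename from pg_publication_tables where pubname='dbz_publication';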
4. Change the table's replica identity so UPDATE and DELETE carry the old values
-- Change the replica identity so UPDATE and DELETE events include the previous row values
ALTER TABLE xxxxxx REPLICA IDENTITY FULL;
-- Check the replica identity (a value of 'f' means the change succeeded)
select relreplident from pg_class where relname='xxxxxx';
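For reference, relreplident is a single-character code: d = default (use the primary key), n = nothing, i = use a specific index, f = full (log all columns). A quick verification query (xxxxxx is the placeholder table name) should therefore return f:

select relname, relreplident from pg_class where relname='xxxxxx';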
II. Reading PG Data with Flink
1. Add the dependencies
<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-postgres-cdc</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.postgresql</groupId>
    <artifactId>postgresql</artifactId>
    <version>42.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_${scala.version}</artifactId>
    <version>${flink.version}</version>
    <exclusions>
        <exclusion>
            <artifactId>kafka-clients</artifactId>
            <groupId>org.apache.kafka</groupId>
        </exclusion>
    </exclusions>
</dependency>
Note: if flink-connector-kafka is also on the classpath, the kafka-clients it pulls in may conflict with the one used by the CDC connector; exclude it manually as shown above.
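If it is unclear which dependency pulls in the conflicting kafka-clients, Maven's standard dependency tree can show it (a general-purpose Maven command, not specific to this setup):

mvn dependency:tree -Dincludes=org.apache.kafka:kafka-clients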
2. Create a PG source with Flink CDC
import com.ververica.cdc.connectors.postgres.PostgreSQLSource;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.Properties;

public static SourceFunction<String> getPGSource(String database, String schemaList, String tableList, String slotName) {
    // These properties are handed straight to Debezium, so they take no "debezium." prefix
    Properties properties = new Properties();
    properties.setProperty("snapshot.mode", "always"); // always: snapshot + streaming; never: streaming only
    properties.setProperty("slot.name", "pg_cdc"); // overridden by .slotName(...) on the builder below
    // Drop the replication slot automatically when the job stops
    properties.setProperty("slot.drop.on.stop", "true");
    properties.setProperty("include.schema.changes", "true");
    // PostgreSQL source
    SourceFunction<String> sourceFunction = PostgreSQLSource.<String>builder()
            .hostname("localhost")
            .port(5432)
            .database(database)     // database to monitor
            .schemaList(schemaList) // schemas to monitor
            .tableList(tableList)   // tables to monitor; regular expressions are supported
            .username("postgres")
            .password("postgres")
            .decodingPluginName("pgoutput")
            // By default each connection takes its own replication slot; multiple
            // connections without an explicit slot name will conflict.
            .slotName(slotName)
            .deserializer(new MyDebezium()) // custom deserializer that fixes PG date/time formats and time zones
            //.deserializer(new JsonDebeziumDeserializationSchema())
            .debeziumProperties(properties)
            .build();
    return sourceFunction;
}
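A minimal sketch of wiring this source into a job might look like the following, assuming the getPGSource method above lives in the same class. The database, schema, table, and slot names are placeholders, and print() stands in for whatever sink (e.g. Kafka) you actually use:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Checkpointing lets the CDC source record its WAL position for recovery
    env.enableCheckpointing(10_000L);
    env.setParallelism(1); // the SourceFunction-based Postgres CDC source only runs with parallelism 1

    env.addSource(getPGSource("mydb", "public", "public.my_table", "flink_cdc_slot"))
       .print(); // replace with a Kafka producer or other sink

    env.execute("postgres-cdc-demo");
}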
3. Custom deserializer
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.ververica.cdc.debezium.utils.TemporalConversions;
import io.debezium.time.*;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
public class MyDebezium implements DebeziumDeserializationSchema<String> {

    // Time zone used when converting date/time values
    private static String serverTimeZone = "Asia/Shanghai";

    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
        // 1. Create a JSONObject to hold the final, assembled record
        JSONObject result = new JSONObject();
        // 2. Parse the primary key
        Struct key = (Struct) sourceRecord.key();
        JSONObject keyJs = parseStruct(key);
        // 3. Parse the value
        Struct value = (Struct) sourceRecord.value();
        Struct source = value.getStruct("source");
        JSONObject beforeJson = parseStruct(value.getStruct("before"));
        JSONObject afterJson = parseStruct(value.getStruct("after"));
        // Assemble the output record
        result.put("db", source.get("db").toString().toLowerCase());
        //result.put("schema", source.get("schema").toString().toLowerCase()); // schema name; include if you need it
        result.put("table", source.get("table").toString().toLowerCase());
        result.put("key", keyJs);
        result.put("op", value.get("op").toString());
        result.put("op_ts", LocalDateTime.ofInstant(Instant.ofEpochMilli(source.getInt64("ts_ms")), ZoneId.of(serverTimeZone)));
        result.put("current_ts", LocalDateTime.ofInstant(Instant.ofEpochMilli(value.getInt64("ts_ms")), ZoneId.of(serverTimeZone)));
        result.put("before", beforeJson);
        result.put("after", afterJson);
        // Emit the record downstream
        collector.collect(result.toJSONString());
    }

    private JSONObject parseStruct(Struct valueStruct) {
        if (valueStruct == null) return null;
        JSONObject dataJson = new JSONObject();
        for (Field field : valueStruct.schema().fields()) {
            Object v = valueStruct.get(field);
            String type = field.schema().name();
            Object val = null;
            if (v instanceof Long) {
                long vl = (Long) v;
                val = convertLongToTime(vl, type);
            } else if (v instanceof Integer) {
                int iv = (Integer) v;
                val = convertIntToDate(iv, type);
            } else if (v == null) {
                val = null;
            } else {
                val = convertObjToTime(v, type);
            }
            dataJson.put(field.name().toLowerCase(), val);
        }
        return dataJson;
    }

    private Object convertObjToTime(Object obj, String type) {
        Object val = obj;
        if (Time.SCHEMA_NAME.equals(type) || MicroTime.SCHEMA_NAME.equals(type) || NanoTime.SCHEMA_NAME.equals(type)) {
            val = java.sql.Time.valueOf(TemporalConversions.toLocalTime(obj)).toString();
        } else if (Timestamp.SCHEMA_NAME.equals(type) || MicroTimestamp.SCHEMA_NAME.equals(type)
                || NanoTimestamp.SCHEMA_NAME.equals(type) || ZonedTimestamp.SCHEMA_NAME.equals(type)) {
            val = java.sql.Timestamp.valueOf(TemporalConversions.toLocalDateTime(obj, ZoneId.of(serverTimeZone))).toString();
        }
        return val;
    }

    private Object convertIntToDate(int obj, String type) {
        SchemaBuilder dateSchema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Date");
        Object val = obj;
        if (Date.SCHEMA_NAME.equals(type)) {
            // Debezium encodes DATE as the number of days since the epoch
            val = org.apache.kafka.connect.data.Date.toLogical(dateSchema, obj)
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDate().toString();
        }
        return val;
    }

    private Object convertLongToTime(long obj, String type) {
        SchemaBuilder timeSchema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Time");
        SchemaBuilder dateSchema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Date");
        SchemaBuilder timestampSchema = SchemaBuilder.int64().name("org.apache.kafka.connect.data.Timestamp");
        Object val = obj;
        if (Time.SCHEMA_NAME.equals(type)) {
            // milliseconds since midnight
            val = org.apache.kafka.connect.data.Time.toLogical(timeSchema, (int) obj)
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalTime().toString();
        } else if (MicroTime.SCHEMA_NAME.equals(type)) {
            // microseconds since midnight
            val = org.apache.kafka.connect.data.Time.toLogical(timeSchema, (int) (obj / 1000))
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalTime().toString();
        } else if (NanoTime.SCHEMA_NAME.equals(type)) {
            // nanoseconds since midnight
            val = org.apache.kafka.connect.data.Time.toLogical(timeSchema, (int) (obj / 1000 / 1000))
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalTime().toString();
        } else if (Timestamp.SCHEMA_NAME.equals(type)) {
            // milliseconds since the epoch
            LocalDateTime t = org.apache.kafka.connect.data.Timestamp.toLogical(timestampSchema, obj)
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDateTime();
            val = java.sql.Timestamp.valueOf(t).toString();
        } else if (MicroTimestamp.SCHEMA_NAME.equals(type)) {
            // microseconds since the epoch
            LocalDateTime t = org.apache.kafka.connect.data.Timestamp.toLogical(timestampSchema, obj / 1000)
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDateTime();
            val = java.sql.Timestamp.valueOf(t).toString();
        } else if (NanoTimestamp.SCHEMA_NAME.equals(type)) {
            // nanoseconds since the epoch
            LocalDateTime t = org.apache.kafka.connect.data.Timestamp.toLogical(timestampSchema, obj / 1000 / 1000)
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDateTime();
            val = java.sql.Timestamp.valueOf(t).toString();
        } else if (Date.SCHEMA_NAME.equals(type)) {
            // days since the epoch
            val = org.apache.kafka.connect.data.Date.toLogical(dateSchema, (int) obj)
                    .toInstant().atZone(ZoneId.of(serverTimeZone)).toLocalDate().toString();
        }
        return val;
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}
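For reference, an update to a row in a table with primary key id would produce JSON shaped roughly like this (field values are illustrative; op is Debezium's operation code: c = create, u = update, d = delete, r = snapshot read):

{
  "db": "mydb",
  "table": "my_table",
  "key": {"id": 1},
  "op": "u",
  "op_ts": "2024-01-01T12:00:00",
  "current_ts": "2024-01-01T12:00:00.123",
  "before": {"id": 1, "name": "old"},
  "after": {"id": 1, "name": "new"}
}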
Note: this code is adapted from the Scala version at https://huaweicloud.csdn.net/63356ef0d3efff3090b56c01.html