// Flink CDC: deserializes binlog change records into SQL statements

package com.ysservice.streaming.source;

import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.ysservice.streaming.utils.MysqlUtil;
import com.ysservice.streaming.utils.SystemConstants;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

/**
 * @Description:将cdc数据反序列化成sql语句
 * @author: WuBo
 * @date:2021/10/14 11:13
 */

/**
 * Deserializes a Debezium CDC {@link SourceRecord} into a SQL statement string
 * (insert / delete / update) targeted at a Greenplum table, and emits it as
 * {@code "<gpTable><-gpTableName-><sql>"}. Records for which no valid SQL can be
 * built are routed to the configured error-log table instead.
 *
 * <p>NOTE(review): the emitted SQL is built by string concatenation from binlog
 * values, so it is vulnerable to SQL injection / quote-breaking if any column
 * value contains a single quote. Downstream should use parameterized statements
 * or at minimum escape quotes — flagged, not silently changed here.
 */
public class JsonDebeziumDeserializationSchema implements DebeziumDeserializationSchema<String> {

    /**
     * Converts one binlog record into a SQL string and emits it.
     *
     * <p>Operation detection follows the Debezium envelope:
     * <ul>
     *   <li>only {@code after}  present → insert</li>
     *   <li>only {@code before} present → delete</li>
     *   <li>both present                → update</li>
     * </ul>
     *
     * @param sourceRecord Debezium change record; topic is {@code <server>.<database>.<table>}
     * @param collector    receives either the SQL payload or an error-log insert
     */
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
        // Topic format: <serverName>.<database>.<table>
        String[] topicParts = sourceRecord.topic().split("[.]");
        String database = topicParts[1];
        String table = topicParts[2];

        // Operation type (CREATE / UPDATE / DELETE / READ) from the Debezium envelope.
        Envelope.Operation operation = Envelope.operationFor(sourceRecord);

        // Row images before and after the change; either may be null depending on the op.
        Struct struct = (Struct) sourceRecord.value();
        Struct after = struct.getStruct("after");
        Struct before = struct.getStruct("before");

        // Unique key column of this table, used for delete/update WHERE clauses.
        // May be "" if the table is not configured — guarded below.
        String key = MysqlUtil.getMysqlTableKey(database + "." + table);

        // Comma-terminated fragments; the trailing comma is trimmed when the SQL is built.
        StringBuilder fieldNames = new StringBuilder();   // insert column list
        StringBuilder insertValues = new StringBuilder(); // insert value list
        StringBuilder updateSets = new StringBuilder();   // update SET assignments
        String keyData = "";                              // key column value for WHERE

        if (after != null && before == null) {
            // Insert: collect all column names and values from the after-image.
            appendInsertParts(after, fieldNames, insertValues);
        } else if (before != null && after == null) {
            // Delete: only the key value from the before-image is needed.
            keyData = extractKeyData(before, key);
        } else if (before != null && after != null) {
            // Update: SET clause from the after-image, WHERE key from the before-image.
            appendUpdateParts(after, updateSets);
            keyData = extractKeyData(before, key);
        }

        // Target Greenplum table: <schema>.<serviceName>_<database>_<table>
        String gpTableName = SystemConstants.getCdc_gp_schema() + "." + SystemConstants.getCdc_mysql_service_name() + "_" + database + "_" + table;

        // Build the final SQL only when every required fragment is present;
        // this replaces a broken parenthesization in the original validity check
        // and avoids StringIndexOutOfBoundsException on empty builders.
        String sql = "";
        String type = operation.toString().toLowerCase();
        if ("create".equals(type) && fieldNames.length() > 0 && insertValues.length() > 0) {
            sql = "insert into " + gpTableName + "(" + trimTrailingComma(fieldNames) + ") values(" + trimTrailingComma(insertValues) + ");";
        } else if ("delete".equals(type) && !key.isEmpty() && !keyData.isEmpty()) {
            sql = "delete from " + gpTableName + " where " + key + "=" + keyData + ";";
        } else if ("update".equals(type) && updateSets.length() > 0 && !key.isEmpty() && !keyData.isEmpty()) {
            sql = "update " + gpTableName + " set " + trimTrailingComma(updateSets) + " where " + key + "=" + keyData + ";";
        }

        if (!sql.isEmpty()) {
            // Valid SQL: emit for the normal Greenplum sink.
            collector.collect(gpTableName + "<-gpTableName->" + sql);
        } else {
            // Invalid/unbuildable record: emit an insert into the error-log table.
            collector.collect(SystemConstants.getCdc_gp_error_log_table() + "<-gpTableName->" + "insert into " + SystemConstants.getCdc_gp_error_log_table() + "(log_data)" + " values" + "('" + "gpTableName=" + gpTableName + ",type=" + type + ",before=" + before + ",after=" + after + "');");
        }
    }

    /**
     * Appends every column of {@code row} to the insert fragments.
     * Numeric columns and nulls are emitted bare; everything else is single-quoted.
     */
    private static void appendInsertParts(Struct row, StringBuilder fieldNames, StringBuilder values) {
        Schema schema = row.schema();
        for (Field field : schema.fields()) {
            Object raw = row.get(field);
            // Null or empty values become the SQL literal NULL marker "null".
            String value = (raw != null && !raw.toString().isEmpty()) ? raw.toString() : "null";
            if (MysqlUtil.isNumType(field.schema().toString()) || "null".equals(value)) {
                values.append(value).append(",");
            } else {
                values.append("'").append(value).append("',");
            }
            fieldNames.append(field.name()).append(",");
        }
    }

    /**
     * Appends {@code col=value,} assignments for every column of {@code row}
     * to the update SET fragment, quoting non-numeric values.
     */
    private static void appendUpdateParts(Struct row, StringBuilder sets) {
        Schema schema = row.schema();
        for (Field field : schema.fields()) {
            Object raw = row.get(field);
            String value = (raw != null && !raw.toString().isEmpty()) ? raw.toString() : "null";
            if (MysqlUtil.isNumType(field.schema().toString()) || "null".equals(value)) {
                sets.append(field.name()).append("=").append(value).append(",");
            } else {
                sets.append(field.name()).append("=").append("'").append(value).append("',");
            }
        }
    }

    /**
     * Returns the (quoted if non-numeric) value of the key column in {@code row},
     * or "" when the key column is absent or its value is null.
     * The null guard fixes an NPE in the original delete path.
     */
    private static String extractKeyData(Struct row, String key) {
        for (Field field : row.schema().fields()) {
            if (!key.equals(field.name())) {
                continue;
            }
            Object raw = row.get(field);
            if (raw == null) {
                return "";
            }
            return MysqlUtil.isNumType(field.schema().toString()) ? raw.toString() : "'" + raw + "'";
        }
        return "";
    }

    /** Drops the trailing comma the fragment builders always leave behind. */
    private static String trimTrailingComma(StringBuilder sb) {
        return sb.substring(0, sb.length() - 1);
    }

    /** The sink receives plain SQL strings. */
    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }

}


package com.ysservice.streaming.utils;

/**
 * @Description:MysqlUtil
 * @author: WuBo
 * @date:2021/10/15 12:24
 */
/**
 * Helper routines for the MySQL-CDC pipeline: classifying MySQL column types
 * and resolving each table's configured unique-key column.
 */
public class MysqlUtil {

    /**
     * Keywords identifying numeric MySQL column types. Matched by substring,
     * so SMALLINT is covered via "INT".
     * NOTE(review): substring matching also flags e.g. POINT/INTERVAL as
     * numeric because they contain "INT" — confirm no such column types occur
     * upstream before tightening this.
     */
    private static final String[] MYSQL_NUM_TYPES = {
            "TINYINT",
            "MEDIUMINT",
            "INT",
            "INTEGER",
            "BIGINT",
            "FLOAT",
            "DOUBLE",
            "DECIMAL",
            "LONG"
    };

    /**
     * Returns whether the given MySQL schema/type string denotes a numeric type.
     *
     * @param schema column type description (case-insensitive)
     * @return true if any numeric type keyword occurs in {@code schema}
     */
    public static Boolean isNumType(String schema) {
        // Uppercase once, not on every loop iteration as the original did.
        String upper = schema.toUpperCase();
        for (String numType : MYSQL_NUM_TYPES) {
            if (upper.contains(numType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves the unique-key column configured for a table.
     *
     * <p>Configuration entries have the form {@code "<database>.<table>:<keyColumn>"};
     * if the same table appears more than once, the last entry wins (preserving
     * the original behavior).
     *
     * @param table fully qualified table name ({@code database.table})
     * @return the key column name, or "" if the table is not configured
     */
    public static String getMysqlTableKey(String table) {
        String[] tableKeyEntries = SystemConstants.getCdc_table_key();

        String key = "";
        for (String entry : tableKeyEntries) {
            String[] parts = entry.split(":");
            if (parts[0].equals(table)) {
                key = parts[1];
            }
        }
        return key;
    }

}

  • 3
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
Flink CDC 是一个用于从MySQL binlog中获取数据变动的工具。通过引入Flink CDC的jar包,并编写一个main方法,我们可以实现从指定位置拉取消息的功能。具体而言,可以使用.startupOptions(StartupOptions.specificOffset("mysql-bin.000013", 1260))这句代码来指定binlog日志的位置开始读取数据。 需要注意的是,Flink CDC 1.4.0版本支持使用specificOffset方式指定binlog日志的位置开始读取数据,而新版本测试还未支持该功能。 Java是一种全球排名第一的编程语言,在大数据平台中广泛使用,主要包括Hadoop、Spark、Flink等工具,这些工具都是使用Java或Scala开发的。因此,使用Java编写Flink CDC的代码可以很好地与大数据生态系统进行集,实现对MySQL binlog的获取。<span class="em">1</span><span class="em">2</span><span class="em">3</span> #### 引用[.reference_title] - *1* *2* [FlinkCdc从Mysql指定的binlog日志offsetPos位置开始读取数据](https://blog.csdn.net/shy_snow/article/details/122879590)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v93^chatsearchT3_2"}}] [.reference_item style="max-width: 50%"] - *3* [Java + 数组 + 初始化](https://download.csdn.net/download/weixin_51202460/88254379)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v93^chatsearchT3_2"}}] [.reference_item style="max-width: 50%"] [ .reference_list ]
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值