flink-cdc 实现oracle 实时同步到kudu

其实网上也有很多相关话题的代码实现,但是发现有很多坑。

在 腾讯官方文档中 有介绍,但是屏蔽了很多细节,我解决了以下五个问题才能正常运行。

1.前置条件

保证oracle数据库开启了归档日志,并且相关表开启了补充日志,因为flink-cdc是基于debezium的logminer实现的。

2.进入调试阶段

问题一. maven依赖需要本地install

文档中使用maven依赖如下

<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-oracle-cdc</artifactId>
    <version>2.2-SNAPSHOT</version>
    <!-- 此处依赖的 scope 需要设置为 compile,其他 flink 依赖需设置为 provided -->
    <scope>compile</scope>
</dependency>

实际过程中在maven仓库中并没有此pom,我使用的是2.2.1版本,而且此版本需要oracle 19c的驱动,自己手动从官网下载jar包,然后mvn install 。

问题二.oracle字段名要大写,否则 oracle的变更数据会获取不到

   // Registers the Oracle CDC source table `oracleSource` through Flink SQL DDL.
   tEnv.executeSql("CREATE TABLE `oracleSource` (\n" +
                // Column names are written in upper case
                " PK      BIGINT,\n" +     
                " BRANCHID    VARCHAR,\n" +
                " PRIMARY KEY(PK) NOT ENFORCED )\n" +
                " WITH (\n" +
                "  'connector' = 'oracle-cdc',\n" +
                // Replace with the actual IP address of the Oracle host
                "  'hostname' = 'xxx',\n" +
                "  'port' = '1521',\n" +
                "  'username' = 'xxx',\n" +
                "  'password' = 'xxx',\n" +
                "  'database-name' = 'xxx',\n" +
                "  'schema-name' = 'xxx',\n" +
                "  'table-name' = 'xxx',\n" +
                 " 'scan.startup.mode' = 'initial'\n"+
                 ")");

问题三.kudu的端口需要开通,kudu master的7051端口,kudu tablet server 的7050端口,否则数据写入kudu报错

问题四.kudu的表名要完整。当整个链路都是通的时,发现kudu表没有数据,后来根据 kudu webui 的 tables 页面中显示的表名修改后才正常

 // Registers the Kudu sink table `my_second_table_kudu` through Flink SQL DDL.
 tEnv.executeSql("CREATE TABLE `my_second_table_kudu` (\n" +
                " `id`    BIGINT,\n" +
                " `name`  VARCHAR\n" +
                ") WITH (\n" +
                " 'connector.type' = 'kudu',\n" +
                // Replace with the actual Kudu master IP address
                " 'kudu.masters' = 'xxx:7051',\n" +
// kudu.table must match the table name shown on the "tables" page of the Kudu web UI
                " 'kudu.table' = 'impala::default.my_second_table',\n" +
                " 'kudu.hash-columns' = 'id',\n" +
                " 'kudu.primary-key-columns' = 'id'\n" +
                ")");

问题五. ClassNotFoundException

java.lang.ClassNotFoundException: org.apache.flink.shaded.guava18.com.google.common.util

还是老问题,把依赖引入正常,但是接下来又报错找不到

 org.apache.flink.shaded.guava30.com.google

后来通过新建lib包 add library的方式将flink-shaded-guava-30.1.1-jre-15.0.jar包引入

程序运行终于正常!!!

完整代码如下

package com.xxx.xxx;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;


/**
 * Flink job that streams rows from an Oracle table into a Kudu table.
 *
 * <p>The pipeline is declared entirely in Flink SQL: an {@code oracle-cdc} source table,
 * a view that renames the source columns to the sink's column names, and a Kudu sink
 * table that the view is written into.
 */
public class OracleToKudu {

    /**
     * Builds the table environment, registers the source, view and sink, and runs the
     * synchronisation statement until the job terminates.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if a statement fails or the running job fails / is interrupted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Fixed REST port so the local web UI can be used while debugging.
        conf.setInteger(RestOptions.PORT, 8082);

        EnvironmentSettings settings = EnvironmentSettings
                .newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();

        StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

        // Source: Oracle CDC table. Column names are written in upper case.
        tEnv.executeSql("CREATE TABLE `oracleSource` (\n" +
                " PK      BIGINT,\n" +
                " BRANCHID    VARCHAR,\n" +
                " PRIMARY KEY(PK) NOT ENFORCED )\n" +
                " WITH (\n" +
                "  'connector' = 'oracle-cdc',\n" +
                // Replace with the actual IP address of the Oracle host.
                "  'hostname' = 'xxx',\n" +
                "  'port' = '1521',\n" +
                "  'username' = 'xxx',\n" +
                "  'password' = 'xxx',\n" +
                "  'database-name' = 'xxx',\n" +
                "  'schema-name' = '<placeholder>这里需要大写',\n" +
                "  'table-name' = '<placeholder>这里需要大写',\n" +
                " 'scan.startup.mode' = 'initial'\n" +
                ")");

        // View that maps the upper-case source columns onto the sink's column names.
        tEnv.executeSql("create view vw_oracleSource as select PK AS id,BRANCHID AS name from oracleSource");

        // Sink: Kudu table.
        tEnv.executeSql("CREATE TABLE `my_second_table_kudu` (\n" +
                " `id`    BIGINT,\n" +
                " `name`  VARCHAR\n" +
                ") WITH (\n" +
                " 'connector.type' = 'kudu',\n" +
                // Replace with the actual Kudu master IP address.
                " 'kudu.masters' = 'xxx:7051',\n" +
                " 'kudu.table' = 'impala::default.my_second_table',\n" +
                " 'kudu.hash-columns' = 'id',\n" +
                " 'kudu.primary-key-columns' = 'id'\n" +
                ")");

        // Write in upsert mode. executeSql() submits the job by itself, so the
        // StreamExecutionEnvironment has no DataStream operators of its own; calling
        // sEnv.execute(...) afterwards fails with "No operators defined in streaming
        // topology". Block on the statement's result instead so the process stays
        // attached to the running job.
        tEnv.executeSql("UPSERT into my_second_table_kudu select id, name from vw_oracleSource")
                .await();
    }
}

maven依赖如下

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-oracle-kudu</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <flink.version>1.13.5</flink.version>

    </properties>
<dependencies>
    <!-- https://mvnrepository.com/artifact/com.ververica/flink-connector-oracle-cdc -->
    <dependency>
        <groupId>com.ververica</groupId>
        <artifactId>flink-connector-oracle-cdc</artifactId>
        <version>2.2.1</version>
    </dependency>



    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-runtime-web_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>


   <dependency>
     <groupId>org.apache.flink</groupId>
      <artifactId>flink-shaded-guava</artifactId>
     <version>18.0-11.0</version>
   </dependency>


    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-kudu -->
    <dependency>
        <groupId>org.apache.bahir</groupId>
        <artifactId>flink-connector-kudu_2.11</artifactId>
        <version>1.1.0</version>

    </dependency>

    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.17</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.15</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-nop -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-nop</artifactId>
        <version>1.7.15</version>
    </dependency>

</dependencies>

<!-- build 配置省略 -->
</project>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值