其实网上也有很多相关话题的代码实现,但是发现有很多坑。
在 腾讯官方文档中 有介绍,但是屏蔽了很多细节,我做了以下四点修改才能正常运行。
1.前置条件
保证 Oracle 中相关表开启了归档日志(archive log)和补充日志(supplemental log),因为 flink-cdc 是基于 debezium 的 LogMiner 实现的。
2.进入调试阶段
问题一. maven依赖需要本地install
文档中使用maven依赖如下
<dependency> <groupId>com.ververica</groupId> <artifactId>flink-connector-oracle-cdc</artifactId> <version>2.2-SNAPSHOT</version> <!-- 此处依赖的 scope 需要设置为 compile,其他 flink 依赖需设置为 provided --> <scope>compile</scope> </dependency>
实际过程中在maven仓库中并没有此pom,我使用的是2.2.1版本,而且此版本需要oracle 19c的驱动,自己手动从官网下载jar包,然后mvn install 。
问题二.oracle字段名要大写,否则 oracle的变更数据会获取不到
tEnv.executeSql("CREATE TABLE `oracleSource` (\n" +
// Column names MUST be uppercase — Oracle stores identifiers in uppercase, and
// with lowercase names here the connector captures no change events at all.
" PK BIGINT,\n" +
" BRANCHID VARCHAR,\n" +
" PRIMARY KEY(PK) NOT ENFORCED )\n" +
" WITH (\n" +
" 'connector' = 'oracle-cdc',\n" +
// Replace with the actual IP address of the Oracle host.
" 'hostname' = 'xxx',\n" +
" 'port' = '1521',\n" +
" 'username' = 'xxx',\n" +
" 'password' = 'xxx',\n" +
" 'database-name' = 'xxx',\n" +
" 'schema-name' = 'xxx',\n" +
" 'table-name' = 'xxx',\n" +
// 'initial' = snapshot the table first, then stream redo-log changes.
" 'scan.startup.mode' = 'initial'\n"+
")");
问题三.kudu的端口需要开通,kudu master的7051端口,kudu tablet server 的7050端口,否则数据写入kudu报错
问题四.kudu的表名要完整,当整个链路都打通时,发现kudu表没有数据,后来根据 kudu webui 的 tables 页面显示的表名修改表名后才正常
tEnv.executeSql("CREATE TABLE `my_second_table_kudu` (\n" +
" `id` BIGINT,\n" +
" `name` VARCHAR\n" +
") WITH (\n" +
" 'connector.type' = 'kudu',\n" +
// Replace with the actual Kudu master IP (port 7051 must be reachable).
" 'kudu.masters' = 'xxx:7051',\n" +
// kudu.table must be the FULL name exactly as shown on the Kudu web UI
// "tables" page — Impala-created tables carry the "impala::db.table" prefix.
" 'kudu.table' = 'impala::default.my_second_table',\n" +
" 'kudu.hash-columns' = 'id',\n" +
" 'kudu.primary-key-columns' = 'id'\n" +
")");
问题五. ClassNotFoundException
java.lang.ClassNotFoundException: org.apache.flink.shaded.guava18.com.google.common.util
还是老问题,把依赖引入正常,但是接下来又报错找不到
org.apache.flink.shaded.guava30.com.google
后来通过新建lib包 add library的方式将flink-shaded-guava-30.1.1-jre-15.0.jar包引入
程序运行终于正常!!!
完整代码如下
package com.xxx.xxx;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

/**
 * Streams change data from an Oracle table into a Kudu table using the
 * flink-connector-oracle-cdc source and the Bahir Kudu sink, wired together
 * with Flink SQL (blink planner, streaming mode).
 */
public class OracleToKudu {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Expose the local Flink web UI on port 8082 for debugging.
        conf.setInteger(RestOptions.PORT, 8082);
        EnvironmentSettings settings = EnvironmentSettings
                .newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings);

        // Source: Oracle CDC. Column names must be UPPERCASE, otherwise the
        // connector silently captures no change events.
        tEnv.executeSql("CREATE TABLE `oracleSource` (\n" +
                " PK BIGINT,\n" +
                " BRANCHID VARCHAR,\n" +
                " PRIMARY KEY(PK) NOT ENFORCED )\n" +
                " WITH (\n" +
                " 'connector' = 'oracle-cdc',\n" +
                // Replace with the actual IP address of the Oracle host.
                " 'hostname' = 'xxx',\n" +
                " 'port' = '1521',\n" +
                " 'username' = 'xxx',\n" +
                " 'password' = 'xxx',\n" +
                " 'database-name' = 'xxx',\n" +
                // schema-name and table-name must be uppercase as well.
                " 'schema-name' = '<placeholder>这里需要大写',\n" +
                " 'table-name' = '<placeholder>这里需要大写',\n" +
                " 'scan.startup.mode' = 'initial'\n" +
                ")");

        // Rename the uppercase Oracle columns to match the Kudu sink schema.
        tEnv.executeSql("create view vw_oracleSource as select PK AS id,BRANCHID AS name from oracleSource");

        // Sink: Kudu. kudu.table must be the full name as shown on the Kudu
        // web UI "tables" page (Impala-created tables use "impala::db.table").
        tEnv.executeSql("CREATE TABLE `my_second_table_kudu` (\n" +
                " `id` BIGINT,\n" +
                " `name` VARCHAR\n" +
                ") WITH (\n" +
                " 'connector.type' = 'kudu',\n" +
                // Replace with the actual Kudu master IP.
                " 'kudu.masters' = 'xxx:7051',\n" +
                " 'kudu.table' = 'impala::default.my_second_table',\n" +
                " 'kudu.hash-columns' = 'id',\n" +
                " 'kudu.primary-key-columns' = 'id'\n" +
                ")");

        // Submit the streaming INSERT job. executeSql() is asynchronous, so
        // block on await() to keep the driver alive until the job terminates.
        // Do NOT call sEnv.execute() here: no DataStream operators were
        // defined, so it would fail with
        // "No operators defined in streaming topology".
        TableResult result = tEnv.executeSql("UPSERT into my_second_table_kudu select id, name from vw_oracleSource");
        result.await();
    }
}
maven依赖如下
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>flink-oracle-kudu</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <flink.version>1.13.5</flink.version>
    </properties>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/com.ververica/flink-connector-oracle-cdc -->
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-oracle-cdc</artifactId>
            <version>2.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Flink 1.13 itself relies on the guava18 shaded classes. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-guava</artifactId>
            <version>18.0-11.0</version>
        </dependency>
        <!-- flink-connector-oracle-cdc 2.2.1 additionally needs the guava30
             shaded classes (org.apache.flink.shaded.guava30.*); without this
             dependency the job fails at runtime with ClassNotFoundException
             and the jar must be added to the classpath by hand. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-guava</artifactId>
            <version>30.1.1-jre-15.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-kudu -->
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-kudu_2.11</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.15</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-nop -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-nop</artifactId>
            <version>1.7.15</version>
        </dependency>
    </dependencies>
    <!-- build section omitted -->
</project>