Flink SQL Connector
Environment
Component | Version |
---|---|
oracle | 11.2.0.2 |
flink | 1.13.6 |
scala | 2.11 |
Download and Build the Flink Source
[root@taia-3 flink]# git clone https://github.com/apache/flink.git
Check out the release-1.13.6 tag so the build matches the target version, then build:
mvn clean install -DskipTests -Dfast -Pskip-webui-build -T 1C
Interface Development
Add or modify the following three classes; the package names must match the Flink source tree:
- OracleRowConverter (new): org.apache.flink.connector.jdbc.internal.converter
- OracleDialect (new): org.apache.flink.connector.jdbc.dialect
- JdbcDialects (modified): org.apache.flink.connector.jdbc.dialect
JdbcDialects
Register the Oracle dialect in the existing dialect list; source as follows:
package org.apache.flink.connector.jdbc.dialect;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
/** Default JDBC dialects. */
public final class JdbcDialects {
private static final List<JdbcDialect> DIALECTS =
Arrays.asList(new DerbyDialect(), new MySQLDialect(), new PostgresDialect(),
new OracleDialect());
/** Fetch the JdbcDialect class corresponding to a given database url. */
public static Optional<JdbcDialect> get(String url) {
for (JdbcDialect dialect : DIALECTS) {
if (dialect.canHandle(url)) {
return Optional.of(dialect);
}
}
return Optional.empty();
}
}
OracleRowConverter
The row converter delegates to AbstractJdbcRowConverter and only sets the converter name; source as follows:
package org.apache.flink.connector.jdbc.internal.converter;
import org.apache.flink.table.types.logical.RowType;
/**
* Runtime converter that responsible to convert between JDBC object and Flink internal object for
* Oracle.
*/
public class OracleRowConverter extends AbstractJdbcRowConverter {
private static final long serialVersionUID = 1L;
public OracleRowConverter(RowType rowType) {
super(rowType);
}
@Override
public String converterName() {
return "Oracle";
}
}
OracleDialect
Define the Oracle dialect; source as follows:
package org.apache.flink.connector.jdbc.dialect;
import org.apache.flink.connector.jdbc.internal.converter.JdbcRowConverter;
import org.apache.flink.connector.jdbc.internal.converter.OracleRowConverter;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.table.types.logical.RowType;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/** JDBC dialect for Oracle. */
class OracleDialect extends AbstractDialect {
private static final long serialVersionUID = 1L;
// Define MAX/MIN precision of TIMESTAMP type according to Oracle docs:
// https://www.techonthenet.com/oracle/datatypes.php
private static final int MAX_TIMESTAMP_PRECISION = 9;
private static final int MIN_TIMESTAMP_PRECISION = 1;
// Define MAX/MIN precision of DECIMAL type according to Oracle docs:
// https://www.techonthenet.com/oracle/datatypes.php
private static final int MAX_DECIMAL_PRECISION = 38;
private static final int MIN_DECIMAL_PRECISION = 1;
/**
* Check if this dialect instance can handle a certain jdbc url.
*
* @param url the jdbc url.
*
* @return True if the dialect can be applied on the given jdbc url.
*/
@Override
public boolean canHandle(String url) {
return url.startsWith("jdbc:oracle:");
}
/**
* Get converter that convert jdbc object and Flink internal object each other.
*
* @param rowType the given row type
*
* @return a row converter for the database
*/
@Override
public JdbcRowConverter getRowConverter(RowType rowType) {
return new OracleRowConverter(rowType);
}
/**
* Get limit clause to limit the number of emitted row from the jdbc source.
*
* @param limit number of row to emit. The value of the parameter should be non-negative.
*
* @return the limit clause.
*/
@Override
public String getLimitClause(long limit) {
return "FETCH FIRST " + limit + " ROWS ONLY";
}
@Override
public Optional<String> defaultDriverName() {
return Optional.of("oracle.jdbc.driver.OracleDriver");
}
/**
* Get the name of jdbc dialect.
*
* @return the dialect name.
*/
@Override
public String dialectName() {
return "Oracle";
}
@Override
public Optional<String> getUpsertStatement(
String tableName, String[] fieldNames, String[] uniqueKeyFields) {
String sourceFields =
Arrays.stream(fieldNames)
.map(f -> ":" + f + " " + quoteIdentifier(f))
.collect(Collectors.joining(", "));
String onClause =
Arrays.stream(uniqueKeyFields)
.map(f -> "t." + quoteIdentifier(f) + "=s." + quoteIdentifier(f))
.collect(Collectors.joining(" and "));
final Set<String> uniqueKeyFieldsSet =
Arrays.stream(uniqueKeyFields).collect(Collectors.toSet());
String updateClause =
Arrays.stream(fieldNames)
.filter(f -> !uniqueKeyFieldsSet.contains(f))
.map(f -> "t." + quoteIdentifier(f) + "=s." + quoteIdentifier(f))
.collect(Collectors.joining(", "));
String insertFields =
Arrays.stream(fieldNames)
.map(this::quoteIdentifier)
.collect(Collectors.joining(", "));
String valuesClause =
Arrays.stream(fieldNames)
.map(f -> "s." + quoteIdentifier(f))
.collect(Collectors.joining(", "));
// if we can't divide schema and table-name is risky to call quoteIdentifier(tableName)
// for example [tbo].[sometable] is ok but [tbo.sometable] is not
String mergeQuery =
" MERGE INTO "
+ tableName
+ " t "
+ " USING (SELECT "
+ sourceFields
+ " FROM DUAL) s "
+ " ON ("
+ onClause
+ ") "
+ " WHEN MATCHED THEN UPDATE SET "
+ updateClause
+ " WHEN NOT MATCHED THEN INSERT ("
+ insertFields
+ ")"
+ " VALUES ("
+ valuesClause
+ ")";
return Optional.of(mergeQuery);
}
@Override
public String quoteIdentifier(String identifier) {
return identifier;
}
@Override
public int maxDecimalPrecision() {
return MAX_DECIMAL_PRECISION;
}
@Override
public int minDecimalPrecision() {
return MIN_DECIMAL_PRECISION;
}
@Override
public int maxTimestampPrecision() {
return MAX_TIMESTAMP_PRECISION;
}
@Override
public int minTimestampPrecision() {
return MIN_TIMESTAMP_PRECISION;
}
/**
* Defines the unsupported types for the dialect.
*
* @return a list of logical type roots.
*/
@Override
public List<LogicalTypeRoot> unsupportedTypes() {
// The data types used in Oracle are list at:
// https://www.techonthenet.com/oracle/datatypes.php
return Arrays.asList(
LogicalTypeRoot.BINARY,
LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE,
LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE,
LogicalTypeRoot.INTERVAL_YEAR_MONTH,
LogicalTypeRoot.INTERVAL_DAY_TIME,
LogicalTypeRoot.ARRAY,
LogicalTypeRoot.MULTISET,
LogicalTypeRoot.MAP,
LogicalTypeRoot.ROW,
LogicalTypeRoot.DISTINCT_TYPE,
LogicalTypeRoot.STRUCTURED_TYPE,
LogicalTypeRoot.NULL,
LogicalTypeRoot.RAW,
LogicalTypeRoot.SYMBOL,
LogicalTypeRoot.UNRESOLVED);
}
}
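For reference, given a table SQLTEST with columns id, name, age and unique key id (the hypothetical layout used in the tests below), getUpsertStatement above produces a MERGE statement along these lines:
MERGE INTO SQLTEST t
  USING (SELECT :id id, :name name, :age age FROM DUAL) s
  ON (t.id=s.id)
  WHEN MATCHED THEN UPDATE SET t.name=s.name, t.age=s.age
  WHEN NOT MATCHED THEN INSERT (id, name, age)
  VALUES (s.id, s.name, s.age)
The :id, :name and :age placeholders are named parameters that the connector binds for each buffered row, so every upsert either updates the matching row or inserts a new one.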
Package
Rebuild with Maven:
mvn clean install -DskipTests -Dfast -Pskip-webui-build -T 1C
The resulting flink-connector-jdbc_2.11-1.13.6.jar is located at D:\sourcecode\Flink\flink-1.13.6\flink-connectors\flink-connector-jdbc\target\flink-connector-jdbc_2.11-1.13.6.jar
Testing
JUnit
package org.apache.flink.connector.jdbc.table;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.CollectionUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.Assert.assertEquals;
/**
* ITCase for {@link JdbcDynamicTableSource}.
*/
public class JdbcDynamicOracleTableSourceITCase {
public static final String DRIVER_CLASS = "oracle.jdbc.driver.OracleDriver";
public static final String DB_URL = "jdbc:oracle:thin:@XXX.XXX.XXX.XXX:49161:XE";
public static final String INPUT_TABLE = "SQLTEST";
public static final String USER_NAME = "system";
public static final String PASS_WORD = "oracle";
public static StreamExecutionEnvironment env;
public static TableEnvironment tEnv;
@Before
public void before() throws ClassNotFoundException, SQLException {
env = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings envSettings =
EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
tEnv = StreamTableEnvironment.create(env, envSettings);
}
@After
public void clearOutputTable() throws Exception {
}
@Test
public void testSelectJdbcSource() throws Exception {
tEnv.executeSql(
"CREATE TABLE "
+ INPUT_TABLE
+ "("
+ "id DECIMAL,"
+ "name STRING,"
+ "age DECIMAL"
+ ") WITH ("
+ " 'connector'='jdbc',"
+ " 'url'='" + DB_URL
+ "',"
+ " 'username' = 'system', "
+ " 'password' = 'oracle', "
+ " 'table-name'='"
+ INPUT_TABLE
+ "'"
+ ")");
Iterator<Row> collected =
tEnv.executeSql("SELECT id , name FROM " + INPUT_TABLE)
.collect();
List<String> result =
CollectionUtil.iteratorToList(collected).stream()
.map(Row::toString)
.sorted()
.collect(Collectors.toList());
List<String> expected =
Stream.of(
"+I[1, fanfan]",
"+I[2, wangwu]",
"+I[3, zhaoliu]",
"+I[4, james]",
"+I[54, wangkobe]")
.sorted()
.collect(Collectors.toList());
assertEquals(expected, result);
}
@Test
public void testUpsert() throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
tEnv.executeSql(
"CREATE TABLE "
+ INPUT_TABLE
+ "("
+ "id DECIMAL,"
+ "name STRING,"
+ "age DECIMAL ,"
+ "PRIMARY KEY(id) NOT ENFORCED"
+ ") WITH ("
+ " 'connector'='jdbc',"
+ " 'url'='" + DB_URL
+ "',"
+ " 'username' = 'system', "
+ " 'password' = 'oracle', "
+ " 'table-name'='"
+ INPUT_TABLE
+ "'"
+ ")");
// tEnv.executeSql("INSERT INTO SQLTEST values(4,'curry',32)").print();
tEnv.executeSql("INSERT INTO SQLTEST select 4,'james',32 ").print();
}
@Test
public void testProject() throws Exception {
tEnv.executeSql(
"CREATE TABLE "
+ INPUT_TABLE
+ "("
+ "id DECIMAL,"
+ "name STRING,"
+ "age DECIMAL"
+ ") WITH ("
+ " 'connector'='jdbc',"
+ " 'url'='" + DB_URL
+ "',"
+ " 'username' = 'system', "
+ " 'password' = 'oracle', "
+ " 'table-name'='"
+ INPUT_TABLE
+ "',"
+ " 'scan.partition.column'='id',"
+ " 'scan.partition.num'='2',"
+ " 'scan.partition.lower-bound'='0',"
+ " 'scan.partition.upper-bound'='100'"
+ ")");
Iterator<Row> collected =
tEnv.executeSql("SELECT id,name FROM " + INPUT_TABLE)
.collect();
List<String> result =
CollectionUtil.iteratorToList(collected).stream()
.map(Row::toString)
.sorted()
.collect(Collectors.toList());
List<String> expected =
Stream.of("+I[1, wanghuan]",
"+I[2, wangwu]",
"+I[3, zhaoliu]",
"+I[54, wangkobe]")
.sorted()
.collect(Collectors.toList());
assertEquals(expected, result);
}
}
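The tests assume an SQLTEST table already exists in the Oracle instance with a few sample rows. A minimal sketch of what that table might look like (column types and the age values are assumptions; the ids and names are taken from the expected output of testSelectJdbcSource):
CREATE TABLE SQLTEST (
  ID   NUMBER(10) PRIMARY KEY,  -- assumed Oracle type for the Flink DECIMAL column "id"
  NAME VARCHAR2(50),            -- assumed length
  AGE  NUMBER(10)               -- assumed Oracle type for the Flink DECIMAL column "age"
);
-- sample rows; ages are placeholder values
INSERT INTO SQLTEST VALUES (1, 'fanfan', 20);
INSERT INTO SQLTEST VALUES (2, 'wangwu', 21);
INSERT INTO SQLTEST VALUES (3, 'zhaoliu', 22);
INSERT INTO SQLTEST VALUES (4, 'james', 23);
INSERT INTO SQLTEST VALUES (54, 'wangkobe', 24);
COMMIT;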
Upload the Jars to Flink's lib Directory
Place ojdbc6-11.2.0.4.jar under $FLINK_HOME/lib.
Place the rebuilt flink-connector-jdbc_2.11-1.13.6.jar (which now contains the Oracle connector) under $FLINK_HOME/lib, replacing the old flink-connector-jdbc_2.11-1.13.6.jar.
Sink Oracle
The Oracle table structure must be created manually in advance.
Note: table and column names are case-sensitive here. Unquoted Oracle identifiers are stored in uppercase, so the names used on the Flink side must match how they are stored in Oracle.
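A minimal sketch of the Oracle-side DDL for the JDBC_SINK_ORACLE sink table used in the next section (column types and lengths are assumptions; adjust them to your data):
CREATE TABLE JDBC_SINK_ORACLE (
  ID   NUMBER(19) PRIMARY KEY,  -- maps to the Flink BIGINT column "id"; type is an assumption
  NAME VARCHAR2(200),           -- maps to the Flink STRING column "name"; length is an assumption
  TS   VARCHAR2(200)            -- maps to the Flink STRING column "ts"; length is an assumption
);
Created without double quotes, the identifiers are stored in uppercase; if you create them with double quotes, the exact case must match what the Flink job sends.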
Flink SQL
CREATE TABLE users (
`id` BIGINT,
`name` STRING,
`ts` STRING
) WITH (
'connector' = 'kafka',
'topic' = 'user',
'properties.bootstrap.servers' = 'XXX.XXX.XXX.XXX:9092',
'properties.group.id' = 'testGroup',
'scan.startup.mode' = 'earliest-offset',
'format' = 'json'
);
CREATE TABLE kafka_sink_oracle (
id BIGINT ,
name STRING,
ts STRING ,
PRIMARY KEY(id) NOT ENFORCED
) WITH (
'connector' = 'jdbc',
'url' = 'jdbc:oracle:thin:@XXX.XXX.XXX.XXX:49161:XE',
'driver' = 'oracle.jdbc.driver.OracleDriver',
'username' = 'system',
'password' = 'oracle',
'table-name' = 'JDBC_SINK_ORACLE'
);
INSERT INTO kafka_sink_oracle SELECT id, UPPER(name) AS name, ts FROM users;
Verification
Note: if the table name's case differs from Oracle's default uppercase, it must be wrapped in double quotes when querying.
SELECT * FROM JDBC_SINK_ORACLE;
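For example, if the table had instead been created with a quoted, non-uppercase name (a hypothetical "jdbc_sink_oracle"), matching double quotes would be required:
-- hypothetical: only needed when the table was created with a quoted lowercase name
SELECT * FROM "jdbc_sink_oracle";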
Jar Download
https://download.csdn.net/download/wanghuan524/85638875