Flink-connector-sql写入doris on zeppelin 实践

最新推荐文章于 2024-08-05 06:00:00 发布

黄瓜炖啤酒鸭

最新推荐文章于 2024-08-05 06:00:00 发布

阅读量4k

点赞数 2

分类专栏： apache doris 文章标签： doris sql flink+doris

本文链接：https://blog.csdn.net/qq_31866793/article/details/109619711

版权

本文介绍了如何在Flink中实现对Doris的读写操作，通过Flink SQL实现Doris的数据处理。作者详细讲解了自定义Doris Sink的开发过程，包括连接器实现、Stream Load方式以及在Zeppelin环境中提交任务的注意事项。在遇到性能问题时，采用队列确保单线程有序写入。同时，提到了在集群运行时可能出现的打包问题及其解决方案。

摘要由CSDN通过智能技术生成

目标：在Flink代码里面支持对doris的读写操作，Flink sql on doris，将flink sql作为计算引擎。

1，参考：https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/dev/table/sourceSinks.html#full-stack-example

整个流程：

2，自己开发

1）创建解析和验证选项的工厂，

2）实现表连接器（table connector），

3）实现和发现自定义格式，

4）使用 Flink 提供的工具类，如数据结构转换器和 FactoryUtil。

doris sink的实现开发

1，先实现 DorisDynamicTableSourceFactory（注意：类名虽然带 Source，但它实现的是 DynamicTableSinkFactory 接口，即 sink 工厂）：

package org.apache.flink.connector.doris.table;

import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.utils.TableSchemaUtils;

import java.time.Duration;
import java.util.HashSet;
import java.util.Set;

/**
 * @program: flink-neiwang-dev
 * @description: 1
 * @author: Mr.Wang
 * @create: 2020-11-12 14:04
 **/
public class DorisDynamicTableSourceFactory implements DynamicTableSinkFactory {
    // todo: the connector name is "doris"
    // NOTE(review): presumably this is the value the factory reports as its identifier — confirm
    // against the rest of the class, which is not visible in this excerpt.
    public static final String IDENTIFIER = "doris";

    /**
     * Option with key {@code url}: string-typed, no default value.
     * Its description text calls it the JDBC database URL.
     */
    public static final ConfigOption<String> URL = ConfigOptions
            .key("url")
            .stringType()
            .noDefaultValue()
            .withDescription("the jdbc database url.");
    /**
     * Option with key {@code table-name}: string-typed, no default value.
     * Its description text calls it the JDBC table name.
     */
    public static final ConfigOption<String> TABLE_NAME = ConfigOptions
            .key("table-name")
            .stringType()
            .noDefaultValue()
            .withDescription("the jdbc table name.");
    /**
     * Option with key {@code username}: string-typed, no default value.
     * Its description text calls it the JDBC user name.
     */
    public static final ConfigOption<String> USERNAME = ConfigOptions
            .key("username")
            .stringType()
            .noDefaultValue()
            .withDescription("the jdbc user name.");
    /**
     * Option with key {@code password}: string-typed, no default value.
     * Its description text calls it the JDBC password.
     */
    public static final ConfigOption<String> PASSWORD = ConfigOptions
            .key("password")
            .stringType()
            .noDefaultValue()
            .withDescription("the jdbc password.");
    /**
     * Option with key {@code driver}: string-typed, no default value.
     * Per its description text, this is the JDBC driver class name, and it is derived
     * from the URL when not set (the derivation itself is not visible in this excerpt).
     */
    private static final ConfigOption<String> DRIVER = ConfigOptions
            .key("driver")
            .stringType()
            .noDefaultValue()
            .withDescription("the class name of the JDBC driver to use to connect to this URL. " +
                    "If not set, it will automatically be derived from the URL.");
    // read (scan) config options
    /**
     * Option with key {@code scan.partition.column}: string-typed, no default value.
     * Its description text calls it the column name used for partitioning the input.
     */
    private static final ConfigOption<String> SCAN_PARTITION_COLUMN = ConfigOptions
            .key("scan.partition.column")
            .stringType()
            .noDefaultValue()
            .withDescription("the column name used for partitioning the input.");
    /**
     * Option with key {@code scan.partition.num}: int-typed, no default value.
     * Its description text calls it the number of partitions.
     */
    private static final ConfigOption<Integer> SCAN_PARTITION_NUM = ConfigOptions
            .key("scan.partition.num")
            .intType()
            .noDefaultValue()
            .withDescription("the number of partitions.");
    /**
     * Option with key {@code scan.partition.lower-bound}: long-typed, no default value.
     * Its description text calls it the smallest value of the first partition.
     */
    private static final ConfigOption<Long> SCAN_PARTITION_LOWER_BOUND = ConfigOptions
            .key("scan.partition.lower-bound")
            .longType()
            .noDefaultValue()
            .withDescription("the smallest value of the first partition.");
    /**
     * Option with key {@code scan.partition.upper-bound}: long-typed, no default value.
     * Its description text calls it the largest value of the last partition.
     */
    private static final ConfigOption<Long> SCAN_PARTITION_UPPER_BOUND = ConfigOptions
            .key("scan.partition.upper-bound")
            .longType()
            .noDefaultValue()
            .withDescription("the largest value of the last partition.");
    /**
     * Option with key {@code scan.fetch-size}: int-typed, default value {@code 0}.
     * Per its description text, it is a hint for how many rows to fetch per round trip,
     * and a value of zero means the hint is ignored.
     */
    private static final ConfigOption<Integer> SCAN_FETCH_SIZE = ConfigOptions
            .key("scan.fetch-size")
            .intType()
            .defaultValue(0)
            .withDescription("gives the reader a hint as to the number of rows that should be fetched, from" +
                    " the database when reading per round trip. If the value specified is zero, then the hint is ignored. The" +
                    " default value is zero.");

    // look up config options
    private static final ConfigOption<Long> LOOKUP_CACHE_MAX_ROWS = ConfigOptions
            .key("lookup.cache.max-rows")
            .longType()
            .defaultValue(-1L)
            .withDescription("the max number of rows of lookup cache, over this value, the oldest rows will " +
                    "be eliminated. \"cache.max-rows\" and \"cache.ttl\" options must all be specified if any of them is " +
                    "specified. Cache is