Flink Table ClickHouse Connector 范例

本文详述了如何使用 Flink 的 Table API 与 ClickHouse 进行数据集成，通过实例解析配置和操作步骤，深入理解大数据实时处理流程。
摘要由CSDN通过智能技术生成
package connector.clickhouse.table.catalog;



import connector.clickhouse.table.ClickHouseDynamicTableFactory;
import connector.clickhouse.table.config.ClickHouseConfig;
import connector.clickhouse.table.util.ClickHouseTypeUtil;
import connector.clickhouse.table.util.ClickHouseUtil;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.*;
import org.apache.flink.table.catalog.exceptions.*;
import org.apache.flink.table.catalog.stats.CatalogColumnStatistics;
import org.apache.flink.table.catalog.stats.CatalogTableStatistics;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.factories.Factory;
import org.apache.flink.util.StringUtils;
import org.apache.flink.util.function.FunctionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.response.ClickHouseColumnInfo;
import ru.yandex.clickhouse.response.ClickHouseResultSetMetaData;
import ru.yandex.clickhouse.settings.ClickHouseQueryParam;

import javax.annotation.Nullable;
import java.lang.reflect.Method;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.*;
import java.util.stream.Stream;


import static org.apache.flink.util.Preconditions.checkArgument;

/** ClickHouse catalog. */
public class ClickHouseCatalog extends AbstractCatalog {

    private static final Logger LOG = LoggerFactory.getLogger(ClickHouseCatalog.class);

    public static final String DEFAULT_DATABASE = "default";

    public static final String BUILTIN_DATABASE = "system";

    private final String baseUrl;

    private final String username;

    private final String password;

    private final Map<String, String> properties;

    private ClickHouseConnection connection;

    /**
     * Creates a catalog whose default database, URL, username and password are all looked up in
     * {@code properties} under the corresponding {@link ClickHouseConfig} keys.
     *
     * @param catalogName name of this catalog
     * @param properties option map; it supplies the connection settings and is also passed on
     *     unchanged as the catalog's property map
     */
    public ClickHouseCatalog(String catalogName, Map<String, String> properties) {
        this(
                catalogName,
                properties.get(ClickHouseConfig.DATABASE_NAME),
                properties.get(ClickHouseConfig.URL),
                properties.get(ClickHouseConfig.USERNAME),
                properties.get(ClickHouseConfig.PASSWORD),
                properties);
    }

    /**
     * Creates a catalog from explicit connection settings and an empty property map.
     *
     * @param catalogName name of this catalog
     * @param defaultDatabase default database, or {@code null} to use {@link #DEFAULT_DATABASE}
     * @param baseUrl ClickHouse base URL
     * @param username user to connect as
     * @param password password of {@code username}
     */
    public ClickHouseCatalog(
            String catalogName,
            @Nullable String defaultDatabase,
            String baseUrl,
            String username,
            String password) {
        this(catalogName, defaultDatabase, baseUrl, username, password, Collections.emptyMap());
    }

    /**
     * Creates a catalog from explicit connection settings plus additional properties.
     *
     * <p>The base URL is normalized so that it always ends with {@code "/"}. The property map is
     * copied, so later changes to the caller's map do not affect this catalog.
     *
     * @param catalogName name of this catalog
     * @param defaultDatabase default database, or {@code null} to use {@link #DEFAULT_DATABASE}
     * @param baseUrl ClickHouse base URL; must not be null or blank
     * @param username user to connect as; must not be null or blank
     * @param password password of {@code username}; must not be null or blank
     * @param properties additional properties, forwarded to the JDBC data source on {@link
     *     #open()} and to the tables returned by {@link #getTable(ObjectPath)}; must not be null
     * @throws IllegalArgumentException if {@code baseUrl}, {@code username} or {@code password}
     *     is null or blank
     * @throws NullPointerException if {@code properties} is null
     */
    public ClickHouseCatalog(
            String catalogName,
            @Nullable String defaultDatabase,
            String baseUrl,
            String username,
            String password,
            Map<String, String> properties) {
        super(catalogName, defaultDatabase == null ? DEFAULT_DATABASE : defaultDatabase);

        checkArgument(
                !StringUtils.isNullOrWhitespaceOnly(baseUrl), "baseUrl cannot be null or empty");
        checkArgument(
                !StringUtils.isNullOrWhitespaceOnly(username), "username cannot be null or empty");
        checkArgument(
                !StringUtils.isNullOrWhitespaceOnly(password), "password cannot be null or empty");
        Objects.requireNonNull(properties, "properties cannot be null");

        this.baseUrl = baseUrl.endsWith("/") ? baseUrl : baseUrl + "/";
        this.username = username;
        this.password = password;
        // Defensive copy: an unmodifiable *view* would still reflect later mutations made by the
        // caller through its own reference to the map.
        this.properties = Collections.unmodifiableMap(new HashMap<>(properties));
    }

    /**
     * Opens a JDBC connection to ClickHouse for the catalog's default database.
     *
     * <p>The catalog properties plus the username and password are handed to a {@link
     * BalancedClickhouseDataSource}, from which the connection is obtained. The method is
     * {@code synchronized} like every other method that touches the connection, so the
     * assignment is made under the same monitor that the readers and {@link #close()} use.
     *
     * @throws CatalogException if the data source or the connection cannot be created
     */
    @Override
    public synchronized void open() throws CatalogException {
        try {
            Properties configuration = new Properties();
            configuration.putAll(properties);
            configuration.put(ClickHouseQueryParam.USER.getKey(), username);
            configuration.put(ClickHouseQueryParam.PASSWORD.getKey(), password);
            String jdbcUrl = ClickHouseUtil.getJdbcUrl(baseUrl, getDefaultDatabase());
            BalancedClickhouseDataSource dataSource =
                    new BalancedClickhouseDataSource(jdbcUrl, configuration);
            dataSource.actualize();
            connection = dataSource.getConnection();
            LOG.info("Created catalog {}, established connection to {}", getName(), jdbcUrl);
        } catch (Exception e) {
            throw new CatalogException(String.format("Opening catalog %s failed.", getName()), e);
        }
    }

    /**
     * Closes the JDBC connection held by this catalog.
     *
     * <p>If no connection was ever established (for example {@link #open()} was not called or
     * failed), this method does nothing instead of failing on a null connection.
     *
     * @throws CatalogException if closing the connection fails
     */
    @Override
    public synchronized void close() throws CatalogException {
        if (connection == null) {
            // Nothing was opened, so there is nothing to release.
            return;
        }
        try {
            connection.close();
            LOG.info("Closed catalog {} ", getName());
        } catch (Exception e) {
            throw new CatalogException(String.format("Closing catalog %s failed.", getName()), e);
        }
    }

    /**
     * Returns the table factory used for tables of this catalog.
     *
     * @return an {@link Optional} holding a new {@link ClickHouseDynamicTableFactory}
     */
    @Override
    public Optional<Factory> getFactory() {
        return Optional.of(new ClickHouseDynamicTableFactory());
    }

    // ------------- databases -------------

    /**
     * Lists the names of all databases recorded in {@code system.databases}.
     *
     * @return the database names, in the order the query returned them
     * @throws CatalogException if the query fails
     */
    @Override
    public synchronized List<String> listDatabases() throws CatalogException {
        // The `system` database is kept in the result on purpose: callers sometimes need to
        // look it up.
        final List<String> names = new ArrayList<>();
        try (PreparedStatement query =
                        connection.prepareStatement("SELECT name from `system`.databases");
                ResultSet rows = query.executeQuery()) {
            while (rows.next()) {
                names.add(rows.getString(1));
            }
        } catch (Exception e) {
            final String message =
                    String.format("Failed listing database in catalog %s", getName());
            throw new CatalogException(message, e);
        }
        return names;
    }

    /**
     * Returns a descriptor for an existing database.
     *
     * @param databaseName name of the database to look up
     * @return a {@link CatalogDatabaseImpl} with no properties and no comment
     * @throws DatabaseNotExistException if {@link #listDatabases()} does not contain the name
     * @throws CatalogException if listing the databases fails
     */
    @Override
    public CatalogDatabase getDatabase(String databaseName)
            throws DatabaseNotExistException, CatalogException {
        final boolean known = listDatabases().contains(databaseName);
        if (!known) {
            throw new DatabaseNotExistException(getName(), databaseName);
        }
        return new CatalogDatabaseImpl(Collections.emptyMap(), null);
    }

    /**
     * Tells whether a database with the given name is listed by {@link #listDatabases()}.
     *
     * @param databaseName name to check; must not be null or blank
     * @return {@code true} if the database is listed
     * @throws IllegalArgumentException if {@code databaseName} is null or blank
     * @throws CatalogException if listing the databases fails
     */
    @Override
    public boolean databaseExists(String databaseName) throws CatalogException {
        final boolean blankName = StringUtils.isNullOrWhitespaceOnly(databaseName);
        checkArgument(!blankName);

        final List<String> knownDatabases = listDatabases();
        return knownDatabases.contains(databaseName);
    }

    /** Not supported: creating databases always throws {@link UnsupportedOperationException}. */
    @Override
    public void createDatabase(String name, CatalogDatabase database, boolean ignoreIfExists)
            throws DatabaseAlreadyExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: dropping databases always throws {@link UnsupportedOperationException}. */
    @Override
    public void dropDatabase(String name, boolean ignoreIfNotExists, boolean cascade)
            throws DatabaseNotEmptyException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: altering databases always throws {@link UnsupportedOperationException}. */
    @Override
    public void alterDatabase(String name, CatalogDatabase newDatabase, boolean ignoreIfNotExists)
            throws DatabaseNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    // ------------- tables -------------

    /**
     * Lists the names of all tables that {@code system.tables} records for a database.
     *
     * <p>The database name is bound as a statement parameter rather than formatted into the SQL
     * text, so names containing quote characters cannot break or alter the query.
     *
     * @param databaseName database whose tables are listed; must not be null or blank
     * @return the table names, in the order the query returned them
     * @throws DatabaseNotExistException if the database does not exist
     * @throws CatalogException if the query fails
     */
    @Override
    public synchronized List<String> listTables(String databaseName)
            throws DatabaseNotExistException, CatalogException {
        if (!databaseExists(databaseName)) {
            throw new DatabaseNotExistException(getName(), databaseName);
        }

        try (PreparedStatement stmt =
                connection.prepareStatement(
                        "SELECT name from `system`.tables where database = ?")) {
            stmt.setString(1, databaseName);

            try (ResultSet rs = stmt.executeQuery()) {
                List<String> tables = new ArrayList<>();

                while (rs.next()) {
                    tables.add(rs.getString(1));
                }

                return tables;
            }
        } catch (Exception e) {
            throw new CatalogException(
                    String.format(
                            "Failed listing tables in catalog %s database %s",
                            getName(), databaseName),
                    e);
        }
    }

    /** Not supported: listing views always throws {@link UnsupportedOperationException}. */
    @Override
    public List<String> listViews(String databaseName)
            throws DatabaseNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /**
     * Builds the catalog table for an existing ClickHouse table.
     *
     * <p>The table options are the catalog properties overlaid with the URL, database name,
     * table name, username and password; the schema is derived by {@code createTableSchema}.
     *
     * @param tablePath database and table name
     * @return a {@link CatalogTableImpl} with an empty comment
     * @throws TableNotExistException if the table does not exist
     * @throws CatalogException if the existence check or schema lookup fails
     */
    @Override
    public CatalogBaseTable getTable(ObjectPath tablePath)
            throws TableNotExistException, CatalogException {
        // TODO add primary/partition key in the future?
        final boolean present = tableExists(tablePath);
        if (!present) {
            throw new TableNotExistException(getName(), tablePath);
        }

        final Map<String, String> tableOptions = new HashMap<>(properties);
        tableOptions.put(ClickHouseConfig.URL, baseUrl);
        tableOptions.put(ClickHouseConfig.DATABASE_NAME, tablePath.getDatabaseName());
        tableOptions.put(ClickHouseConfig.TABLE_NAME, tablePath.getObjectName());
        tableOptions.put(ClickHouseConfig.USERNAME, username);
        tableOptions.put(ClickHouseConfig.PASSWORD, password);

        final TableSchema schema =
                createTableSchema(tablePath.getDatabaseName(), tablePath.getObjectName());
        return new CatalogTableImpl(schema, tableOptions, "");
    }

    /**
     * Derives the Flink schema of a ClickHouse table from the result-set metadata of an empty
     * query against it.
     *
     * <p>Column descriptions are read through the non-public {@code getCol(int)} method of
     * {@link ClickHouseResultSetMetaData}, invoked reflectively.
     *
     * @param databaseName database of the table
     * @param tableName name of the table
     * @return a schema with one field per column, in column order
     * @throws CatalogException if the query, the reflective access or the type mapping fails
     */
    private synchronized TableSchema createTableSchema(String databaseName, String tableName) {
        // 1.Maybe has compatibility problems with the different version of clickhouse jdbc. 2. Is
        // it more appropriate to use type literals from `system.columns` to convert Flink data
        // types? 3. All queried data will be obtained before PreparedStatement is closed, so we
        // must add `limit 0` statement to avoid data transmission to the client, look at
        // `ChunkedInputStream.close()` for more info.
        // NOTE(review): the names are placed between backticks without escaping; a name that
        // itself contains a backtick would break this statement - confirm whether that can occur.
        try (PreparedStatement stmt =
                connection.prepareStatement(
                        String.format(
                                "SELECT * from `%s`.`%s` limit 0", databaseName, tableName))) {
            ClickHouseResultSetMetaData metaData =
                    stmt.getMetaData().unwrap(ClickHouseResultSetMetaData.class);
            Method getColMethod = metaData.getClass().getDeclaredMethod("getCol", int.class);
            getColMethod.setAccessible(true);

            // A plain loop over the 1-based column indexes: the builder is mutable, so folding
            // it through Stream.reduce (which expects an immutable identity and a real
            // combiner) only worked because the stream happened to be sequential.
            TableSchema.Builder builder = TableSchema.builder();
            int columnCount = metaData.getColumnCount();
            for (int index = 1; index <= columnCount; index++) {
                ClickHouseColumnInfo columnInfo =
                        (ClickHouseColumnInfo) getColMethod.invoke(metaData, index);
                builder.field(
                        columnInfo.getColumnName(), ClickHouseTypeUtil.toFlinkType(columnInfo));
            }
            return builder.build();
        } catch (Exception e) {
            throw new CatalogException(
                    String.format(
                            "Failed getting columns in catalog %s database %s table %s",
                            getName(), databaseName, tableName),
                    e);
        }
    }

    /**
     * Tells whether the given table exists.
     *
     * <p>{@link #listTables(String)} already verifies the database and reports a missing one
     * with {@link DatabaseNotExistException}, so no separate database check is made here; that
     * avoids querying the database list twice per call.
     *
     * @param tablePath database and table name
     * @return {@code true} if the database exists and lists the table, {@code false} otherwise
     * @throws CatalogException if one of the underlying queries fails
     */
    @Override
    public boolean tableExists(ObjectPath tablePath) throws CatalogException {
        try {
            return listTables(tablePath.getDatabaseName()).contains(tablePath.getObjectName());
        } catch (DatabaseNotExistException e) {
            // A table cannot exist in a database that does not exist.
            return false;
        }
    }

    /** Not supported: dropping tables always throws {@link UnsupportedOperationException}. */
    @Override
    public void dropTable(ObjectPath tablePath, boolean ignoreIfNotExists)
            throws TableNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: renaming tables always throws {@link UnsupportedOperationException}. */
    @Override
    public void renameTable(ObjectPath tablePath, String newTableName, boolean ignoreIfNotExists)
            throws TableNotExistException, TableAlreadyExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: creating tables always throws {@link UnsupportedOperationException}. */
    @Override
    public void createTable(ObjectPath tablePath, CatalogBaseTable table, boolean ignoreIfExists)
            throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: altering tables always throws {@link UnsupportedOperationException}. */
    @Override
    public void alterTable(
            ObjectPath tablePath, CatalogBaseTable newTable, boolean ignoreIfNotExists)
            throws TableNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    // ------------- partitions -------------

    /** Not supported: listing partitions always throws {@link UnsupportedOperationException}. */
    @Override
    public List<CatalogPartitionSpec> listPartitions(ObjectPath tablePath)
            throws TableNotExistException, TableNotPartitionedException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: listing partitions under a partial spec always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public List<CatalogPartitionSpec> listPartitions(
            ObjectPath tablePath, CatalogPartitionSpec partitionSpec)
            throws TableNotExistException, TableNotPartitionedException,
                    PartitionSpecInvalidException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: listing partitions by filter always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public List<CatalogPartitionSpec> listPartitionsByFilter(
            ObjectPath tablePath, List<Expression> filters)
            throws TableNotExistException, TableNotPartitionedException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: fetching a partition always throws {@link UnsupportedOperationException}. */
    @Override
    public CatalogPartition getPartition(ObjectPath tablePath, CatalogPartitionSpec partitionSpec)
            throws PartitionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: checking for a partition always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public boolean partitionExists(ObjectPath tablePath, CatalogPartitionSpec partitionSpec)
            throws CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: creating partitions always throws {@link UnsupportedOperationException}. */
    @Override
    public void createPartition(
            ObjectPath tablePath,
            CatalogPartitionSpec partitionSpec,
            CatalogPartition partition,
            boolean ignoreIfExists)
            throws TableNotExistException, TableNotPartitionedException,
                    PartitionSpecInvalidException, PartitionAlreadyExistsException,
                    CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: dropping partitions always throws {@link UnsupportedOperationException}. */
    @Override
    public void dropPartition(
            ObjectPath tablePath, CatalogPartitionSpec partitionSpec, boolean ignoreIfNotExists)
            throws PartitionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: altering partitions always throws {@link UnsupportedOperationException}. */
    @Override
    public void alterPartition(
            ObjectPath tablePath,
            CatalogPartitionSpec partitionSpec,
            CatalogPartition newPartition,
            boolean ignoreIfNotExists)
            throws PartitionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    // ------------- functions -------------

    /**
     * Lists the functions of a database; this catalog exposes none.
     *
     * @param dbName database name (not inspected)
     * @return always an empty list
     */
    @Override
    public List<String> listFunctions(String dbName)
            throws DatabaseNotExistException, CatalogException {
        return Collections.emptyList();
    }

    /**
     * Looks up a function; this catalog exposes none, so the lookup always fails.
     *
     * @param functionPath path of the requested function
     * @throws FunctionNotExistException always
     */
    @Override
    public CatalogFunction getFunction(ObjectPath functionPath)
            throws FunctionNotExistException, CatalogException {
        throw new FunctionNotExistException(getName(), functionPath);
    }

    /**
     * Tells whether a function exists; this catalog exposes none.
     *
     * @param functionPath path of the function (not inspected)
     * @return always {@code false}
     */
    @Override
    public boolean functionExists(ObjectPath functionPath) throws CatalogException {
        return false;
    }

    /** Not supported: creating functions always throws {@link UnsupportedOperationException}. */
    @Override
    public void createFunction(
            ObjectPath functionPath, CatalogFunction function, boolean ignoreIfExists)
            throws FunctionAlreadyExistException, DatabaseNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: altering functions always throws {@link UnsupportedOperationException}. */
    @Override
    public void alterFunction(
            ObjectPath functionPath, CatalogFunction newFunction, boolean ignoreIfNotExists)
            throws FunctionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /** Not supported: dropping functions always throws {@link UnsupportedOperationException}. */
    @Override
    public void dropFunction(ObjectPath functionPath, boolean ignoreIfNotExists)
            throws FunctionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    // ------------- statistics -------------

    /**
     * Returns table statistics; this catalog does not collect any.
     *
     * <p>Reports {@link CatalogTableStatistics#UNKNOWN} instead of throwing, matching {@link
     * #getTableColumnStatistics(ObjectPath)}, so that callers reading both statistics of a table
     * are not aborted by an {@link UnsupportedOperationException}.
     *
     * @param tablePath path of the table (not inspected)
     * @return always {@link CatalogTableStatistics#UNKNOWN}
     */
    @Override
    public CatalogTableStatistics getTableStatistics(ObjectPath tablePath)
            throws TableNotExistException, CatalogException {
        return CatalogTableStatistics.UNKNOWN;
    }

    /**
     * Returns column statistics of a table; this catalog does not collect any.
     *
     * @param tablePath path of the table (not inspected)
     * @return always {@link CatalogColumnStatistics#UNKNOWN}
     */
    @Override
    public CatalogColumnStatistics getTableColumnStatistics(ObjectPath tablePath)
            throws TableNotExistException, CatalogException {
        return CatalogColumnStatistics.UNKNOWN;
    }

    /**
     * Returns partition statistics; this catalog does not collect any.
     *
     * <p>Reports {@link CatalogTableStatistics#UNKNOWN} instead of throwing, matching {@link
     * #getPartitionColumnStatistics(ObjectPath, CatalogPartitionSpec)}.
     *
     * @param tablePath path of the table (not inspected)
     * @param partitionSpec partition of interest (not inspected)
     * @return always {@link CatalogTableStatistics#UNKNOWN}
     */
    @Override
    public CatalogTableStatistics getPartitionStatistics(
            ObjectPath tablePath, CatalogPartitionSpec partitionSpec)
            throws PartitionNotExistException, CatalogException {
        return CatalogTableStatistics.UNKNOWN;
    }

    /**
     * Returns column statistics of a partition; this catalog does not collect any.
     *
     * @param tablePath path of the table (not inspected)
     * @param partitionSpec partition of interest (not inspected)
     * @return always {@link CatalogColumnStatistics#UNKNOWN}
     */
    @Override
    public CatalogColumnStatistics getPartitionColumnStatistics(
            ObjectPath tablePath, CatalogPartitionSpec partitionSpec)
            throws PartitionNotExistException, CatalogException {
        return CatalogColumnStatistics.UNKNOWN;
    }

    /**
     * Not supported: updating table statistics always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public void alterTableStatistics(
            ObjectPath tablePath, CatalogTableStatistics tableStatistics, boolean ignoreIfNotExists)
            throws TableNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: updating table column statistics always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public void alterTableColumnStatistics(
            ObjectPath tablePath,
            CatalogColumnStatistics columnStatistics,
            boolean ignoreIfNotExists)
            throws TableNotExistException, CatalogException, TablePartitionedException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: updating partition statistics always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public void alterPartitionStatistics(
            ObjectPath tablePath,
            CatalogPartitionSpec partitionSpec,
            CatalogTableStatistics partitionStatistics,
            boolean ignoreIfNotExists)
            throws PartitionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }

    /**
     * Not supported: updating partition column statistics always throws {@link
     * UnsupportedOperationException}.
     */
    @Override
    public void alterPartitionColumnStatistics(
            ObjectPath tablePath,
            CatalogPartitionSpec partitionSpec,
            CatalogColumnStatistics columnStatistics,
            boolean ignoreIfNotExists)
            throws PartitionNotExistException, CatalogException {
        throw new UnsupportedOperationException();
    }
}
package connector.clickhouse.table.catalog;


import connector.clickhouse.table.config.ClickHouseConfig;
import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.descriptors.DescriptorProperties;
import org.apache.flink.table.factories.CatalogFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.apache.flink.table.descriptors.CatalogDescriptorValidator.*;

/**
 * Factory for {@link ClickHouseCatalog}.
 *
 * <p>Matches catalogs whose type equals {@link ClickHouseConfig#IDENTIFIER} and builds the
 * catalog from the validated URL, username, password and optional default database.
 */
public class ClickHouseCatalogFactory implements CatalogFactory {

    /**
     * Returns the context that selects this factory.
     *
     * @return the catalog type {@link ClickHouseConfig#IDENTIFIER} and property version 1
     */
    @Override
    public Map<String, String> requiredContext() {
        Map<String, String> context = new HashMap<>(2);
        context.put(CATALOG_TYPE, ClickHouseConfig.IDENTIFIER);
        context.put(CATALOG_PROPERTY_VERSION, "1");
        return context;
    }

    /**
     * Returns the property keys this factory understands.
     *
     * @return the default-database, URL, username and password keys
     */
    @Override
    public List<String> supportedProperties() {
        List<String> properties = new ArrayList<>();
        properties.add(CATALOG_DEFAULT_DATABASE);
        properties.add(ClickHouseConfig.URL);
        properties.add(ClickHouseConfig.USERNAME);
        properties.add(ClickHouseConfig.PASSWORD);
        return properties;
    }

    /**
     * Creates a {@link ClickHouseCatalog} from the given properties.
     *
     * <p>The default database is optional: when the key is absent, {@code null} is passed and
     * the catalog falls back to its own default instead of this method failing on the lookup.
     *
     * @param name catalog name
     * @param properties catalog properties; also forwarded unchanged to the catalog
     * @return the configured catalog
     */
    @Override
    public Catalog createCatalog(String name, Map<String, String> properties) {
        final DescriptorProperties props = getValidatedProperties(properties);

        return new ClickHouseCatalog(
                name,
                props.getOptionalString(CATALOG_DEFAULT_DATABASE).orElse(null),
                props.getString(ClickHouseConfig.URL),
                props.getString(ClickHouseConfig.USERNAME),
                props.getString(ClickHouseConfig.PASSWORD),
                properties);
    }

    /** Wraps the raw properties and runs them through {@code ClickHouseCatalogValidator}. */
    private static DescriptorProperties getValidatedProperties(Map<String, String> properties) {
        final DescriptorProperties descriptorProperties = new DescriptorProperties(true);
        descriptorProperties.putProperties(properties);
        new ClickHouseCatalogValidator().validate(descriptorProperties);
        return descriptorProperties;
    }
}

package connector.clickhouse.table.catalog;

import connector.clickhouse.table.config.ClickHouseConfig;
import org.apache.flink.table.descriptors.CatalogDescriptorValidator;
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值