Flink local IDE environment: Stream and Table ENV utility class (Kerberos and HiveCatalog setup)

Features:
(1) Kerberos authentication
(2) The Table environment reads data through a HiveCatalog
(3) A test example is included

Handy for testing Flink features locally.

import com.amihaiemil.eoyaml.*;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.configuration.*;
import static org.apache.flink.configuration.ConfigOptions.key;
import org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.*;
import org.apache.flink.table.catalog.exceptions.TableNotExistException;
import org.apache.flink.table.catalog.hive.HiveCatalog;
import org.apache.flink.table.types.DataType;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.*;

@Data
public class FlinkEnvProvider {
    public FlinkEnvProvider() {
        init();
    }

    private StreamExecutionEnvironment senv;
    private StreamTableEnvironment tenv;

    public void init() {
        senv = getStreamEnv();
        tenv = getTableEnv(senv);
    }

    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public static class FlinkCatalog {
        private AbstractCatalog catalog;
        private String confDir;
    }

    public static Configuration getFlinkProperties() {
        final ConfigOption<String> TASK_MANAGER_LOG_PATH_KEY =
                key(ConfigConstants.TASK_MANAGER_LOG_PATH_KEY)
                        .stringType()
                        .noDefaultValue()
                        .withDescription("");
        String krb5Conf = "D:\\configs\\krb5.conf";
        String keytab = "D:\\configs\\xxx.keytab";
        String principal = "xxx@H3COASIS.COM";
        System.setProperty("java.security.krb5.conf", krb5Conf);

        Configuration config = new Configuration();
        config.set(SecurityOptions.KERBEROS_KRB5_PATH, krb5Conf);
        config.set(SecurityOptions.KERBEROS_LOGIN_KEYTAB, keytab);
        config.set(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, principal);
        config.set(SecurityOptions.KERBEROS_LOGIN_CONTEXTS, "Client,KafkaClient");
        config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
        config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///chkp"); // local path; must not contain a drive letter such as D:/
        // config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "hdfs://mycluster/flink/flink-checkpoints/it_cdc/simple_test");
        config.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.of(20, ChronoUnit.SECONDS));
        config.set(RestOptions.BIND_PORT, "8081-8181");
        config.set(DeploymentOptions.TARGET, "local");
        config.set(WebOptions.LOG_PATH, "tmp/log/job.log");  // local path; must not contain a drive letter such as D:/
        config.set(TASK_MANAGER_LOG_PATH_KEY, "tmp/log/job.log"); // local path; must not contain a drive letter such as D:/

        // config.set(ConfigConstants.TASK_MANAGER_LOG_PATH_KEY, "tmp/log/job.log"); // TaskManager log path
        config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.ofMebiBytes(2048));
        config.set(TaskManagerOptions.TASK_OFF_HEAP_MEMORY, MemorySize.ofMebiBytes(512));
        config.set(TaskManagerOptions.NUM_TASK_SLOTS, 8);  // configure generously; only one TaskManager starts locally
        try {
            UserGroupInformation.loginUserFromKeytab(principal, keytab);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return config;
    }


    public static FlinkCatalog getCatalog() {
        String flinkSqlClientPath = "D:/configs/sql-client-defaults.yaml";
        YamlMapping mapping;
        try {
            mapping = Yaml.createYamlInput(new File(flinkSqlClientPath)).readYamlMapping();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        YamlSequence catalogs = mapping.yamlSequence("catalogs");
        YamlMapping defaultCatalog = catalogs.yamlMapping(0);
        String catalogName = defaultCatalog.string("name");
        String catalogType = defaultCatalog.string("type");
        String hiveConfDir = defaultCatalog.string("hive-conf-dir");
        String defaultDatabase = defaultCatalog.string("default-database");
        String hadoopConfDir = defaultCatalog.string("hadoop-conf-dir");
        AbstractCatalog catalog = null;
        if ("hive".equalsIgnoreCase(catalogType)) {
            catalog = new HiveCatalog(catalogName, defaultDatabase, hiveConfDir, hadoopConfDir, null);
        } else {
            throw new UnsupportedOperationException("Unsupported catalog type: " + catalogType);
        }
        // System.out.println(catalogName + "\t->\t" + catalog);
        return new FlinkCatalog(catalog, hiveConfDir);
    }

    private StreamExecutionEnvironment getStreamEnv() {
        Configuration config = getFlinkProperties();
        StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment(config);
        senv.enableCheckpointing(10_000);
        senv.setParallelism(2);
        return senv;
    }
    
    private StreamTableEnvironment getTableEnv(StreamExecutionEnvironment senv) {
        StreamTableEnvironment tenv = StreamTableEnvironment.create(senv);
        FlinkCatalog flinkCatalog = getCatalog();
        AbstractCatalog clg = flinkCatalog.getCatalog();
        tenv.registerCatalog(clg.getName(), clg);
        tenv.useCatalog(clg.getName());
        // System.out.println(tenv);
        return tenv;
    }

    @Test
    public void testTableEnv() throws TableNotExistException {
        StreamExecutionEnvironment senv = getStreamEnv();
        StreamTableEnvironment tenv = getTableEnv(senv);
        tenv.useDatabase("my_db");
        String[] tbls = tenv.listTables();
        Arrays.stream(tbls).forEach(System.out::println);
        String tablePath = "my_db.my_table";
        Table tbl = tenv.from(tablePath);
        tbl.printSchema();
        // ResolvedSchema resolvedSchema = tbl.getResolvedSchema();
        tenv.toChangelogStream(tbl).print();
        Optional<Catalog> flink_hive_catalog = tenv.getCatalog("hive_catalog");
        HiveCatalog catalog = (HiveCatalog) flink_hive_catalog.get();

        ObjectPath path = new ObjectPath("my_db", "my_table");
        CatalogTableImpl table = (CatalogTableImpl) catalog.getTable(path);
        Map<String, String> options = table.getOptions();
        System.out.println(options);
        List<String> partitionKeys = table.getPartitionKeys();
        TableSchema schema = table.getSchema();
        String[] fieldNames = schema.getFieldNames();
        DataType[] fieldDataTypes = schema.getFieldDataTypes();
        String hdfsPath = options.get("path");
        String tableType = options.get("table.type");
        Schema unresolvedSchema = table.getUnresolvedSchema();
    }
}
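
Once the class compiles, a minimal usage sketch of the provider looks like this (the database and table names are placeholders; getTenv() is the Lombok-generated getter for the tenv field):

import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class FlinkEnvProviderUsage {
    public static void main(String[] args) {
        // Construction runs init(): Kerberos login, local Flink configuration,
        // and registration of the HiveCatalog defined in sql-client-defaults.yaml.
        FlinkEnvProvider provider = new FlinkEnvProvider();
        StreamTableEnvironment tenv = provider.getTenv();

        // Query a table that already exists in the Hive metastore
        // (my_db.my_table is a placeholder) and print the result;
        // executeSql() triggers job execution on its own.
        tenv.executeSql("SELECT * FROM my_db.my_table LIMIT 10").print();
    }
}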

The HiveCatalog configured in the sql-client-defaults.yaml file:

#==============================================================================
# Catalogs
#==============================================================================

# Define catalogs here.
catalogs:
   - name: hive_catalog
     type: hive
     hive-conf-dir: /usr/xxx/xxx/hive2/conf
     default-database: default
     hadoop-conf-dir: /usr/xxx/xxx/hadoop/conf
#catalogs: [] # empty list
# A typical catalog definition looks like:
#  - name: myhive
#    type: hive
#    hive-conf-dir: /opt/hive_conf/
#    default-database: ...

Another thing to note: Hadoop itself also needs to be configured.
The Hadoop conf directory has to be exposed through an environment variable, and HADOOP_HOME must be set as an environment variable as well.
On Windows, HADOOP_HOME is simply the binary release downloaded from the official site (e.g. hadoop-3.3.6.tar.gz), extracted locally, with the winutils.exe file added.
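
As a quick sanity check before launching a job, something like the following can verify the Hadoop setup (a minimal sketch; it only assumes the standard HADOOP_HOME / HADOOP_CONF_DIR variable names and the usual winutils.exe location under %HADOOP_HOME%\bin):

import java.io.File;

public class HadoopEnvCheck {
    public static void main(String[] args) {
        // Print the Hadoop-related environment variables the Flink/Hive stack relies on.
        String hadoopHome = System.getenv("HADOOP_HOME");
        String hadoopConfDir = System.getenv("HADOOP_CONF_DIR");
        System.out.println("HADOOP_HOME     = " + hadoopHome);
        System.out.println("HADOOP_CONF_DIR = " + hadoopConfDir);

        // On Windows, Hadoop's native file-system calls expect winutils.exe under %HADOOP_HOME%\bin.
        if (hadoopHome != null) {
            File winutils = new File(hadoopHome, "bin" + File.separator + "winutils.exe");
            System.out.println("winutils.exe present: " + winutils.exists());
        }
    }
}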
