Features:
(1) Kerberos authentication
(2) TableEnvironment reads through a HiveCatalog
(3) A test sample is included
Handy for testing Flink features locally.
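The complete utility class follows. It assumes Flink's Java table API bridge, the Flink Hive connector, eo-yaml, and Lombok are on the classpath; every D:\configs\... path, the keytab, and the principal are placeholders to adapt to your cluster.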
import com.amihaiemil.eoyaml.*;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.configuration.*;
import static org.apache.flink.configuration.ConfigOptions.key; // needed by the key(...) builder below
import org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.*;
import org.apache.flink.table.catalog.exceptions.TableNotExistException;
import org.apache.flink.table.catalog.hive.HiveCatalog;
import org.apache.flink.table.types.DataType;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.*;
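/**
 * Bootstraps a local Flink environment: Kerberos login, stream and table
 * environments, and a HiveCatalog read from sql-client-defaults.yaml.
 */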
@Data
public class FlinkEnvProvider {

    private StreamExecutionEnvironment senv;
    private StreamTableEnvironment tenv;

    public FlinkEnvProvider() {
        init();
    }

    public void init() {
        senv = getStreamEnv();
        tenv = getTableEnv(senv);
    }
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public static class FlinkCatalog {
        private AbstractCatalog catalog;
        private String confDir;
    }
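    /**
     * Builds the local Flink configuration: Kerberos login, state backend,
     * checkpointing, REST port range, and TaskManager sizing.
     * The krb5.conf / keytab / principal values below are placeholders.
     */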
    public static Configuration getFlinkProperties() {
        final ConfigOption<String> TASK_MANAGER_LOG_PATH_KEY =
                key(ConfigConstants.TASK_MANAGER_LOG_PATH_KEY)
                        .stringType()
                        .noDefaultValue()
                        .withDescription("");
        String krb5Conf = "D:\\configs\\krb5.conf";
        String keytab = "D:\\configs\\xxx.keytab";
        String principal = "xxx@H3COASIS.COM";
        System.setProperty("java.security.krb5.conf", krb5Conf);
        Configuration config = new Configuration();
        config.set(SecurityOptions.KERBEROS_KRB5_PATH, krb5Conf);
        config.set(SecurityOptions.KERBEROS_LOGIN_KEYTAB, keytab);
        config.set(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, principal);
        config.set(SecurityOptions.KERBEROS_LOGIN_CONTEXTS, "Client,KafkaClient");
        config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
        config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///chkp"); // local path; must not contain a drive letter such as D:/
        // config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "hdfs://mycluster/flink/flink-checkpoints/it_cdc/simple_test");
        config.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.of(20, ChronoUnit.SECONDS));
        config.set(RestOptions.BIND_PORT, "8081-8181");
        config.set(DeploymentOptions.TARGET, "local");
        config.set(WebOptions.LOG_PATH, "tmp/log/job.log"); // local path; must not contain a drive letter such as D:/
        config.set(TASK_MANAGER_LOG_PATH_KEY, "tmp/log/job.log"); // local path; must not contain a drive letter such as D:/
        config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.ofMebiBytes(2048));
        config.set(TaskManagerOptions.TASK_OFF_HEAP_MEMORY, MemorySize.ofMebiBytes(512));
        config.set(TaskManagerOptions.NUM_TASK_SLOTS, 8); // configure generously: local mode starts only one TaskManager
        try {
            UserGroupInformation.loginUserFromKeytab(principal, keytab);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return config;
    }
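    /**
     * Reads the first catalog entry from sql-client-defaults.yaml (see the
     * YAML listing below) and turns it into a HiveCatalog instance.
     */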
    public static FlinkCatalog getCatalog() {
        String flinkSqlClientPath = "D:/configs/sql-client-defaults.yaml";
        YamlMapping mapping;
        try {
            mapping = Yaml.createYamlInput(new File(flinkSqlClientPath)).readYamlMapping();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        YamlSequence catalogs = mapping.yamlSequence("catalogs");
        YamlMapping defaultCatalog = catalogs.yamlMapping(0);
        String catalogName = defaultCatalog.string("name");
        String catalogType = defaultCatalog.string("type");
        String hiveConfDir = defaultCatalog.string("hive-conf-dir");
        String defaultDatabase = defaultCatalog.string("default-database");
        String hadoopConfDir = defaultCatalog.string("hadoop-conf-dir");
        AbstractCatalog catalog;
        if ("hive".equalsIgnoreCase(catalogType)) {
            catalog = new HiveCatalog(catalogName, defaultDatabase, hiveConfDir, hadoopConfDir, null);
        } else {
            throw new UnsupportedOperationException("Unsupported catalog type: " + catalogType);
        }
        return new FlinkCatalog(catalog, hiveConfDir);
    }
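    /** Local stream environment; note enableCheckpointing overrides the 20 s interval set in the Configuration. */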
    private StreamExecutionEnvironment getStreamEnv() {
        Configuration config = getFlinkProperties();
        StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment(config);
        senv.enableCheckpointing(10_000);
        senv.setParallelism(2);
        return senv;
    }
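    /** Wraps the stream environment in a table environment and makes the Hive catalog the current one. */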
    private StreamTableEnvironment getTableEnv(StreamExecutionEnvironment senv) {
        StreamTableEnvironment tenv = StreamTableEnvironment.create(senv);
        FlinkCatalog flinkCatalog = getCatalog();
        AbstractCatalog clg = flinkCatalog.getCatalog();
        tenv.registerCatalog(clg.getName(), clg);
        tenv.useCatalog(clg.getName());
        return tenv;
    }
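    /**
     * Smoke test: lists tables in my_db, prints the schema and changelog stream of
     * my_db.my_table, and reads table metadata straight from the HiveCatalog.
     */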
    @Test
    public void testTableEnv() throws Exception {
        StreamExecutionEnvironment senv = getStreamEnv();
        StreamTableEnvironment tenv = getTableEnv(senv);
        tenv.useDatabase("my_db");
        String[] tbls = tenv.listTables();
        Arrays.stream(tbls).forEach(System.out::println);
        String tablePath = "my_db.my_table";
        Table tbl = tenv.from(tablePath);
        tbl.printSchema();
        tenv.toChangelogStream(tbl).print();
        // inspect table metadata directly through the HiveCatalog
        Optional<Catalog> flinkHiveCatalog = tenv.getCatalog("hive_catalog");
        HiveCatalog catalog = (HiveCatalog) flinkHiveCatalog.get();
        ObjectPath path = new ObjectPath("my_db", "my_table");
        CatalogTableImpl table = (CatalogTableImpl) catalog.getTable(path);
        Map<String, String> options = table.getOptions();
        System.out.println(options);
        List<String> partitionKeys = table.getPartitionKeys();
        TableSchema schema = table.getSchema();
        String[] fieldNames = schema.getFieldNames();
        DataType[] fieldDataTypes = schema.getFieldDataTypes();
        String hdfsPath = options.get("path");
        String tableType = options.get("table.type");
        Schema unresolvedSchema = table.getUnresolvedSchema();
        senv.execute("testTableEnv"); // required so the changelog print() sink actually runs
    }
}
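For reference, a minimal usage sketch (not part of the original class; my_db.my_table and the job name are placeholders, and the getSenv()/getTenv() getters are the ones Lombok's @Data generates):
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class FlinkEnvProviderDemo {
    public static void main(String[] args) throws Exception {
        // the constructor performs the Kerberos login and registers the HiveCatalog
        FlinkEnvProvider provider = new FlinkEnvProvider();
        StreamTableEnvironment tenv = provider.getTenv();
        // placeholder table; replace with one that exists in the Hive metastore
        Table result = tenv.sqlQuery("SELECT * FROM my_db.my_table LIMIT 10");
        tenv.toChangelogStream(result).print();
        provider.getSenv().execute("hive-catalog-smoke-test");
    }
}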
The HiveCatalog is configured in sql-client-defaults.yaml as follows.
#==============================================================================
# Catalogs
#==============================================================================

# Define catalogs here.
catalogs:
  - name: hive_catalog
    type: hive
    hive-conf-dir: /usr/xxx/xxx/hive2/conf
    default-database: default
    hadoop-conf-dir: /usr/xxx/xxx/hadoop/conf

#catalogs: [] # empty list

# A typical catalog definition looks like:
#  - name: myhive
#    type: hive
#    hive-conf-dir: /opt/hive_conf/
#    default-database: ...
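As an alternative to parsing this YAML by hand, the same catalog can be registered through Flink SQL DDL (a sketch, using the placeholder paths from the YAML above; the Hive connector must be on the classpath):
// register the catalog via DDL instead of constructing HiveCatalog manually
tenv.executeSql(
        "CREATE CATALOG hive_catalog WITH (" +
        "  'type' = 'hive'," +
        "  'hive-conf-dir' = '/usr/xxx/xxx/hive2/conf'," +
        "  'default-database' = 'default'," +
        "  'hadoop-conf-dir' = '/usr/xxx/xxx/hadoop/conf'" +
        ")");
tenv.useCatalog("hive_catalog");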
One more thing to note: Hadoop itself also needs to be configured.
The Hadoop conf directory must be exposed through the HADOOP_CONF_DIR environment variable.
HADOOP_HOME must be set as an environment variable as well.
On Windows, HADOOP_HOME is simply the official binary release (e.g. hadoop-3.3.6.tar.gz) extracted to disk, with winutils.exe added under its bin directory.
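If setting HADOOP_HOME system-wide is inconvenient during local testing, Hadoop also honors the hadoop.home.dir JVM property. A sketch (D:\hadoop-3.3.6 is a placeholder and must contain bin\winutils.exe):
public class HadoopHomeSetup {
    // Call before any Hadoop class is loaded, e.g. from a static initializer
    // in FlinkEnvProvider.
    public static void configure() {
        if (System.getProperty("os.name").toLowerCase().contains("win")) {
            System.setProperty("hadoop.home.dir", "D:\\hadoop-3.3.6"); // placeholder path
        }
    }
}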