最近在使用 Flink 集成 Hive 时遇到不少问题,这里记录一下代码案例,供大家参考。
方式一:使用 Flink SQL 的 Hive SqlDialect(SQL 方言)
// Impersonate this user for HDFS access; replace with your actual Hadoop user.
System.setProperty("HADOOP_USER_NAME", "your_user_name");
// Obtain the streaming table environment (project helper).
StreamTableEnvironment tableEnv = FlinkUtils.getTableEnv();
// HiveCatalog configuration: catalog name and default database.
String catalog_name = "myhive";
String defaultDatabase = "ods";
// Directory containing hive-site.xml. Forward slashes are accepted by the
// JDK on every platform, unlike the Windows-only "src\\main\\resources".
String hiveConfDir = "src/main/resources";
// Create and register the HiveCatalog. Registering under catalog_name (not a
// second hard-coded "myhive" literal) keeps the registered name and the name
// used by useCatalog() below from drifting apart.
HiveCatalog hive = new HiveCatalog(catalog_name, defaultDatabase, hiveConfDir);
tableEnv.registerCatalog(catalog_name, hive);
// Make the HiveCatalog the current catalog.
tableEnv.useCatalog(catalog_name);
// Switch to the Hive dialect: Hive DDL syntax (PARTITIONED BY / STORED AS /
// TBLPROPERTIES) is only parsed under SqlDialect.HIVE.
tableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
// Register the Hive sink table (IF NOT EXISTS makes reruns idempotent).
tableEnv.executeSql(
"CREATE TABLE IF NOT EXISTS tb_test ( " +
" semesterId STRING, " +
" teacherId STRING, " +
" courseId STRING " +
" ) partitioned by (dt STRING) " +
" STORED AS parquet " +
" TBLPROPERTIES ( " +
" 'partition.time-extractor.timestamp-pattern' = '$dt', " +
" 'sink.partition-commit.trigger' = 'process-time', " +
" 'sink.partition-commit.delay' = '0 s', " +
" 'sink.partition-commit.policy.kind' = 'metastore,success-file' " +
" ) " );
// Switch back to the default Flink SQL dialect for the Kafka source DDL.
tableEnv.getConfig().setSqlDialect(SqlDialect.DEFAULT);
// 注册 kafka-source
tableEnv.executeSql(
"CREATE TABLE IF NOT EXISTS kafka_test ( " +
" semesterId STRING, " +
" teacherId STRING, " +
" courseId STRING " +
" ) with ( " +
" 'connector' = 'kafka', " +