使用 Flink 连接 Hive 调试了大半天,主要原因是官网文档不够详细(缺少各种 Maven 依赖),在此做个记录,供大家参考。
- 添加Maven依赖
<!-- Begin: Flink-Hive integration dependencies -->
<!-- Flink Dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-hive_2.11</artifactId>
<!-- 2.11 is the Scala version -->
<version>1.11.0</version>
<!-- 1.11.0 is the Flink version -->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.11</artifactId>
<version>1.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.11</artifactId>
<version>1.11.0</version>
</dependency>
<!-- Hive Dependency -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.0</version>
<!-- 3.1.0 is the Hive version -->
</dependency>
<!-- Hadoop Dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.1.0</version>
<!-- 3.1.0 is the Hadoop version -->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>3.1.0</version>
</dependency>
<!-- End: Flink-Hive integration dependencies -->
- Flink连接Hive的Scala代码
import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment}
import org.apache.flink.table.catalog.hive.HiveCatalog
object flink_hive {

  /**
   * Connects a Flink TableEnvironment to an existing Hive metastore via
   * HiveCatalog and runs a sample join query, printing the result.
   *
   * Requires the cluster's hive-site.xml to be copied into `hiveConfDir`.
   */
  def main(args: Array[String]): Unit = {
    // Blink planner in batch mode — required for querying Hive tables in Flink 1.11.
    val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build()
    val tableEnv = TableEnvironment.create(settings)

    val name            = "myhive"                // catalog name to register in the TableEnvironment
    val defaultDatabase = "mydb"                  // Hive database to connect to
    val hiveConfDir     = "hadooptest/hive-conf"  // local dir holding the cluster's hive-site.xml
    val version         = "3.1.0"                 // Hive version

    val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version)
    // Reuse the vals above instead of repeating the string literals
    // (previously "myhive"/"mydb" were hard-coded again here, so editing the
    // vals alone would silently break the catalog/database selection).
    tableEnv.registerCatalog(name, hive)
    tableEnv.useCatalog(name)
    tableEnv.useDatabase(defaultDatabase)

    val sqlHive = "select u.id,u.name,a.age from t_user u left join t_age a on u.id = a.id"
    tableEnv.executeSql(sqlHive).print()
  }
}
- 注意
需要将集群上 Hive 安装目录 conf 下的 hive-site.xml 下载到本地,放到代码中 `hiveConfDir` 指定的目录中。