1. Start the Hive metastore service:
hive --service metastore &
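Before moving on, you can check that the metastore is actually listening; it uses port 9083 by default (a quick check, assuming netstat is available on the server):
netstat -nltp | grep 9083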
2. Copy hive-site.xml from Hive's conf directory into the resources directory of the IDEA project.
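This copied hive-site.xml is what tells Spark where the metastore service from step 1 lives. A minimal sketch of the relevant property, assuming the metastore runs on master on its default port 9083:
<property>
    <name>hive.metastore.uris</name>
    <value>thrift://master:9083</value>
</property>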
3. Fix the host mapping: master must resolve to the IP of the machine where Hive actually runs (a big pitfall).
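On the machine running IDEA, that means editing the hosts file (/etc/hosts on Linux, C:\Windows\System32\drivers\etc\hosts on Windows). A sketch with a placeholder address; substitute the real IP of your Hive node:
192.168.1.100 master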
4. Add the Maven dependencies to pom.xml
Pitfall: keep the versions consistent. Every Spark artifact must use the same Scala suffix (here _2.11) and the same Spark version (here 2.4.5), and both should match what is installed on your cluster.
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.4.5</version>
</dependency>
<!-- Spark SQL -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.4.5</version>
</dependency>
<!-- Spark-Hive integration -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.11</artifactId>
    <version>2.4.5</version>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.27</version>
</dependency>
<!-- Hive client libraries; Spark 2.4.x bundles Hive 1.2.1, so these versions match -->
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-cli</artifactId>
    <version>1.2.1</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.2.1</version>
</dependency>
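One way to make the version consistency above harder to break is to factor the versions into Maven properties and reference them from every Spark dependency. A sketch (the property names are my own choice):
<properties>
    <scala.binary.version>2.11</scala.binary.version>
    <spark.version>2.4.5</spark.version>
</properties>
<!-- then, inside each Spark dependency: -->
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>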
5. Run the code:
import org.apache.spark.sql.SparkSession

object step01 {
  def main(args: Array[String]): Unit = {
    // Run HDFS operations as a user with access to the Hive warehouse directory
    System.setProperty("HADOOP_USER_NAME", "master")

    val spark: SparkSession = SparkSession
      .builder()
      .appName("Spark Hive Example")
      .master("local[*]")
      // Warehouse location on HDFS; adjust host:port to your NameNode (e.g. 8020 or 9000)
      .config("spark.sql.warehouse.dir", "hdfs://master:9000/user/hive/warehouse")
      // No MySQL driver config is needed here: Spark talks to the metastore
      // service, which handles the MySQL connection itself
      .enableHiveSupport()
      .getOrCreate()

    // If the connection works, this lists the databases from the Hive metastore
    spark.sql("show databases").show()

    spark.close()
  }
}
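If show databases lists the databases from your Hive installation rather than just default, the connection works and you can query tables the same way. A sketch, reusing the spark session from the example above and a hypothetical Hive table mydb.student:
val df = spark.sql("select * from mydb.student")
df.show()
// Results can be written back to Hive as a new table:
df.write.mode("overwrite").saveAsTable("mydb.student_copy")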