1. Code for connecting to the Hive database
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Load a Hive table as a DataFrame.
 */
object Demo12_CreateDF_HIVE {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .appName(this.getClass.getSimpleName)
      .master("local[*]")
      // Enabling Hive support requires calling enableHiveSupport and adding the spark-hive dependency.
      // By default, Spark SQL ships with its own embedded Hive.
      // If hive-site.xml can be loaded from the classpath, the metastore the program talks to is no
      // longer the local embedded one but the metastore specified in that file.
      // If core-site.xml can be loaded from the classpath, the file system it accesses is no longer
      // the local file system but the HDFS file system specified in that file.
      .enableHiveSupport()
      .getOrCreate()

    val df: DataFrame = spark.sql("select * from student")
    df.printSchema()
    df.show()

    spark.close()
  }
}
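The comments above mention that Hive support needs the spark-hive dependency. A minimal sketch, assuming an sbt build; the version numbers are placeholders and should match your Spark installation:

// build.sbt -- versions are illustrative, align them with your cluster
libraryDependencies += "org.apache.spark" %% "spark-sql"  % "2.4.8"
libraryDependencies += "org.apache.spark" %% "spark-hive" % "2.4.8"

To make the program talk to a real cluster rather than the embedded defaults, put hive-site.xml and core-site.xml under src/main/resources so they are picked up from the classpath.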
2. Then run the program locally
2.1 The code is as follows
import org.apache.spark.sql.{DataFrame, DataFrameReader, SparkSession}

/**
 * Operate on the data in a DataFrame with plain SQL statements (DML, a domain-specific language).
 */
object Demo13_DML_SQL {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkUtil.getSpark()
    val dfr: DataFrameReader = spark.read.option("header", "true")
    val df: DataFrame = dfr.csv("data/stu2.csv")

    // Register the DataFrame as a temporary view.
    df.createTempView("p")

    // spark.sql(...) can be abbreviated to sql(...), provided the method is imported first.
    import spark.sql
    sql("select * from p").show()
    sql(
      """
        |select
        |*
        |from
        |p
        |order by score desc
        |limit 2
        |""".stripMargin).show()

    // A DataFrame can also be registered as a global temporary view; such a view
    // is bound to the global_temp database, as illustrated below.
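    // Illustrative addition, not in the original code: a global temp view is
    // shared across SparkSessions in the same application and must be queried
    // through the global_temp database.
    df.createGlobalTempView("p_global")
    sql("select * from global_temp.p_global").show()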
    spark.close()
  }
}
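Demo13_DML_SQL calls SparkUtil.getSpark(), a project helper that is not shown in these notes. A minimal sketch of what such a helper might look like; the object name comes from the code above, the body and parameters are assumptions:

import org.apache.spark.sql.SparkSession

// Hypothetical implementation of the SparkUtil helper referenced above;
// the original project's version may differ.
object SparkUtil {
  def getSpark(appName: String = "demo",
               master: String = "local[*]"): SparkSession =
    SparkSession.builder()
      .appName(appName)
      .master(master)
      .getOrCreate()
}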
2.2 The error
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Exception in thread "main" org.apache.spark.sql.AnalysisException: Path does not exist: hdfs://linux02:9000/user/Dell/data/stu2.csv;
at org.apache.spark.sql.execution.datasources.DataSource$.org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary(DataSource.scala:719)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:390)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:390)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.immutable.List.flatMap(List.scala:344)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:389)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:596)
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:473)
2.3 The fix
Look for the cause in the imported hive-site.xml and in Hadoop's core-site.xml (the original notes say .html, but these are .xml files). Because core-site.xml on the classpath sets the default file system to hdfs://linux02:9000, the relative path data/stu2.csv is resolved against HDFS instead of the local file system. Either comment out that property (the original notes point at a screenshot, not reproduced here; in core-site.xml this is the fs.defaultFS setting), or keep the file and qualify the path with an explicit file:/// scheme, as shown below.
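A minimal sketch of the file:/// fix; the object name and the absolute path are placeholders, so point the path at wherever stu2.csv actually lives on your machine:

import org.apache.spark.sql.{DataFrame, SparkSession}

object Demo13_LocalPath {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkUtil.getSpark()
    // An explicit file:/// URI forces the local file system even when
    // fs.defaultFS points at HDFS. The path below is illustrative.
    val df: DataFrame = spark.read
      .option("header", "true")
      .csv("file:///D:/data/stu2.csv")
    df.show()
    spark.close()
  }
}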