package com.shujia.sql
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SparkSession}
object Demo06SparkOnHive {
def main(args: Array[String]): Unit = {
/**
* 通过enableHiveSupport()可以开启Hive的支持
* 需要在pom文件中加入spark-hive的依赖
* <dependency>
* <groupId>org.apache.spark</groupId>
* <artifactId>spark-hive_2.11</artifactId>
* <version>2.4.5</version>
* </dependency>
* 需要启动hive的metastore
* hive --service metastore
*
*/
val spark: SparkSession = SparkSession
.builder()
.appName("Demo06HiveOnSpark")
.master("local")
.enableHiveSupport() // 开启Hive的支持
.config("spark.sql.shuffle.partitions", 2) // 默认200
.getOrCreate()
import org.apache.spark.sql.functions._
import spark.implicits._
spark.sql("show databases").show()
spark.sql("show tables").show()
spark.sql("use stu_spark").show()
spark.sql("show tables").show()
// 统计每个科目排名前十的学生 分组取topN
spark.sql(
"""
|select t1.student_id
| ,t1.cource_id
| ,t1.sco
| ,t1.rn
|from (
| select student_id
| ,cource_id
| ,sco
| ,row_number() over(partition by cource_id order by sco desc) as rn
| from score
|) t1 where rn<=10
|""".stripMargin).show()
// DSL
val scoDF: DataFrame = spark.table("score")
scoDF
.select($"student_id", $"cource_id", $"sco",
row_number() over Window.partitionBy($"cource_id").orderBy($"sco".desc) as "rn")
.where($"rn" <= 10)
.show()
}
}
+------------+
|databaseName|
+------------+
| default|
| stu|
| stu_spark|
| test1|
| testsqoop|
+------------+
+--------+---------------+-----------+
|database| tableName|isTemporary|
+--------+---------------+-----------+
| default|ods_yiqing_data| false|
| default| student| false|
| default| students_hbase| false|
+--------+---------------+-----------+
++
||
++
++
+---------+---------+-----------+
| database|tableName|isTemporary|
+---------+---------+-----------+
|stu_spark| score| false|
|stu_spark| student| false|
+---------+---------+-----------+
+----------+---------+---+---+
|student_id|cource_id|sco| rn|
+----------+---------+---+---+
|1500100007| 1000002|149| 1|
|1500100080| 1000002|149| 2|
|1500100154| 1000002|149| 3|
|1500100206| 1000002|149| 4|
|1500100298| 1000002|149| 5|
|1500100300| 1000002|149| 6|
|1500100547| 1000002|149| 7|
|1500100599| 1000002|149| 8|
|1500100641| 1000002|149| 9|
|1500100679| 1000002|149| 10|
|1500100208| 1000003|149| 1|
|1500100572| 1000003|149| 2|
|1500100616| 1000003|149| 3|
|1500100776| 1000003|149| 4|
|1500100867| 1000003|149| 5|
|1500100871| 1000003|149| 6|
|1500100911| 1000003|149| 7|
|1500100072| 1000003|148| 8|
|1500100645| 1000003|148| 9|
|1500100725| 1000003|148| 10|
+----------+---------+---+---+
only showing top 20 rows
+----------+---------+---+---+
|student_id|cource_id|sco| rn|
+----------+---------+---+---+
|1500100007| 1000002|149| 1|
|1500100080| 1000002|149| 2|
|1500100154| 1000002|149| 3|
|1500100206| 1000002|149| 4|
|1500100298| 1000002|149| 5|
|1500100300| 1000002|149| 6|
|1500100547| 1000002|149| 7|
|1500100599| 1000002|149| 8|
|1500100641| 1000002|149| 9|
|1500100679| 1000002|149| 10|
|1500100208| 1000003|149| 1|
|1500100572| 1000003|149| 2|
|1500100616| 1000003|149| 3|
|1500100776| 1000003|149| 4|
|1500100867| 1000003|149| 5|
|1500100871| 1000003|149| 6|
|1500100911| 1000003|149| 7|
|1500100072| 1000003|148| 8|
|1500100645| 1000003|148| 9|
|1500100725| 1000003|148| 10|
+----------+---------+---+---+
only showing top 20 rows