package com.lenovo.sparkSqlDemo
import org.apache.spark.sql.SparkSession
//spark常用开窗函数
object SparkSqlTest2 {

  /** One employee row used to populate the demo `person` view. */
  case class Person(id: Int, name: String, age: Int, salary: Double, sex: Boolean, work: String, dept: Int)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("testSparkSql").getOrCreate()
    // Raise the log threshold BEFORE running queries — the original set it after
    // the final query, so it never actually suppressed the query's log output.
    spark.sparkContext.setLogLevel("FATAL")

    // Case classes provide a companion `apply`; `new` is unnecessary noise.
    val persons = List(
      Person(1, "suns1", 34, 8022.0, sex = true, work = "teacher", dept = 1),
      Person(2, "suns2", 33, 8021.0, sex = false, work = "manager", dept = 2),
      Person(3, "suns3", 32, 8022.0, sex = true, work = "teacher", dept = 1),
      Person(4, "suns4", 35, 8025.0, sex = false, work = "manager", dept = 2),
      Person(5, "suns5", 38, 8024.0, sex = true, work = "teacher", dept = 1),
      Person(6, "suns6", 39, 8026.0, sex = true, work = "woker", dept = 3)
    )
    // createOrReplaceTempView is idempotent; plain createTempView throws an
    // AnalysisException if a view named "person" already exists in the session.
    spark.createDataFrame(persons).createOrReplaceTempView("person")

    /*// Group by dept, order within each group, then keep the top row per group:
    spark.sql("select * from (select *,row_number() over(partition by dept order by salary asc) as rank from person) t where t.rank<2").show()*/

    /*// Salary ranking without gaps for ties (e.g. 1,2,3,4,4,5):
    spark.sql("select *,DENSE_RANK() over(order by salary DESC) rank from person").show()*/

    /*// Salary ranking with gaps after ties (e.g. 1,2,3,4,4,6):
    spark.sql("select *,rank() over(order by salary DESC) rank from person").show()*/

    /*// Defining and using a UDF. NOTE: the original matched "work", which never
    // appears in the data — the sample rows use "woker", matched here instead.
    spark.udf.register("yearSalary", (work: String, salary: Double) => {
      work match {
        case "teacher" => salary * 14
        case "manager" => salary * 15
        case "woker"   => salary * 13
        case _         => salary * 12
      }
    })
    spark.sql("select *,yearSalary(work,salary) as yearSalary from person").show()*/

    // Rank by salary and compute the average salary for each job title.
    spark.sql("select *,avg(salary) over(partition by work) as avgSalary from person").show()

    spark.stop()
  }
}
// Spark SQL window functions and user-defined functions (UDFs).
// (Originally published as a blog article; latest recommended follow-up posted 2024-04-01 11:20:57.)