Spark SQL window functions and custom UDFs

package com.lenovo.sparkSqlDemo

import org.apache.spark.sql.SparkSession

// Commonly used Spark SQL window functions and a custom UDF
object SparkSqlTest2 {
  case class Person(id:Int,name:String,age:Int,salary:Double,sex:Boolean,work:String,dept:Int)
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("testSparkSql").getOrCreate()
    // Set the log level right after creating the session so it takes effect for the queries below
    spark.sparkContext.setLogLevel("FATAL")
    val persons = List(
      Person(1, "suns1", 34, 8022.0, true, "teacher", 1),
      Person(2, "suns2", 33, 8021.0, false, "manager", 2),
      Person(3, "suns3", 32, 8022.0, true, "teacher", 1),
      Person(4, "suns4", 35, 8025.0, false, "manager", 2),
      Person(5, "suns5", 38, 8024.0, true, "teacher", 1),
      Person(6, "suns6", 39, 8026.0, true, "worker", 3)
    )
    spark.createDataFrame(persons).createTempView("person")
    /*// Partition by dept, rank rows by salary in ascending order, then keep only the lowest-paid row per department
    spark.sql("select * from (select *,row_number() over(partition by dept order by salary asc) as rank from person) t where t.rank<2").show()*/
    /*// Salary ranking with dense_rank(): ties share a rank and no rank is skipped, e.g. 1 2 3 4 4 5
    spark.sql("select *,DENSE_RANK() over(order by salary DESC) rank from person").show()*/
    /*// Salary ranking with rank(): ties share a rank and the next rank is skipped, e.g. 1 2 3 4 4 6
    spark.sql("select *,rank() over(order by salary DESC) rank from person").show()*/
    /*// Defining and registering a custom UDF: yearly salary depends on the job title
    spark.udf.register("yearSalary",(work:String,salary:Double)=>{
      work match {
        case "teacher" => salary*14
        case "manager" => salary*15
        case "worker" => salary*13
        case _ => salary*12
      }
    })
    spark.sql("select *,yearSalary(work,salary) as yearSalary from person").show()*/
    // avg() as a window function: attach the average salary of each job title (work) to every row
    spark.sql("select *,avg(salary) over(partition by work) as avgSalary from person").show()
    spark.stop()
  }
}
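
For reference, the same window functions and the UDF can also be written with the DataFrame API instead of SQL strings. The snippet below is a minimal sketch, not part of the original program: it assumes the spark session and the "person" temp view created above are still in scope, and the names personDF, byDept, bySalary and yearSalary are introduced here only for illustration.

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// Assumes the SparkSession spark and the "person" temp view registered above are available
val personDF = spark.table("person")

// row_number(): keep only the lowest-paid person per department (same as the first commented-out SQL query)
val byDept = Window.partitionBy("dept").orderBy(col("salary").asc)
personDF.withColumn("rank", row_number().over(byDept))
  .where(col("rank") < 2)
  .show()

// dense_rank() vs rank() over the whole table, highest salary first
val bySalary = Window.orderBy(col("salary").desc)
personDF.withColumn("denseRank", dense_rank().over(bySalary))
  .withColumn("rank", rank().over(bySalary))
  .show()

// avg() as a window function: average salary of each job title attached to every row
personDF.withColumn("avgSalary", avg("salary").over(Window.partitionBy("work"))).show()

// The yearly-salary UDF as a column function instead of spark.udf.register
val yearSalary = udf((work: String, salary: Double) => work match {
  case "teacher" => salary * 14
  case "manager" => salary * 15
  case "worker"  => salary * 13
  case _         => salary * 12
})
personDF.withColumn("yearSalary", yearSalary(col("work"), col("salary"))).show()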