先创建一个rank spec
// 用df("salary").asc代表升序排列,desc代表降序
val rankSpec = Window.partitionBy("age").orderBy(df("salary").desc)
然后为自己的表添加一列rank
val salaryRank = df.withColumn("rank",dense_rank().over(rankSpec))
完整代码:
/*
* 利用spark读取txt文件的内容,生成rdd
* 并用case class的定义去将rdd转化为dataFrame
*
*
* 如果读入一个没有文件头,要另外给schema
* */
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.log4j._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
case class people(id:Int, name:String, age:Int)
case class salary (name:String, salary:Int)
case class peopleSalary(id:Int, name:String, age:Option[Int], salary:Int)
object MyScalar {
def main(args: Array[String]): Unit = {
Logger.getLogge