Spark-Core自定义排序规则

1.定义一个样例类(case class)并继承Ordered,在类中实现compare方法来定义排序规则

package day02

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo 1: sort an RDD of [[User]] objects by the ordering that `User` itself defines.
  *
  * Each input line has the form "id,name,age,salary". The lines are parsed into
  * `User` instances and sorted with `sortBy(user => user)`, so the sort key is the
  * element itself and the comparison is delegated to `User.compare`.
  */
object UserOrdered01 {
  /**
    * Runs the demo on a local Spark master and prints the sorted users.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UserOrdered01").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // try/finally guarantees the context is stopped even when parsing or the job fails.
    try {
      val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
      val lines: RDD[String] = sc.makeRDD(arr)
      val userRDD = lines.map { line =>
        val fields = line.split(",")
        User(fields(0).toInt, fields(1), fields(2).toInt, fields(3).toInt)
      }
      // The key is the User itself, so its Ordered implementation decides the order.
      val sorted = userRDD.sortBy(user => user)
      sorted.collect().foreach(println(_))
    } finally {
      sc.stop()
    }
  }
}
/**
  * A user record that carries its own sort order: age ascending, and for equal
  * ages salary descending.
  *
  * @param id     user id
  * @param name   user name
  * @param age    age, the primary sort key (ascending)
  * @param salary salary, the secondary sort key (descending)
  */
case class User(id: Int, name: String, age: Int, salary: Int) extends Ordered[User] {
  /**
    * Compares this user with `that`.
    *
    * Uses `Integer.compare` instead of subtraction: a difference such as
    * `Int.MinValue - 1` overflows and would flip the sign of the result.
    *
    * @param that the user to compare against
    * @return a negative value if this user sorts first, a positive value if it
    *         sorts last, and zero if both share age and salary
    */
  override def compare(that: User): Int =
    if (this.age == that.age) Integer.compare(that.salary, this.salary) // salary descending
    else Integer.compare(this.age, that.age) // age ascending

  override def toString: String = s"User($id,$name,$age,$salary)"
}

 

2.利用sortBy的特性:只按给定的key排序,不改变RDD中元素的类型

package day02

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Custom sort rule, variant 2.
  *
  * Relies on the fact that `sortBy` only reorders the data and does not change
  * the element type: the RDD keeps holding plain tuples, while a `User2` built
  * from each tuple serves as the sort key.
  */
object UserOrder02 {
  /**
    * Runs the demo on a local Spark master and prints the sorted tuples.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // App name matches this object (it was previously copy-pasted from UserOrdered01).
    val conf = new SparkConf().setAppName("UserOrder02").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // try/finally guarantees the context is stopped even when parsing or the job fails.
    try {
      val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
      val lines: RDD[String] = sc.makeRDD(arr)
      val userRDD = lines.map { line =>
        val fields = line.split(",")
        (fields(0).toInt, fields(1), fields(2).toInt, fields(3).toInt)
      }
      // sortBy changes only the order, not the element type: the result is still an RDD of tuples.
      val sorted: RDD[(Int, String, Int, Int)] =
        userRDD.sortBy(t => User2(t._1, t._2, t._3, t._4))
      sorted.collect().foreach(println(_))
    } finally {
      sc.stop()
    }
  }
}
/**
  * Sort key for user records: age ascending, and for equal ages salary descending.
  *
  * @param id     user id
  * @param name   user name
  * @param age    age, the primary sort key (ascending)
  * @param salary salary, the secondary sort key (descending)
  */
case class User2(id: Int, name: String, age: Int, salary: Int) extends Ordered[User2] {
  /**
    * Compares this key with `that`.
    *
    * Uses `Integer.compare` instead of subtraction: a difference such as
    * `Int.MinValue - 1` overflows and would flip the sign of the result.
    *
    * @param that the key to compare against
    * @return a negative value if this key sorts first, a positive value if it
    *         sorts last, and zero if both share age and salary
    */
  override def compare(that: User2): Int =
    if (this.age == that.age) Integer.compare(that.salary, this.salary) // salary descending
    else Integer.compare(this.age, that.age) // age ascending
}

3.利用元组的比较规则:先比较第一个元素,相等时再比较第二个元素

package day02

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Custom sort rule, variant 3: sort by a tuple key.
  *
  * Sort rule: age ascending, salary descending.
  */
object UserOrder03 {
  /**
    * Runs the demo on a local Spark master and prints the sorted tuples.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // App name matches this object (it was previously copy-pasted from UserOrdered01).
    val conf = new SparkConf().setAppName("UserOrder03").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // try/finally guarantees the context is stopped even when parsing or the job fails.
    try {
      val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
      val lines: RDD[String] = sc.makeRDD(arr)
      val userRDD = lines.map { line =>
        val fields = line.split(",")
        (fields(0).toInt, fields(1), fields(2).toInt, fields(3).toInt)
      }
      // Tuples compare element by element: the first component decides unless it
      // is equal, in which case the second component is compared.
      // The salary is negated to get descending order; it is widened to Long
      // first because -Int.MinValue overflows back to Int.MinValue.
      val sorted = userRDD.sortBy(t => (t._3, -(t._4.toLong)))
      sorted.collect().foreach(println(_))
    } finally {
      // Release resources.
      sc.stop()
    }
  }
}

4.利用隐式值(隐式的Ordering排序规则)

package day02

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Custom sort rule, variant 4: supply the ordering as an implicit `Ordering`
  * value that `sortBy` picks up for the tuple element type.
  */
object UserOrder04 {
  /**
    * Runs the demo on a local Spark master and prints the sorted tuples.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // App name matches this object (it was previously copy-pasted from UserOrdered01).
    val conf = new SparkConf().setAppName("UserOrder04").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // try/finally guarantees the context is stopped even when parsing or the job fails.
    try {
      val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
      val lines: RDD[String] = sc.makeRDD(arr)
      val userRDD = lines.map { line =>
        val fields = line.split(",")
        (fields(0).toInt, fields(1), fields(2).toInt, fields(3).toInt)
      }
      // Implicit sort rule built with Ordering.on:
      //   def on[U](f: U => T): Ordering[U]
      // It derives an ordering for the 4-tuple from the ordering of the
      // (age, -salary) key. The explicit type annotation keeps implicit
      // resolution predictable. The salary is widened to Long before negation
      // because -Int.MinValue overflows back to Int.MinValue.
      implicit val rules: Ordering[(Int, String, Int, Int)] =
        Ordering[(Int, Long)].on[(Int, String, Int, Int)](t => (t._3, -(t._4.toLong)))
      // The implicit ordering is selected automatically by the element type.
      val sorted = userRDD.sortBy(user => user)
      sorted.collect().foreach(println(_))
    } finally {
      // Release resources.
      sc.stop()
    }
  }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值