转载作者:Alen-Gao
转载地址:https://blog.csdn.net/sonicgyq_gyq/article/details/79239946
在实际操作中,sortBy算子往往不能满足数据多种排序的需求,这就需要我们使用自定义排序来实现,以下是实现简单的自定义排序的两种方法,起到抛砖引玉的作用。
第一种方法:(Ordered:让作为排序键的类混入 Ordered 特质,并实现其中的 compare 方法)
- package cn.allengao.IpSearch
- import org.apache.spark.{SparkConf, SparkContext}
/**
 * Custom-sort demo, approach 1: the sort key type mixes in `Ordered`.
 *
 * Players are ranked by attack value, highest first; players with the same
 * attack value are ranked by defense value, highest first.
 *
 * Author: Allen Gao
 * Created: 2018/2/2 11:01
 */
object CustomSort {
  /**
   * Builds a small in-memory RDD of players, sorts it in descending order using
   * a `Player` key, and prints the collected result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      // Player records: (name, attack, defense).
      val playerInfo = sc.parallelize(Array(("Kobe", 98, 95), ("O'Neal", 98, 96), ("Harden", 95, 95)))
      // The key is a Player built from (attack, defense); descending order puts
      // the highest attack (then the highest defense) first.
      val res = playerInfo.sortBy(x => Player(x._2, x._3), ascending = false)
      println(res.collect().toBuffer)
      // Expected output: ArrayBuffer((O'Neal,98,96), (Kobe,98,95), (Harden,95,95))
    } finally {
      // Stop the context even when the job above fails, so it is never leaked.
      sc.stop()
    }
  }
}
/**
 * Approach 1: a sort key that carries its own ordering by mixing in `Ordered`.
 *
 * Instances are compared by `attack` first; when the attack values are equal,
 * they are compared by `defense`.
 *
 * @param attack  attack value of the player
 * @param defense defense value of the player
 */
final case class Player(attack: Int, defense: Int) extends Ordered[Player] with Serializable {
  /**
   * @param that the player to compare against
   * @return a negative number, zero, or a positive number when this player sorts
   *         before, equal to, or after `that`
   */
  override def compare(that: Player): Int = {
    // Integer.compare is used instead of subtraction: a difference such as
    // Int.MaxValue - (-1) overflows and would flip the sign of the result.
    val byAttack = Integer.compare(this.attack, that.attack)
    if (byAttack != 0) byAttack
    else Integer.compare(this.defense, that.defense)
  }
}
第二种方法:(Ordering:为排序键提供一个 implicit 隐式值)
- package cn.allengao.IpSearch
- import org.apache.spark.{SparkConf, SparkContext}
/**
 * Custom-sort demo, approach 2: the ordering is supplied as an implicit
 * `Ordering` value instead of being built into the key type.
 *
 * Players are ranked by attack value, highest first; players with the same
 * attack value are ranked by defense value, highest first.
 *
 * Author: Allen Gao
 * Created: 2018/2/2 11:01
 */
object MySort {
  /**
   * Ordering for `Player_1`: by `attack`, then by `defense` when attacks are equal.
   *
   * Bring it into scope with `import MySort._`. An
   * `implicit object PlayerOrdering extends Ordering[Player_1]` would serve the
   * same purpose.
   */
  implicit val playerOrdering: Ordering[Player_1] = new Ordering[Player_1] {
    override def compare(x: Player_1, y: Player_1): Int = {
      val byAttack = Integer.compare(x.attack, y.attack)
      // Two players with equal attack and equal defense must compare as 0;
      // returning a non-zero value for equal keys breaks the comparator contract.
      if (byAttack != 0) byAttack
      else Integer.compare(x.defense, y.defense)
    }
  }
}
/**
 * Driver for the implicit-`Ordering` variant of the custom sort: players are
 * sorted in descending order using a `Player_1` key.
 */
object CustomSort_1 {
  /**
   * Builds a small in-memory RDD of players, sorts it in descending order using
   * a `Player_1` key, and prints the collected result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      // Player records: (name, attack, defense).
      val playerInfo = sc.parallelize(Array(("Kobe", 98, 95), ("O'Neal", 98, 96), ("Harden", 95, 95)))
      // Player_1 has no ordering of its own; this import puts the implicit
      // Ordering[Player_1] in scope for sortBy.
      import MySort._
      val res = playerInfo.sortBy(x => Player_1(x._2, x._3), ascending = false)
      println(res.collect().toBuffer)
      // Expected output: ArrayBuffer((O'Neal,98,96), (Kobe,98,95), (Harden,95,95))
    } finally {
      // Stop the context even when the job above fails, so it is never leaked.
      sc.stop()
    }
  }
}
// Approach 2: the sort key is a plain case class with no ordering of its own;
// an implicit Ordering[Player_1] value must be in scope wherever it is sorted.
final case class Player_1(attack: Int, defense: Int) extends Serializable