1. Using a case class: define the class and put the sort logic inside it
package day02

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object UserOrdered01 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UserOrdered01").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
    val lines: RDD[String] = sc.makeRDD(arr)
    val userRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0).toInt
      val name = fields(1)
      val age = fields(2).toInt
      val salary = fields(3).toInt
      User(id, name, age, salary)
    })
    // User extends Ordered, so sortBy finds an implicit Ordering[User] on its own
    val sorted = userRDD.sortBy(user => user)
    sorted.collect().foreach(println)
    sc.stop()
  }
}
// Sort rule: age ascending; on equal ages, salary descending
case class User(id: Int, name: String, age: Int, salary: Int) extends Ordered[User] {
  override def compare(that: User): Int = {
    if (this.age == that.age) {
      that.salary - this.salary // reversed operands => descending salary
    } else {
      this.age - that.age // ascending age
    }
  }
  override def toString: String = s"User($id,$name,$age,$salary)"
}
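For this input the result is deterministic: the two age-22 users come first with the higher salary on top, then the age-23 user. Collected and printed, the output should be:

User(2,老马,22,300)
User(1,小马,22,200)
User(3,中马,23,200)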
2. Using the behavior of sortBy
package day02
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
/**
 * Custom sort rule.
 * sortBy does not change the element type, only the order,
 * so a case class can serve purely as the sort key.
 */
object UserOrder02 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UserOrder02").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
    val lines: RDD[String] = sc.makeRDD(arr)
    val userRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0).toInt
      val name = fields(1)
      val age = fields(2).toInt
      val salary = fields(3).toInt
      (id, name, age, salary)
    })
    // sortBy does not change the element type, only the order: the result is
    // still an RDD of tuples, User2 is constructed only as the sort key
    val sorted: RDD[(Int, String, Int, Int)] = userRDD.sortBy(t => User2(t._1, t._2, t._3, t._4))
    sorted.collect().foreach(println)
    sc.stop()
  }
}
// Same rule as User: age ascending, salary descending on ties
case class User2(id: Int, name: String, age: Int, salary: Int) extends Ordered[User2] {
  override def compare(that: User2): Int = {
    if (this.age == that.age) {
      that.salary - this.salary
    } else {
      this.age - that.age
    }
  }
}
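Why this works is visible in the signature of RDD.sortBy (shown here roughly as it appears in the Spark source, for reference): the key function f only decides the order, while the returned RDD keeps the original element type T. That is why User2 needs an Ordering (supplied here via Ordered), yet the output stays a tuple RDD.

def sortBy[K](
    f: (T) => K,
    ascending: Boolean = true,
    numPartitions: Int = this.partitions.length)
    (implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[T]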
3. Using tuple comparison
package day02
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
/**
 * Use the built-in comparison of tuples to sort.
 * Rule: age ascending, salary descending.
 */
object UserOrder03 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UserOrder03").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
    val lines: RDD[String] = sc.makeRDD(arr)
    val userRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0).toInt
      val name = fields(1)
      val age = fields(2).toInt
      val salary = fields(3).toInt
      (id, name, age, salary)
    })
    // Tuples compare element by element: if the first components differ they
    // decide the order; if they are equal, the second components are compared.
    // Negating the salary turns its ascending order into a descending one.
    val sorted = userRDD.sortBy(t => (t._3, -t._4))
    sorted.collect().foreach(println)
    // release resources
    sc.stop()
  }
}
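The same tuple comparison can be verified locally without Spark; a minimal plain-Scala sketch:

// the implicit Ordering for pairs compares component-wise
val keys = Seq((22, -200), (22, -300), (23, -200))
println(keys.sorted) // List((22,-300), (22,-200), (23,-200))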
4. Using an implicit Ordering
package day02
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
/**
 * Sort with an implicit Ordering supplied in scope.
 */
object UserOrder04 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UserOrder04").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val arr = Array("1,小马,22,200", "2,老马,22,300", "3,中马,23,200")
    val lines: RDD[String] = sc.makeRDD(arr)
    val userRDD = lines.map(line => {
      val fields = line.split(",")
      val id = fields(0).toInt
      val name = fields(1)
      val age = fields(2).toInt
      val salary = fields(3).toInt
      (id, name, age, salary)
    })
    // Build the rule with Ordering's on method:
    //   def on[U](f: U => T): Ordering[U]
    // It maps each tuple to the key (age, -salary) and reuses Ordering[(Int, Int)].
    implicit val rules: Ordering[(Int, String, Int, Int)] =
      Ordering[(Int, Int)].on[(Int, String, Int, Int)](t => (t._3, -t._4))
    // sortBy automatically picks up the implicit Ordering matching the key type
    val sorted = userRDD.sortBy(user => user)
    sorted.collect().foreach(println)
    // release resources
    sc.stop()
  }
}
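Ordering.by builds the same rule a bit more compactly, inferring the key Ordering from the key function; a minimal alternative sketch:

// equivalent to Ordering[(Int, Int)].on(...): by(f) derives
// Ordering[(Int, String, Int, Int)] from the (age, -salary) key
implicit val rules: Ordering[(Int, String, Int, Int)] =
  Ordering.by((t: (Int, String, Int, Int)) => (t._3, -t._4))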