package com.ws.demo
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
* 自定义排序
* 排序规则 : 按分数降序,分数相同,年龄升序
*/
object CustomSort {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[4]")
    val sc = new SparkContext(conf)

    // Raw records: "name age score"
    val records = Array("ws 18 150", "tt 19 145", "nn 20 130", "bb 16 120", "cb 19 150")
    val recordRdd: RDD[String] = sc.parallelize(records)

    // Parse each whitespace-separated record into a Student.
    val studentRdd: RDD[Student] = recordRdd.map { record =>
      val Array(name, age, score) = record.split(" ")
      new Student(name, age.toInt, score.toInt)
    }

    // Student extends Ordered, so sorting by the element itself
    // applies its compare (score desc, then age asc).
    val sortedRdd: RDD[Student] = studentRdd.sortBy(identity)
    val collected: Array[Student] = sortedRdd.collect()
    println(collected.toBuffer)

    sc.stop()
  }
}
/**
 * Sortable student record.
 *
 * Must be Serializable because Spark ships instances between executors
 * during the shuffle phase of sortBy (same requirement as implementing
 * Writable/Comparable in Hadoop).
 *
 * @param name  student name
 * @param age   age in years
 * @param score test score
 */
class Student(val name: String, val age: Int, val score: Int) extends Ordered[Student] with Serializable {
  /**
   * Ordering rule: score descending; ties broken by age ascending.
   *
   * Uses Integer.compare instead of subtraction: `a - b` can overflow
   * Int for extreme inputs, producing an inconsistent (and therefore
   * broken) ordering.
   */
  override def compare(that: Student): Int = {
    if (this.score == that.score) {
      Integer.compare(this.age, that.age)
    } else {
      // Arguments swapped to get descending order on score.
      Integer.compare(that.score, this.score)
    }
  }

  override def toString: String = s"{ name : $name , age : $age , score : $score } \n"
}
/*
Program output:
ArrayBuffer(
{ name : ws , age : 18 , score : 150 } ,
{ name : cb , age : 19 , score : 150 } ,
{ name : tt , age : 19 , score : 145 } ,
{ name : nn , age : 20 , score : 130 } ,
{ name : bb , age : 16 , score : 120 }
)
*/