假设某个物品属性有(商品名 库存 价格)
// Local SparkContext running with two threads; the application is named after the runtime class.
val sc = new SparkContext(
  new SparkConf()
    .setAppName(this.getClass.getName)
    .setMaster("local[2]")
)
// Each record is "<product name> <stock> <price>", separated by single spaces.
val rdd = sc.parallelize(
  List(
    "扑克牌 20 5",
    "iphone11 999 6000",
    "可乐 20 3",
    "书本 35 66"
  )
)
方法一:
// Method 1: parse every line into a (name, stock, price) tuple and sort by stock, descending.
// `ascending = false` is used instead of negating the key, because -Int.MinValue overflows
// back to Int.MinValue and would sort such a record at the wrong end.
// `collect()` completes the expression and returns the sorted records to the driver as an Array.
rdd.map { line =>
  val fields = line.split(" ")
  (fields(0), fields(1).toInt, fields(2).toDouble)
}.sortBy(_._2, ascending = false).collect()
// Result recorded in the original note:
// Array((iphone11,999,6000.0), (书本,35,66.0), (扑克牌,20,5.0), (可乐,20,3.0))
方法二:
/**
 * Product record whose natural order is stock (`pnum`) descending.
 *
 * A hand-written class that extends `Ordered` for the comparison and `Serializable`
 * explicitly.
 *
 * @param name  product name
 * @param pnum  stock quantity; the only field used for ordering
 * @param price unit price
 */
class Products(val name: String, val pnum: Int, val price: Double) extends Ordered[Products] with Serializable {

  /**
   * Compares by stock in descending order.
   *
   * @return a negative value when this product has MORE stock than `that`, zero when the
   *         stock is equal, a positive value otherwise
   */
  override def compare(that: Products): Int =
    // Integer.compare with swapped arguments gives the descending order without the
    // overflow that `-(this.pnum - that.pnum)` suffers for values near Int.MinValue/MaxValue.
    Integer.compare(that.pnum, this.pnum)

  /** Tab-separated "name, stock, price" representation. */
  override def toString: String = s"$name\t$pnum\t$price"
}
// Method 2: wrap every parsed line in a Products instance and sort by the class's own compare.
// The result is collected before printing: RDD.foreach runs separately for each partition,
// so printing directly from it is not guaranteed to follow the sorted order.
rdd.map { line =>
  val fields = line.split(" ")
  new Products(fields(0), fields(1).toInt, fields(2).toDouble)
}.sortBy(p => p).collect().foreach(println)
方法三:
方法二有点麻烦:对象需要 new,类需要继承 Serializable,还需要自己重写 toString 方法。
// With a case class only Ordered has to be extended; nothing else is hand-written here.
// For the reasons, see the post on class vs. case class differences:
// https://blog.csdn.net/kzw11/article/details/100668699
/**
 * Product record whose natural order is stock (`num`) ascending.
 *
 * @param name  product name
 * @param num   stock quantity; the only field used for ordering
 * @param price unit price
 */
case class Products2(name: String, num: Int, price: Double) extends Ordered[Products2] {

  /**
   * Compares by stock in ascending order.
   *
   * Integer.compare is used instead of `this.num - that.num`, which overflows (and flips
   * sign) when the two values are far apart, e.g. Int.MinValue versus a positive number.
   */
  override def compare(that: Products2): Int = Integer.compare(this.num, that.num)
}
// Method 3: build Products2 case-class instances and sort by their own compare.
// The result is collected before printing: RDD.foreach runs separately for each partition,
// so printing directly from it is not guaranteed to follow the sorted order.
rdd.map { line =>
  val fields = line.split(" ")
  Products2(fields(0), fields(1).toInt, fields(2).toDouble)
}.sortBy(p => p).collect().foreach(println)
方法四:
采用隐式转换:Products3 本身不继承 Ordered,而是通过隐式转换为它补充排序能力,其余与方法二差别不大。
/**
 * Plain serializable product record. It defines no ordering of its own.
 *
 * @param name  product name
 * @param num   stock quantity
 * @param price unit price
 */
class Products3(val name: String, val num: Int, val price: Double) extends Serializable {

  /** Tab-separated "name, stock, price" representation. */
  override def toString: String = s"$name\t$num\t$price"
}
// Method 4: build Products3 instances; their ordering is supplied by the implicit
// conversion `product2ordered`, not by the class itself.
// The result is collected before printing: RDD.foreach runs separately for each partition,
// so printing directly from it is not guaranteed to follow the sorted order.
rdd.map { line =>
  val fields = line.split(" ")
  new Products3(fields(0), fields(1).toInt, fields(2).toDouble)
}.sortBy(p => p).collect().foreach(println)
// Enriches Products3 with sorting support: an implicit view from Products3 to
// Ordered[Products3] that compares by `num` in ascending order.
implicit def product2ordered(prod: Products3): Ordered[Products3] = new Ordered[Products3] {
  override def compare(that: Products3): Int =
    // Integer.compare avoids the overflow (and sign flip) of `prod.num - that.num`
    // when the two values are far apart.
    Integer.compare(prod.num, that.num)
}
方法五:
/**
 * Implicit ordering for (name, stock, price) records, built with `Ordering.on`:
 * stock descending first, then price descending.
 *
 *  - `(Int, Double)` is the type of the sort key extracted from each record.
 *  - `(String, Int, Double)` is the type of the records being sorted.
 *
 * The stock component is ordered with `Ordering.Int.reverse` rather than by negating it,
 * because `-Int.MinValue` overflows back to `Int.MinValue`. The price is still negated,
 * which cannot overflow for a Double.
 */
implicit val ord: Ordering[(String, Int, Double)] =
  Ordering
    .Tuple2(Ordering.Int.reverse, implicitly[Ordering[Double]])
    .on[(String, Int, Double)](x => (x._2, -x._3))
// Method 5: parse each line into a (name, stock, price) tuple first, so that sortBy picks up
// the implicit Ordering[(String, Int, Double)]; sorting the raw RDD[String] would use the
// String ordering and ignore it.
// The result is collected before printing: RDD.foreach runs separately for each partition,
// so printing directly from it is not guaranteed to follow the sorted order.
rdd.map { line =>
  val fields = line.split(" ")
  (fields(0), fields(1).toInt, fields(2).toDouble)
}.sortBy(x => x).collect().foreach(println)