package test

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Sort the input data by custom rules on its columns:
 * first column ascending, second column descending,
 * third column descending as the final tie-breaker.
 * Steps:
 *   1. Define a key class with a custom ordering (Ordered + Serializable).
 *   2. Sort with the sortByKey operator.
 */
object SecondSort_Scala {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("SecondSort")
    val sc: SparkContext = new SparkContext(conf)

    val textFileRDD: RDD[String] = sc.textFile("D:\\4.txt")
    // Split each line once and pair a SecondSort key with the original line.
    val mapRDD: RDD[(SecondSort, String)] = textFileRDD.map { line =>
      val fields = line.split(" ")
      (new SecondSort(fields(0).toInt, fields(1).toInt, fields(2).toInt), line)
    }
    val sortByKeyRDD: RDD[(SecondSort, String)] = mapRDD.sortByKey(ascending = true)
    val mapValueRDD: RDD[String] = sortByKeyRDD.map(_._2)
    mapValueRDD.foreach(println)

    sc.stop()
  }
}

class SecondSort(val firstNum: Int, val secondNum: Int, val thirdNum: Int)
  extends Ordered[SecondSort] with Serializable {

  override def compare(that: SecondSort): Int = {
    // First column ascending.
    var result: Int = this.firstNum - that.firstNum
    if (result == 0) {
      // Second column descending.
      result = that.secondNum - this.secondNum
      if (result == 0) {
        // Third column descending.
        result = that.thirdNum - this.thirdNum
      }
    }
    result
  }
}
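For comparison, the same ordering can be expressed without a dedicated key class by sorting on a tuple, since Scala already provides a lexicographic Ordering for tuples of Ints. This is a minimal sketch, not part of the original program; it assumes the textFileRDD defined above and the same three-column space-separated layout:

import org.apache.spark.rdd.RDD

// Sketch: tuple-keyed variant of the same three-column sort.
// Negating columns 2 and 3 makes the built-in tuple ordering give
// ascending / descending / descending (fine for this data; negation
// would overflow only at Int.MinValue).
val tupleSortedRDD: RDD[String] = textFileRDD.sortBy { line =>
  val fields = line.split(" ")
  (fields(0).toInt, -fields(1).toInt, -fields(2).toInt)
}
tupleSortedRDD.foreach(println)

The trade-off: the tuple version is shorter, while the SecondSort class keeps the comparison rules named and reusable.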
Input data (before sorting):
1 11 1
1 11 2
2 11 3
2 44 4
3 55 5
3 66 6
4 77 7
4 88 8
Output (after sorting):
1 11 2
1 11 1
2 44 4
2 11 3
3 66 6
3 55 5
4 88 8
4 77 7
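To reproduce this output without creating D:\4.txt, the sample rows can be supplied in memory with sc.parallelize. A sketch, assuming the sc and SecondSort class from the listing above; the inline data is copied from the input shown here:

// Sketch: feed the sample rows in-memory instead of reading a file.
val lines = Seq(
  "1 11 1", "1 11 2", "2 11 3", "2 44 4",
  "3 55 5", "3 66 6", "4 77 7", "4 88 8")
val inlineRDD: RDD[String] = sc.parallelize(lines)
val sorted: RDD[String] = inlineRDD
  .map { line =>
    val f = line.split(" ")
    (new SecondSort(f(0).toInt, f(1).toInt, f(2).toInt), line)
  }
  .sortByKey()
  .map(_._2)
// collect() brings the results to the driver so the print order is
// deterministic; foreach(println) on an RDD prints in executor order.
sorted.collect().foreach(println)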