Spark Scala secondsort

secondsort

data

2 3
4 1
2 1
4 3
3 3


By using an implicit value to override the compare function of Ordering, we can customize the sort rule and implement a secondary sort; the same approach extends to three or even five sort keys (a sketch of the three-key case follows the result below).
The implementation code is as follows:

package one

import org.apache.spark.{SparkConf, SparkContext}

object secondsort {
  def main(args: Array[String]): Unit = {

    // Run Spark locally
    val conf = new SparkConf().setAppName("sort").setMaster("local")
    val sc = new SparkContext(conf)

    // Load the input file as an RDD (the path is passed as the first argument)
    val file = sc.textFile(args(0))

    // Regex matching two whitespace-separated single-digit fields, as in the sample data
    val logRegex = """([0-9])\s([0-9])""".r
    def u(line: String): (Int, Int) = {
      logRegex.findFirstIn(line) match {
        case Some(logRegex(a, b)) => (a.toInt, b.toInt)
        case _ => (0, 0)
      }
    }

    // Override the sort rule with an implicit Ordering
    implicit val sort: Ordering[(Int, Int)] = new Ordering[(Int, Int)] {
      override def compare(x: (Int, Int), y: (Int, Int)): Int = {
        // Sort by the first field; break ties with the second field
        if (x._1 != y._1) x._1.compare(y._1)
        else x._2.compare(y._2)
      }
    }

    // Call sortBy, which picks up the implicit Ordering defined above
    val res = file.map(line => u(line)).sortBy(a => a)

    // Inspect the result (with a local master the output is printed to the console)
    res.foreach(println)
  }
}

result

(2,1)
(2,3)
(3,3)
(4,1)
(4,3)
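
Since the post notes that the same implicit-Ordering approach extends beyond two keys, here is a minimal sketch of a three-key variant. It is an assumption-laden illustration, not part of the original code: the object name thirdsort, the in-memory sample data, and the use of parallelize/collect are all hypothetical stand-ins for the file-based pipeline above.

package one

import org.apache.spark.{SparkConf, SparkContext}

object thirdsort {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("thirdsort").setMaster("local")
    val sc = new SparkContext(conf)

    // Three-key ordering: compare key by key, moving to the next key only on ties
    implicit val sort: Ordering[(Int, Int, Int)] = new Ordering[(Int, Int, Int)] {
      override def compare(x: (Int, Int, Int), y: (Int, Int, Int)): Int = {
        val c1 = x._1.compare(y._1)
        if (c1 != 0) c1
        else {
          val c2 = x._2.compare(y._2)
          if (c2 != 0) c2 else x._3.compare(y._3)
        }
      }
    }

    // Hypothetical three-column records, standing in for lines parsed by a helper like u() above
    val data = sc.parallelize(Seq((2, 3, 1), (2, 3, 0), (4, 1, 5), (2, 1, 9)))

    // sortBy picks up the implicit three-key Ordering defined above
    data.sortBy(a => a).collect().foreach(println)

    sc.stop()
  }
}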
