9、Spark_RDD算子——SortByKey

一、SparkUtils工具类

import org.apache.spark.{SparkConf, SparkContext}

object SparkUtils {
  /**
   * Default master URL: run Spark locally with as many worker
   * threads as there are cores on the machine.
   */
  val DEFAULT_MASTER = "local[*]"

  /**
   * Creates a SparkContext using the default master (`local[*]`).
   *
   * @param appName application name shown in the Spark UI
   * @return a newly started SparkContext
   */
  def getSparkContext(appName: String): SparkContext = getSparkContext(appName, DEFAULT_MASTER)

  /**
   * Creates a SparkContext for the given application name and master URL.
   *
   * @param appName application name shown in the Spark UI
   * @param master  master URL, e.g. "local[*]" or "spark://host:7077"
   * @return a newly started SparkContext
   */
  def getSparkContext(appName: String, master: String): SparkContext =
    new SparkContext(new SparkConf().setAppName(appName).setMaster(master))

  /**
   * Stops the given SparkContext; a null reference is silently ignored.
   * Explicit `Unit` return type: this method exists purely for its side effect.
   */
  def close(sc: SparkContext): Unit = Option(sc).foreach(_.stop())
}

二、日志工具

import org.apache.log4j.{Level, Logger}

trait LoggerTrait {
  // Silence chatty framework loggers so application output stays readable.
  // Runs once when the mixing-in class is constructed.
  private val noisyLoggers = Seq("org.apache.spark", "org.apache.hadoop", "org.spark_project")
  noisyLoggers.foreach(name => Logger.getLogger(name).setLevel(Level.WARN))
}

三、Spark算子SortByKey

import cn.qphone.spark.common.LoggerTrait.LoggerTrait
import cn.qphone.spark.common.Utils.SparkUtils
import org.apache.spark.rdd.RDD

object Deom9_sortByKey extends LoggerTrait {
  def main(args: Array[String]): Unit = {
    // 1. Acquire a SparkContext (local[*] by default via SparkUtils).
    val sc = SparkUtils.getSparkContext("Deom9_sortByKey ")

    // 2. Sample data, whitespace-separated fields: id name age school
    val stuList = List(
      "1 令狐冲 22 华山",
      "2 岳不群 38 华山",
      "3 虚竹 33 逍遥",
      "4 乔峰 40 丐帮",
      "5 黄蓉 33 桃花岛",
      "6 杨过 11  古墓",
      "7 小龙女 13 古墓",
      "8 郭靖 34 丐帮"
    )
    val stuRDD: RDD[String] = sc.parallelize(stuList)

    // 3. Key each record by age. Parse the age as Int so the sort is
    //    numeric: the original kept it a String, which orders
    //    lexicographically and breaks for mixed-width ages ("9" > "10").
    val stusRDD: RDD[(Int, (String, String, String))] = stuRDD.map { line =>
      val fields = line.split("\\s+")
      (fields(2).toInt, (fields(0), fields(1), fields(3)))
    }

    // 4. sortByKey produces a TOTAL ordering across all 4 output
    //    partitions (range partitioning + per-partition sort), not just
    //    a per-partition sort.
    val sortRDD: RDD[(Int, (String, String, String))] =
      stusRDD.sortByKey(ascending = true, numPartitions = 4)

    // 5. collect() to the driver before printing: a distributed
    //    foreach(println) runs on the executors, whose interleaved
    //    output makes a correctly sorted RDD *look* unsorted.
    sortRDD.collect().foreach(println)

    // 6. Release resources.
    SparkUtils.close(sc)
  }
}
已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 游动-白 设计师:上身试试 返回首页