11、Spark_RDD算子——CoalesceAndRepartition

一、SparkUtils工具类

import org.apache.spark.{SparkConf, SparkContext}

object SparkUtils {
  /** Master URL used when the caller does not pass one explicitly. */
  val DEFAULT_MASTER = "local[*]"

  /**
   * Builds a SparkContext from an application name and a master URL.
   *
   * @param appName value passed to `SparkConf.setAppName`
   * @param master  value passed to `SparkConf.setMaster`
   * @return a newly constructed SparkContext
   */
  def getSparkContext(appName:String, master:String):SparkContext = {
    val conf = new SparkConf().setAppName(appName).setMaster(master)
    new SparkContext(conf)
  }

  /**
   * Builds a SparkContext for `appName` using [[DEFAULT_MASTER]] as the master URL.
   *
   * @param appName value passed to `SparkConf.setAppName`
   * @return a newly constructed SparkContext
   */
  def getSparkContext(appName:String):SparkContext = getSparkContext(appName, DEFAULT_MASTER)

  /**
   * Stops the given SparkContext. A null reference is silently ignored.
   *
   * @param sc the context to stop; may be null
   */
  def close(sc:SparkContext): Unit = Option(sc).foreach(_.stop())
}

二、日志工具

import org.apache.log4j.{Level, Logger}

trait LoggerTrait {
  // Executed as part of the initialization of whatever class/object mixes this
  // trait in: sets the log4j level of each listed logger name to WARN.
  Seq("org.apache.spark", "org.apache.hadoop", "org.spark_project")
    .foreach(name => Logger.getLogger(name).setLevel(Level.WARN))
}

三、Spark算子CoalesceAndRepartition

import cn.qphone.spark.common.LoggerTrait.LoggerTrait
import cn.qphone.spark.common.Utils.SparkUtils
import org.apache.spark.rdd.RDD

/**
 * Demo entry point: parallelizes a range into 5 partitions, shrinks it to 3
 * with `coalesce`, and prints the partition count before and after.
 */
object Deom11_CoalesceAndRepartition extends LoggerTrait{
  def main(args: Array[String]): Unit = {
    // 1. Obtain the SparkContext (created outside the try so that a failure
    //    here does not lead to closing a context that was never built).
    val sc = SparkUtils.getSparkContext("Deom11_CoalesceAndRepartition")
    try {
      // 2. Input data
      val list = 1.to(100000)
      // 3. Load it into an RDD with 5 partitions
      val listRDD: RDD[Int] = sc.parallelize(list,5)
      println("分区数目:" + listRDD.getNumPartitions)
      // 4. Reduce the number of partitions to 3
      val partRDD: RDD[Int] = listRDD.coalesce(3)
      println("分区数目:" + partRDD.getNumPartitions)
    } finally {
      // 5. Release resources even if one of the steps above throws
      SparkUtils.close(sc)
    }
  }
}

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 游动-白 设计师:上身试试 返回首页