package com.sf.gis.scala.base.spark

import org.apache.log4j.Logger
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

/**
 * Created by 01374443 on 2020/7/27.
 */
object Spark {

  @transient lazy val logger: Logger = Logger.getLogger(this.getClass)

  /**
   * Get a SparkSession.
   *
   * @param appName     Spark application name
   * @param confMap     Spark configuration overrides; null means defaults only
   * @param isLocal     whether to run in local mode (true: local, false: cluster)
   * @param localThread number of worker threads to use in local mode
   * @return the shared SparkSession
   */
  def getSparkSession(appName: String,
                      confMap: Map[String, String] = null,
                      isLocal: Boolean = false,
                      localThread: Int = 2): SparkSession = {
    val sparkSession =
      if (isLocal) {
        SparkSession.builder()
          .config(getSparkConf(appName, confMap).setMaster("local[" + localThread + "]"))
          .getOrCreate()
      } else {
        SparkSession.builder()
          .config(getSparkConf(appName, confMap))
          .enableHiveSupport()
          .getOrCreate()
      }
    // Suppress Spark's own logging; the config dump below is deliberately
    // logged at ERROR level so it stays visible after the level change.
    sparkSession.sparkContext.setLogLevel("ERROR")
    logger.error("conf list:")
    for ((name, value) <- sparkSession.conf.getAll) {
      logger.error(name + ": " + value)
    }
    sparkSession
  }

  /**
   * Build a SparkConf with project-wide defaults, then apply caller overrides.
   *
   * @param appName Spark application name
   * @param confMap configuration overrides applied on top of the defaults; may be null
   * @return the configured SparkConf
   */
  def getSparkConf(appName: String, confMap: Map[String, String]): SparkConf = {
    val conf = new SparkConf().setAppName(appName)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.scheduler.maxRegisteredResourcesWaitingTime", "90000")
      .set("spark.port.maxRetries", "100")
      .set("spark.driver.maxResultSize", "12g")
      .set("spark.rpc.io.backLog", "10000")
      .set("spark.cleaner.referenceTracking.blocking", "false")
      .set("spark.streaming.stopGracefullyOnShutdown", "true")
      .set("spark.io.compression.codec", "org.apache.spark.io.SnappyCompressionCodec")
      .set("spark.driver.allowMultipleContexts", "true")
      .set("spark.sql.tungsten.enabled", "false")
      .set("quota.producer.default", (10485760 * 2).toString) // default is 10485760
      .set("quota.consumer.default", (10485760 * 2).toString)
      .set("cache.max.bytes.buffering", (20485760 * 2).toString)
      .set("spark.sql.broadcastTimeout", "36000")
      .set("spark.network.timeout", "6000")
      .set("spark.executor.heartbeatInterval", "30000")
      .set("hive.exec.dynamic.partition", "true")
      .set("hive.exec.dynamic.partition.mode", "nonstrict")
      .set("spark.executor.extraJavaOptions",
        "-XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps " +
          "-XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark")
      .set("spark.driver.extraJavaOptions",
        "-XX:+PrintFlagsFinal -XX:+PrintReferenceGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps " +
          "-XX:+PrintAdaptiveSizePolicy -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark")
    // Caller-supplied settings override the defaults above.
    if (confMap != null) {
      for ((name, value) <- confMap) {
        conf.set(name, value)
      }
    }
    conf
  }
}
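
// A minimal usage sketch, not part of the original file: how a job might
// obtain a session through this helper. The object name, app name, and the
// "spark.sql.shuffle.partitions" override below are hypothetical examples,
// chosen only to illustrate the confMap/isLocal/localThread parameters.
object SparkUsageExample {
  def main(args: Array[String]): Unit = {
    // Local run with 4 threads; confMap entries are applied after the
    // defaults in getSparkConf, so they win over the built-in settings.
    val spark = Spark.getSparkSession(
      appName = "example-app",
      confMap = Map("spark.sql.shuffle.partitions" -> "200"),
      isLocal = true,
      localThread = 4)
    spark.range(10).show() // trivial action to verify the session works
    spark.stop()
  }
}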