为了省去每次都创建spark对象,写了一个基础类SparkSessionBase,没想到抛出异常:A master URL must be set in your configuration。通过查看spark的初始化过程,发现问题在于spark对象在main函数之外创建,则spark中的SparkContext对象在创建过程中传入的SparkConf参数不包含spark.master参数。也就是说在提交任务的时候虽然指定了--master yarn-client,但是SparkContext对象创建时,并没有读取该参数。
进一步追踪源码,可以从 org.apache.spark.launcher.Main 到 prepareBashCommand 的调用过程来查看参数是如何传递的。
//Error Code — kept as-is on purpose: this is the pattern that reproduces the bug.
class SparkSessionBase {
// BUG (deliberately shown): `spark` is an eager `val`, so the SparkSession is
// built while the JVM initializes the object extending this class — i.e. before
// main() runs and before spark-submit has injected `--master` into SparkConf.
// getOrCreate() therefore sees no spark.master and throws
// "A master URL must be set in your configuration".
// The fix is to defer creation until first use inside main() (e.g. `lazy val`).
protected val spark:SparkSession =SparkSession.builder().enableHiveSupport().getOrCreate();
}
object ImportHiveData2Kafka extends SparkSessionBase {

  /**
   * Entry point: reads a dataset and pushes each partition to Kafka as JSON.
   *
   * Expected arguments:
   *   args(0) = Kafka topic
   *   args(1) = data type tag forwarded to importToKafka
   *   args(2) = data source type: "parquet" or "hive"
   *   args(3) = parquet path (for "parquet") or hive table name (for "hive")
   *   args(4) = hive partition value `dt` (only used when args(2) == "hive")
   */
  def main(args: Array[String]): Unit = {
    val topic          = args(0)
    val dataType       = args(1)
    val dataSourceType = args(2)

    // `match` is an expression: bind its result directly instead of the
    // original `var infos = null` + mutation inside each case. Also fail fast
    // on an unknown source type — the original `case _ => null` left `infos`
    // null and only crashed later with an NPE at foreachPartition.
    val infos: Dataset[String] = dataSourceType match {
      case "parquet" =>
        spark.read.load(args(3)).toJSON
      case "hive" =>
        spark.sql(s"select * from ${args(3)} where dt='${args(4)}'").toJSON
      case other =>
        throw new IllegalArgumentException(s"Unsupported dataSourceType: $other")
    }

    // importToKafka is defined elsewhere in this project; each partition's
    // rows are materialized to a List before sending.
    infos.foreachPartition(info => importToKafka(topic, dataType, info.toList))
    spark.close()
  }
}