Spark java.lang.ExceptionInInitializerError when using an accumulator

A write-up of an issue I hit while using a Spark accumulator.

Working code:

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.util.LongAccumulator

object SparkTest {
  val sparkConf: SparkConf = new SparkConf()
    .setMaster("local[3]")
    .setAppName("Test")
  val session: SparkSession = SparkSession.builder().config(sparkConf).getOrCreate()
  val longAccumulator: LongAccumulator = session.sparkContext.longAccumulator("longAccumulator")

  // The accumulator comes in as a method parameter, so the map closure
  // below captures the parameter rather than the SparkTest object's field.
  def count(longAccumulator: LongAccumulator): Unit = {
    val arr = Array[Int](1, 2, 3)
    val arrRdd = session.sparkContext.parallelize(arr)
    val res = arrRdd.map(m => {
      longAccumulator.add(1)
    }).cache().collect()
    println(longAccumulator.value)
  }

  def main(args: Array[String]): Unit = {
    count(longAccumulator)
    println(longAccumulator.value)
  }
}
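
As a follow-up on the design choice: an equivalent way to avoid the problem is to keep no Spark state at the object level at all, building the session and the accumulator inside main and passing both down. A minimal sketch of that variant (my rewrite, not the original post's code; setMaster is left out so that spark-submit supplies the master on the cluster):

import org.apache.spark.sql.SparkSession
import org.apache.spark.util.LongAccumulator

object SparkTestSafe {
  def count(session: SparkSession, acc: LongAccumulator): Unit = {
    val arrRdd = session.sparkContext.parallelize(Array(1, 2, 3))
    // The closure captures only the parameter `acc`, so executors
    // never need to initialize the SparkTestSafe object itself.
    arrRdd.map(m => acc.add(m)).collect()
    println(acc.value)
  }

  def main(args: Array[String]): Unit = {
    // No setMaster here: in yarn-cluster mode spark-submit provides the master.
    val session = SparkSession.builder().appName("Test").getOrCreate()
    val acc = session.sparkContext.longAccumulator("longAccumulator")
    count(session, acc)
    session.stop()
  }
}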

Original code:

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.util.LongAccumulator

object SparkTest {
  val sparkConf: SparkConf = new SparkConf()
//    .setMaster("local[3]")
    .setAppName("Test")
  val session: SparkSession = SparkSession.builder().config(sparkConf).getOrCreate()
  val longAccumulator: LongAccumulator = session.sparkContext.longAccumulator("longAccumulator")

  def count(): Unit = {
    val arr = Array[Int](1, 2, 3)
    val arrRdd = session.sparkContext.parallelize(arr)
    val res = arrRdd.map(m => {
      // References the object's field: the executor must initialize
      // the whole SparkTest$ object before this line can run.
      longAccumulator.add(m)
    }).cache().repartition(1).collect()
    println(longAccumulator.value)
  }

  def main(args: Array[String]): Unit = {
    count()
    println(longAccumulator.value)
  }
}

This code runs fine in local mode, but once it is packaged and submitted to the cluster in yarn-cluster mode, it fails.

Below is the error log. The failure is reported at line 50: longAccumulator.add(m)

The source file contained comments that I removed before pasting the code here, so line 50 in the trace is in fact the line where the accumulator does the add.

20/07/29 16:29:27 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, executor 1): java.lang.ExceptionInInitializerError
	at sparksql.SparkTest$$anonfun$1.apply$mcVI$sp(SparkTest.scala:50)
	at sparksql.SparkTest$$anonfun$1.apply(SparkTest.scala:49)
	at sparksql.SparkTest$$anonfun$1.apply(SparkTest.scala:49)
	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:216)
	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
	at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
	at org.apache.spark.scheduler.Task.run(Task.scala:108)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: A master URL must be set in your configuration
	at org.apache.spark.SparkContext.<init>(SparkContext.scala:376)
	at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2509)
	at org.apache.spark.sql.SparkSession$Builder$$anonfun$6.apply(SparkSession.scala:909)
	at org.apache.spark.sql.SparkSession$Builder$$anonfun$6.apply(SparkSession.scala:901)
	at scala.Option.getOrElse(Option.scala:121)
	at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:901)
	at sparksql.SparkTest$.<init>(SparkTest.scala:43)
	at sparksql.SparkTest$.<clinit>(SparkTest.scala)
	... 22 more

20/07/29 16:29:27 INFO scheduler.TaskSetManager: Starting task 0.1 in stage 0.0 (TID 2, staging-bigdata06, executor 1, partition 0, PROCESS_LOCAL, 4702 bytes)
20/07/29 16:29:27 WARN scheduler.TaskSetManager: Lost task 0.1 in stage 0.0 (TID 2, staging-bigdata06, executor 1): java.lang.NoClassDefFoundError: Could not initialize class sparksql.SparkTest$
	at sparksql.SparkTest$$anonfun$1.apply$mcVI$sp(SparkTest.scala:50)
	at sparksql.SparkTest$$anonfun$1.apply(SparkTest.scala:49)
	at sparksql.SparkTest$$anonfun$1.apply(SparkTest.scala:49)
	at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:216)
	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
	at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
	at org.apache.spark.scheduler.Task.run(Task.scala:108)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

My takeaway: in cluster mode, only main (running on the driver) can safely reference the accumulator defined as an object field; any other method that uses it inside a task closure needs to receive it as a parameter passed in from main. The "Caused by" section shows why: the map closure references the object's field, so the executor has to initialize the whole SparkTest$ object (sparksql.SparkTest$.<clinit> in the trace), and that object's initializer tries to build a SparkSession, which fails with "A master URL must be set in your configuration" because setMaster is commented out and the executor JVM has no master configured. Nothing in the error message mentions the accumulator, and many posts online blame this error on variable initialization in general, so at first I didn't suspect the accumulator at all.
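
The mechanism is ordinary JVM class initialization rather than anything Spark-specific: a Scala object is initialized lazily on first access, and an exception thrown by its initializer is wrapped in java.lang.ExceptionInInitializerError. A minimal stand-alone sketch (no Spark; the names are mine, with Holder standing in for SparkTest and the require standing in for SparkSession.builder failing):

import java.util.concurrent.atomic.AtomicLong

// Stand-in for SparkTest: its initializer can fail, just like
// building a SparkSession without a master URL does on an executor.
object Holder {
  require(sys.props.contains("demo.master"), "A master URL must be set")
  val counter = new AtomicLong() // stand-in for the accumulator
}

object CaptureDemo {
  def main(args: Array[String]): Unit = {
    // The lambda references Holder.counter, i.e. the Holder object,
    // so its first invocation triggers Holder's initializer.
    val f: Int => Unit = m => Holder.counter.addAndGet(m)
    // Without -Ddemo.master=yes this call throws
    // java.lang.ExceptionInInitializerError, mirroring the trace above.
    f(1)
  }
}

This also explains the second trace: once a class's static initialization has failed, the JVM refuses to retry it, so every subsequent task attempt on that executor fails with NoClassDefFoundError: Could not initialize class sparksql.SparkTest$ instead of the original error.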
