spark任务报错
Job aborted due to stage failure: Task 2049 in stage 515.0 failed 5 times, most recent
failure: Lost task 2049.3 in stage 515.0 (TID 241301, n11-147-025.byted.org, executor
1078): java.lang.RuntimeException: Cannot reserve additional contiguous bytes in the
vectorized reader (requested 590748783 bytes). As a workaround, you can disable the
vectorized reader. For parquet file format, refer to
spark.sql.parquet.enableVectorizedReader; for orc file format, refer to
spark.sql.orc.enableVectorizedReader.
details
Job aborted due to stage failure: Task 2049 in stage 515.0 failed 5 times, most recent failure: Lost task 2049.3 in stage 515.0 (TID 241301, n11-147-025.byted.org, executor 1078): java.lang.RuntimeException: Cannot reserve additional contiguous bytes in the vectorized reader (requested 590748783 bytes). As a workaround, you can disable the vectorized reader. For parquet file format, refer to spark.sql.parquet.enableVectorizedReader; for orc file format, refer to spark.sql.orc.enableVectorizedReader.
at org.apache.spark.sql.execution.vectorized.WritableColumnVector.throwUnsupportedException(WritableColumnVector.java:106)
at org.apache.spark.sql.execution.vectorized.WritableColumnVector.reserve(WritableColumnVector.java:92)
at org.apache.spark.sql.execution.vectorized.WritableColumnVector.appendBytes(WritableColumnVector.java:471)
at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putByteArray(OnHeapColumnVector.java:497)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedPlainValuesReader.readBinary(VectorizedPlainValuesReader.java:201)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedRleValuesReader.readBinary(VectorizedRleValuesReader.java:505)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBinaryBatch(VectorizedColumnReader.java:632)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:253)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readBatch(VectorizedParquetPartialRecordReader.java:183)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readSpecificBatch(VectorizedParquetPartialRecordReader.java:158)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:320)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:195)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun$1.apply$mcZ$sp(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun$1.apply(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun$1.apply(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.FileScanIterator.traceReadTime(FileScanIterator.scala:129)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.nextIterator(NormalFileScanIterator.scala:80)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:51)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.scan_nextBatch_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$12$$anon$1.hasNext(WholeStageCodegenExec.scala:731)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:198)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:359)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.OutOfMemoryError: Java heap space
at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.reserveInternal(OnHeapColumnVector.java:565)
at org.apache.spark.sql.execution.vectorized.WritableColumnVector.reserve(WritableColumnVector.java:90)
at org.apache.spark.sql.execution.vectorized.WritableColumnVector.appendBytes(WritableColumnVector.java:471)
at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putByteArray(OnHeapColumnVector.java:497)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedPlainValuesReader.readBinary(VectorizedPlainValuesReader.java:201)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedRleValuesReader.readBinary(VectorizedRleValuesReader.java:505)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBinaryBatch(VectorizedColumnReader.java:632)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:253)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readBatch(VectorizedParquetPartialRecordReader.java:183)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readSpecificBatch(VectorizedParquetPartialRecordReader.java:158)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:320)
at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:195)
at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun$1.apply$mcZ$sp(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun$1.apply(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun$1.apply(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.FileScanIterator.traceReadTime(FileScanIterator.scala:129)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:50)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.nextIterator(NormalFileScanIterator.scala:80)
at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:51)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.scan_nextBatch_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$12$$anon$1.hasNext(WholeStageCodegenExec.scala:731)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:198)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:359)
Driver stacktrace:
问题
Caused by: java.lang.OutOfMemoryError: Java heap space
解决
参考 Spark java.lang.OutOfMemoryError: Java heap space
增大参数。注意:该 OOM 发生在 executor 端(向量化 Parquet 读取时申请了约 590MB 的连续缓冲区),因此关键是增大 executor 内存或关闭向量化读取(见最后一行),driver 参数与此错误无直接关系:
set spark.driver.cores = "4";
set spark.driver.memory = "40g";
set spark.executor.memory = "30g";
set spark.executor.cores = "2";
set spark.sql.parquet.enableVectorizedReader = false;