-- Spark job tuning suggestions (session-level performance settings).
-- NOTE(review): keys under spark.sql.fragPartition.* , spark.vcore.boost.ratio and
-- spark.yarn.batch.smart.heuristic are not open-source Spark settings — they appear to be
-- platform/vendor extensions (likely small-file compaction controls); verify against the
-- internal platform documentation before reusing elsewhere.
set spark.executor.memory=24g; -- executor heap size: 24 GiB per executor
set spark.driver.memory=32g; -- enlarged driver heap: 32 GiB
set spark.vcore.boost.ratio=1;
set spark.driver.cores=4; -- cores allocated to the driver process. NOTE(review): original comment said "tasks run per core, default 1" — that describes spark.task.cpus, not spark.driver.cores; confirm which was intended
set spark.sql.fragPartition.maxShuffleBytes=1073741824; -- 1 GiB; vendor-specific fragment-partition shuffle cap — TODO confirm semantics
set spark.yarn.batch.smart.heuristic=125495624; -- vendor-specific magic constant; meaning not derivable from this file — verify with platform team
set spark.sql.files.maxPartitionBytes=8589934592; -- 8 GiB per file split. NOTE(review): original comment said "default 128M, lower it to get more map tasks (then merge small files)", but this value RAISES the limit to 8 GiB, producing fewer/larger map tasks — confirm intent
set spark.sql.parquet.adaptiveFileSplit=true;
set spark.sql.fragPartition.parquet.fast.mode.enabled=true;
set spark.sql.fragPartition.compactEnabled=true; -- enable small-file compaction (vendor extension)
set spark.maxRemoteBlockSizeFetchToMem=268435456; -- 256 MiB: blocks larger than this are fetched to disk, avoiding huge in-memory fetch requests. NOTE(review): original comment claimed the default is 512M; Spark docs list 200m — verify for the deployed Spark version
set spark.sql.fragPartition.skip.failure=true; -- don't fail the job if compaction fails (best-effort)
set spark.driver.memoryOverhead=4096; -- off-heap overhead for the driver, value in MiB (4 GiB)
set spark.sql.adaptive.maxNumPostShufflePartitions=125; -- cap AQE post-shuffle partitions to speed the run and reduce small output files / wasted CPU
set spark.sql.fragPartition.threshold=268435456; -- 256 MiB; vendor-specific compaction threshold — TODO confirm semantics
set spark.sql.orc.adaptiveFileSplit=true;
set spark.executor.memoryOverhead=4096; -- off-heap overhead per executor, value in MiB (4 GiB)
set spark.sql.fragPartition.expectedBytes=268435456; -- 256 MiB; vendor-specific target size after compaction — TODO confirm semantics