-- Data-skew handling for joins, driven by Adaptive Query Execution (AQE).
-- AQE has to be switched on for the skew-join settings below to apply.
set spark.sql.adaptive.enabled=true;
-- Allow AQE to split skewed partitions when joining.
set spark.sql.adaptive.skewJoin.enabled=true;
-- Skew factor: a partition counts as skewed only if it is larger than
-- this multiple of the median partition size.
set spark.sql.adaptive.skewJoin.skewedPartitionFactor=5;
-- Minimum size a partition must also exceed before it is treated as skewed.
set spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes=256MB;
-- Advisory partition size: the granularity used when splitting skewed
-- partitions (and when coalescing small ones).
set spark.sql.adaptive.advisoryPartitionSizeInBytes=64M;
-- Small-file merging for dynamic-partition overwrite writes.
-- AQE must be enabled so that the advisory partition size below is honored.
set spark.sql.adaptive.enabled=true;
-- Non-strict mode allows every partition column to be resolved dynamically.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Write partitioned Hive tables through the Hive SerDe path instead of
-- Spark's built-in ORC/Parquet writer.
-- Original author's note: this keeps the table queryable by Impala.
set spark.sql.hive.convertInsertingPartitionedTable=false;
-- Add a repartition step before the write when the plan has no shuffle, so
-- that output files can be merged.
-- NOTE(review): this does not appear to be a stock Apache Spark option; it
-- looks vendor/extension-provided -- confirm the target cluster supports it.
set spark.sql.optimizer.insertRepartitionBeforeWriteIfNoShuffle.enabled=true;
-- Advisory size of each shuffle partition, which drives the output file size.
set spark.sql.adaptive.advisoryPartitionSizeInBytes=64M;
-- References:
-- https://spark.apache.org/docs/latest/sql-performance-tuning.html#adaptive-query-execution
-- https://support.huaweicloud.com/cmpntguide-mrs/mrs_01_1970.html