1.num-executors 50~100
2.executor-memory 4G~8G num-executors乘以executor-memory,就代表了你的Spark作业申请到的总内存量,这个量是不能超过队列的最大内存量的
3.executor-cores 2~4
4.spark.default.parallelism 用于设置每个stage的默认task数量,Spark作业的默认task数量为500~1000个较为合适,设置该参数为num-executors * executor-cores的2~3倍较为合适
# Resource-tuning template for spark-submit.
# num-executors * executor-memory = total memory requested; must not exceed
# the YARN queue's maximum (see notes above).
# NOTE: spark.storage.memoryFraction / spark.shuffle.memoryFraction are
# legacy static-memory-manager settings; on Spark 1.6+ they only apply when
# spark.memory.useLegacyMode=true.
# Fixed: the original command ended with a dangling '\' and had no
# --class / application jar, so it could not run; completed with the job
# used elsewhere in these notes.
./bin/spark-submit \
  --master yarn-cluster \
  --num-executors 100 \
  --executor-memory 6G \
  --executor-cores 4 \
  --driver-memory 1G \
  --conf spark.default.parallelism=1000 \
  --conf spark.storage.memoryFraction=0.5 \
  --conf spark.shuffle.memoryFraction=0.3 \
  --class com.rexen.ComparisonOrcla \
  /soft/ComparisonOrcla.jar
# Same job submitted two ways. Fixed: removed the pasted shell-prompt
# transcript prefix "[root@cloud-vm1 ~]# " so the line is runnable as-is.
# yarn-cluster mode: the driver runs inside YARN (typical for production jobs).
spark-submit --class com.rexen.ComparisonOrcla --master yarn-cluster --num-executors 50 --executor-cores 4 --executor-memory 6G --conf spark.default.parallelism=500 /soft/ComparisonOrcla.jar
# yarn-client mode: the driver runs on the submitting machine, so job output
# appears on the local console (handy for debugging).
spark-submit --class com.rexen.ComparisonOrcla --master yarn-client --num-executors 50 --executor-cores 4 --executor-memory 6G --conf spark.default.parallelism=500 /soft/ComparisonOrcla.jar
2.executor-memory 4G~8G num-executors乘以executor-memory,就代表了你的Spark作业申请到的总内存量,这个量是不能超过队列的最大内存量的
3.executor-cores 2~4
4.spark.default.parallelism 用于设置每个stage的默认task数量,Spark作业的默认task数量为500~1000个较为合适,设置该参数为num-executors * executor-cores的2~3倍较为合适
# Resource-tuning template for spark-submit (duplicate copy of the example
# earlier in these notes).
# num-executors * executor-memory = total memory requested; must not exceed
# the YARN queue's maximum.
# NOTE: spark.storage.memoryFraction / spark.shuffle.memoryFraction are
# legacy static-memory-manager settings; on Spark 1.6+ they only apply when
# spark.memory.useLegacyMode=true.
# Fixed: the original command ended with a dangling '\' and had no
# --class / application jar, so it could not run; completed with the job
# used elsewhere in these notes.
./bin/spark-submit \
  --master yarn-cluster \
  --num-executors 100 \
  --executor-memory 6G \
  --executor-cores 4 \
  --driver-memory 1G \
  --conf spark.default.parallelism=1000 \
  --conf spark.storage.memoryFraction=0.5 \
  --conf spark.shuffle.memoryFraction=0.3 \
  --class com.rexen.ComparisonOrcla \
  /soft/ComparisonOrcla.jar
# Same job submitted two ways (duplicate copy of the example earlier in
# these notes). Fixed: removed the pasted shell-prompt transcript prefix
# "[root@cloud-vm1 ~]# " so the line is runnable as-is.
# yarn-cluster mode: the driver runs inside YARN (typical for production jobs).
spark-submit --class com.rexen.ComparisonOrcla --master yarn-cluster --num-executors 50 --executor-cores 4 --executor-memory 6G --conf spark.default.parallelism=500 /soft/ComparisonOrcla.jar
# yarn-client mode: the driver runs on the submitting machine, so job output
# appears on the local console (handy for debugging).
spark-submit --class com.rexen.ComparisonOrcla --master yarn-client --num-executors 50 --executor-cores 4 --executor-memory 6G --conf spark.default.parallelism=500 /soft/ComparisonOrcla.jar