spark-submit \
  --master yarn \
  --driver-memory 10G \
  --executor-memory 20G \
  --conf spark.shuffle.service.enabled=true \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.minExecutors=5 \
  --conf spark.dynamicAllocation.maxExecutors=300 \
  --executor-cores 16 \
  --conf spark.executor.memoryOverhead=8G \
  --conf spark.broadcast.compress=true \
  --conf spark.rdd.compress=true \
  --conf spark.speculation=true \
  --conf spark.sql.orc.filterPushdown=true \
  --conf spark.shuffle.consolidateFiles=true \
  --conf spark.executor.extraJavaOptions='-verbose:gc -XX:+UseCompressedOops -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:CMSInitiatingOccupancyFraction=60' \
  --conf spark.driver.extraLibraryPath=/software/servers/hadoop-2.7.1/lib/native \
  --conf spark.executor.extraLibraryPath=/software/servers/hadoop-2.7.1/lib/native:/software/servers/hadoop-2.7.1/share/hadoop/common/lib/hadoop-lzo-0.4.20.jar \
  --conf spark.pyspark.python=python2.7 \
  test.py 2020-02-29
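The job script itself is not shown here. Below is a minimal sketch of what a test.py driven by this command might look like, assuming it takes only the date argument (2020-02-29); the table and column names are placeholders, not from the original job. Since the command pins python2.7, the sketch avoids Python 3-only syntax:

# Hypothetical test.py: a PySpark job that takes a partition date
# from the command line. Table/column names are placeholders.
import sys

from pyspark.sql import SparkSession

def main():
    dt = sys.argv[1]  # e.g. "2020-02-29", passed after the script name

    # Memory, cores, and dynamic allocation come from the spark-submit
    # flags above; the script itself only names the application.
    spark = (SparkSession.builder
             .appName("test-job-" + dt)
             .enableHiveSupport()
             .getOrCreate())

    # Read one date partition of a (hypothetical) Hive table and
    # write an aggregated summary back.
    df = spark.table("db.events").where("dt = '%s'" % dt)
    (df.groupBy("event_type").count()
       .write.mode("overwrite")
       .saveAsTable("db.events_summary"))

    spark.stop()

if __name__ == "__main__":
    main()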
spark-sql \
  --master yarn \
  --conf spark.executor.instances=50 \
  --conf spark.executor.cores=4 \
  --conf spark.executor.memory=12g \
  --conf spark.driver.memory=8g \
  --conf spark.driver.cores=4 \
  --conf spark.sql.shuffle.partitions=900 \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.shuffle.service.enabled=true \
  --conf spark.speculation=false \
  --conf spark.isLoadHivercFile=true \
  --conf spark.sql.tempudf.ignoreIfExists=true \
  --conf spark.sql.parser.quotedRegexColumnNames=true \
  --conf spark.sql.crossJoin.enabled=true \
  --conf spark.resource.level=low \
  --conf spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2 \
  --hiveconf hive.exec.orc.split.strategy=BI \
  -e "sql"
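The portable SQL-level settings in this command can also be applied from a PySpark session rather than CLI flags; a minimal sketch under that assumption follows. The non-standard keys (spark.isLoadHivercFile, spark.sql.tempudf.ignoreIfExists, spark.resource.level) look vendor-specific and are omitted, and the --hiveconf flag is approximated via the spark.hadoop. prefix:

# Minimal sketch: session-level equivalents of the SQL-related flags above.
# Cluster sizing (instances/cores/memory) must still be set at submit time.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("sql-session")
         .config("spark.sql.shuffle.partitions", "900")
         .config("spark.sql.crossJoin.enabled", "true")
         .config("spark.sql.parser.quotedRegexColumnNames", "true")
         .config("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2")
         # Intended counterpart of --hiveconf hive.exec.orc.split.strategy=BI
         .config("spark.hadoop.hive.exec.orc.split.strategy", "BI")
         .enableHiveSupport()
         .getOrCreate())

# Equivalent of -e "sql": run a statement and print the result.
spark.sql("SELECT 1 AS probe").show()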