记录一份 Spark on YARN 集群模式的任务提交脚本,方便以后参考使用;各项资源参数(driver/executor 内存、核数、并发数等)可根据实际集群情况自行调整。
#!/usr/bin/env bash
# Submit a Spark batch job to YARN in cluster mode.
#
# Profiles are sourced so that cron / non-login shells pick up JAVA_HOME,
# SPARK_HOME, PATH, etc. Kept without `set -u` on purpose: system profile
# scripts commonly reference unset variables.
. /etc/profile
. ~/.bash_profile

# Notes:
# - `--master yarn --deploy-mode cluster` replaces the deprecated
#   shorthand `--master yarn-cluster` (removed in Spark 2.x).
# - The original had `--class XXX.XXX.XXX\` with no space before the
#   backslash; line-continuation then glued the class name to `--jars`
#   into one word and broke the submit. A space is required before `\`.
# - `--jars` takes a single comma-separated list (no spaces allowed).
# - spark.shuffle.service.enabled=true lets executors be reclaimed
#   without losing shuffle files (external shuffle service on the NMs).
spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --class XXX.XXX.XXX \
  --jars /data/batchJob/lib/hbase-server-1.0.0-cdh5.4.4.jar,/data/batchJob/lib/htrace-core-3.0.4.jar,/data/batchJob/lib/htrace-core-3.1.0-incubating.jar,/data/batchJob/lib/spark-hive_2.10-1.6.0.jar,/data/batchJob/lib/hbase-protocol-1.0.0-cdh5.4.4.jar,/data/batchJob/lib/hbase-common-1.0.0-cdh5.4.4.jar,/data/batchJob/lib/bson-3.4.2.jar,/data/batchJob/lib/datanucleus-rdbms-3.2.10.jar,/data/batchJob/lib/datanucleus-api-jdo-3.2.1.jar,/data/batchJob/lib/datanucleus-core-3.2.10.jar,/data/batchJob/lib/mongo-java-driver-3.4.2.jar,/data/batchJob/lib/hbase-client-1.0.0-cdh5.4.4.jar \
  --driver-memory 5g \
  --driver-cores 4 \
  --num-executors 40 \
  --executor-cores 2 \
  --executor-memory 4g \
  --conf spark.shuffle.service.enabled=true \
  /data/AAA/user.jar
另外附带一个自定义调整 Spark 运行参数的版本,演示通过 --conf 覆盖超时、心跳、内存开销及 JVM 参数等常用配置项。
#!/usr/bin/env bash
# Submit Spark job T1 to YARN in cluster mode, with user-tuned runtime
# configuration passed via --conf.
#
# Notes:
# - `--master yarn --deploy-mode cluster` replaces the deprecated
#   shorthand `--master yarn-cluster` (removed in Spark 2.x).
# - spark.yarn.executor.memoryOverhead: extra off-heap headroom (MiB) per
#   executor container; the key is the Spark 1.x name (renamed to
#   spark.executor.memoryOverhead in Spark 2.3+).
# - spark.network.timeout / spark.executor.heartbeatInterval are raised to
#   very large values to avoid executor loss on long GC pauses; the
#   heartbeat interval must stay below the network timeout.
# - MaxPermSize only applies on Java 7 and earlier (PermGen was removed in
#   Java 8; the flag is then ignored with a warning).
spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --class T1 \
  --name T1 \
  --driver-memory 2g \
  --driver-cores 2 \
  --num-executors 10 \
  --executor-cores 1 \
  --executor-memory 2g \
  --conf spark.yarn.executor.memoryOverhead=4096 \
  --conf spark.network.timeout=10000000 \
  --conf spark.executor.heartbeatInterval=10000000 \
  --conf spark.shuffle.service.enabled=true \
  --conf "spark.driver.extraJavaOptions=-XX:MaxPermSize=1g -XX:+UseConcMarkSweepGC" \
  --conf "spark.executor.extraJavaOptions=-XX:+UseConcMarkSweepGC" \
  /data/batchJob/XXXHdfs.jar