1. Submitting Spark Jobs to YARN
1.1 DwKuduApp
spark-submit --class com.io.etl.dwkudu.DwKuduApp \
--files /etl/etl-dwkudu/conf/doris.property,/etl/etl-dwkudu/conf/redis.property,/etl/etl-dwkudu/conf/log4j.property \
--master yarn --deploy-mode cluster \
--driver-memory 2g \
--executor-memory 7g \
--conf "spark.executor.cores=2" \
--conf "spark.yarn.maxAppAttempts=0" \
--conf "spark.task.maxFailures=1" \
--conf "spark.dynamicAllocation.enabled=false" \
--conf "spark.executor.instances=8" \
--conf "spark.yarn.executor.memoryOverhead=1024m" \
--conf "spark.executor.extraJavaOptions=-Dlog4j.configration=log4j_dwkudu.properties -XX:+UseG1GC" \
--conf "spark.streaming.kafka.maxRatePerPartition=$datacount" \
/etl/etl-dw-kudu/etl-dw-kudu.jar $period > /etl-log/etl-dw-kudu/etl-dw-kudu.log
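With --deploy-mode cluster the driver runs inside the YARN ApplicationMaster, so the redirect above only captures spark-submit client output. A minimal sketch for pulling the aggregated YARN logs afterwards, assuming log aggregation is enabled and that the application name defaults to the main class name (both are assumptions about this cluster):

app_id=$(yarn application -list -appStates RUNNING | grep DwKuduApp | awk '{print $1}')
yarn logs -applicationId "$app_id" > /etl-log/etl-dw-kudu/etl-dw-kudu-yarn.log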
1.2 GateAppetl
spark-submit --class com.io.etl.gate.GateAppetl \
--files /etl/gate/conf/doris.property,/etl/gate/conf/redis.property,/etl/gate/conf/log4j.property \
--master yarn --deploy-mode cluster \
--driver-memory 600m \
--executor-memory 1g \
--conf "spark.executor.cores=1" \
--conf "spark.yarn.maxAppAttempts=0" \
--conf "spark.yarn.task.maxFailures=1" \
--conf "spark.dynamicAllocation.enabled=false" \
--conf "spark.executor.instances=8" \
--conf "spark.yarn.executor.memoryOverhead=1524m" \
--conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j_gate.properties -XX:+UseG1GC -verbose:gc -XX:PrintGCTimeStamps" \
--conf "spark.streaming.kafka.maxRatePerPartition=1000" \
/etl/etl-gate/etl-gate.jar 10 > /etl-log/etl-gate/etl-gate.log
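The maxRatePerPartition cap bounds the size of each micro-batch. A quick sizing check, assuming the trailing argument 10 is the batch interval in seconds (an assumption; the jar's arguments are not documented here):

# records per batch <= maxRatePerPartition x batch interval x Kafka partitions
#                    = 1000 records/s x 10 s x P = 10,000 x P records for P partitions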
1.3 PayKuDuIdApp
spark-submit --class com.io.etl.PayKuDuIdApp \
--files /etl/gate/conf/doris.property,/etl/gate/conf/redis.property,/etl/gate/conf/log4j.property \
--master yarn --deploy-mode cluster \
--driver-memory 2048m \
--executor-memory 4g \
--conf "spark.executor.cores=1" \
--conf "spark.yarn.maxAppAttempts=0" \
--conf "spark.yarn.task.maxFailures=1" \
--conf "spark.dynamicAllocation.enabled=false" \
--conf "spark.executor.instances=4" \
--conf "spark.yarn.executor.memoryOverhead=2048m" \
--conf "spark.driver.extraJavaOptions=-Dfileencoding=utf-8" \
--conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j_id.properties -Dfile.encoding=utf-8 -XX:+UseG1GC -verbose:gc -XX:PrintGCDetails +XX:+PrintGCTimeStamps" \
--conf "spark.streaming.kafka.maxRatePerPartition=12000" \
/etl/etl-id-kudu/etl-id-kudu.jar 2 3000 > /etl-log/etl-id-kudu/etl-id-kudu.log
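The -verbose:gc flags above print to each executor's stdout, which lands in the YARN container logs. A sketch for inspecting G1 pauses after a run (the application ID is a placeholder; -log_files requires Hadoop 2.9+):

yarn logs -applicationId application_1234567890123_0001 -log_files stdout | grep "GC pause" | tail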
1.4 Submission Parameter Reference
Reference settings for tuning spark-submit. Several of these are not standard Spark configs (vendor- or platform-specific, marked below); validate them against your Spark version before use.

--conf spark.yarn.job.owners=xxx                                 # non-standard: job-owner tag used by some in-house platforms
--conf spark.yarn.executor.memoryOverhead=2048                   # off-heap overhead per executor, MiB (spark.executor.memoryOverhead since Spark 2.3)
--conf spark.sql.shuffle.partitions=2000                         # partition count for Spark SQL shuffles (joins, aggregations)
--conf spark.yarn.appMasterEnv.JAVA_HOME=/opt/soft/openjdk1.8.0  # JAVA_HOME for the YARN ApplicationMaster
--conf spark.executorEnv.JAVA_HOME=/opt/soft/openjdk1.8.0        # JAVA_HOME for the executors
--conf spark.dynamicAllocation.enabled=true                      # scale the executor count up/down with load
--conf spark.shuffle.service.enabled=true                        # external shuffle service; required for dynamic allocation on YARN
--conf spark.cross.dc.inputs.location.prefix.substitute.enabled=false  # non-standard: in-house cross-datacenter input handling
--conf spark.dynamicAllocation.minExecutors=2                    # lower bound for dynamic allocation
--conf spark.dynamicAllocation.maxExecutors=700                  # upper bound for dynamic allocation
--conf spark.dynamicAllocation.executorIdleTimeout=600s          # release executors idle for longer than this
--conf spark.hadoop.parquet.enable.summary-metadata=true         # write Parquet _metadata summary files
--conf spark.speculation=true                                    # speculatively re-launch straggler tasks
--conf spark.speculation.multiplier=2                            # a task is a straggler at 2x the median task time
--conf spark.speculation.quantile=0.5                            # start speculating once 50% of a stage's tasks finish
--conf spark.scheduler.executorTaskBlacklistTime=300000          # legacy: ms a failed task stays blacklisted from its executor
--conf spark.shuffle.compress=false                              # do not compress shuffle map outputs
--conf spark.shuffle.spill.compress=false                        # do not compress shuffle spill files
--conf spark.default.parallelism=2000                            # default partition count for RDD shuffles (join, reduceByKey, ...)
--conf spark.remote.shuffle.service.enabled=true                 # non-standard: remote/disaggregated shuffle service
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer  # Kryo: faster and more compact than Java serialization
--conf spark.streaming.backpressure.enabled=true                 # adapt the ingestion rate to the processing rate
--conf spark.streaming.backpressure.initialRate=1000             # max rate for the first batches, before backpressure has feedback
--conf spark.streaming.stopGracefullyOnShutdown=true             # finish in-flight batches before stopping
--conf spark.streaming.kafka.maxRatePerPartition=1000            # cap on records/sec read from each Kafka partition
--conf spark.streaming.kafka.consumer.poll.ms=20000              # timeout for Kafka consumer polls on the executors
--conf spark.reducer.maxSizeInFlight=96m                         # shuffle fetch buffer per reduce task (default 48m)
--conf spark.shuffle.io.retryWait=20s                            # wait between shuffle fetch retries (default 5s)
--conf spark.shuffle.memoryFraction=0.4                          # legacy (pre-1.6 static memory manager) heap fraction for shuffle
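Settings shared by every job can also live in $SPARK_HOME/conf/spark-defaults.conf instead of being repeated on each command line; per-job --conf flags override them. A minimal sketch with a few of the values above:

spark.serializer                            org.apache.spark.serializer.KryoSerializer
spark.streaming.backpressure.enabled        true
spark.streaming.stopGracefullyOnShutdown    true
spark.yarn.executor.memoryOverhead          2048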