Spark task

Spark script:

#!/bin/sh
source ExitCodeCheck.sh

# Exit with the failing command's return code, or report success.
exitCodeCheck(){
if [ "$1" -ne 0 ]; then
  echo "shell execute failed: return value is $1, not 0"
  exit "$1"
else
  echo 'shell execute success'
fi
}

opts="$@"


# Extract the value of a "-name=value" command-line argument:
# split the arguments one per line, strip the leading "-", then match on the name.
getparam(){
arg=$1
echo $opts |xargs -n1 |cut -b 2- |awk -F'=' '{if($1=="'"$arg"'") print $2}'
}
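
For context, a minimal runnable sketch of the same parsing logic; the file name and argument values here are hypothetical:

#!/bin/sh
# Standalone sketch of getparam (hypothetical file demo_getparam.sh).
# Run as: sh demo_getparam.sh -inc_start=20240301 -hdp_queue=queue_0101_01
opts="$@"
getparam(){
arg=$1
echo $opts |xargs -n1 |cut -b 2- |awk -F'=' '{if($1=="'"$arg"'") print $2}'
}
getparam inc_start    # prints 20240301
getparam hdp_queue    # prints queue_0101_01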


# Read the named job parameters passed on the command line.
IncStart=`getparam inc_start`
IncEnd=`getparam inc_end`
oracle_connection=`getparam jdbc_str`
oracle_username=`getparam db_user`
oracle_password=`getparam db_psw`
dataName=`getparam db_sid`
queueName=`getparam hdp_queue`
hdfshostname=`getparam hdfs_host`


# Slice inc_start/inc_end (yyyymmdd) into date parts and formatted strings.
IncStartYear=${IncStart:0:4}
IncStartMonth=${IncStart:4:2}
IncStartDay=${IncStart:6:2}
IncStartAll="${IncStartYear}-${IncStartMonth}-${IncStartDay} 00:00:00.0"
IncStartAllFormat="${IncStartYear}-${IncStartMonth}-${IncStartDay}"
IncEndYear=${IncEnd:0:4}
IncEndMonth=${IncEnd:4:2}
IncEndDay=${IncEnd:6:2}
IncEndAll="${IncEndYear}-${IncEndMonth}-${IncEndDay} 00:00:00.0"
IncEndAllFormat="${IncEndYear}-${IncEndMonth}-${IncEndDay}"


# Compute the day before inc_start, in the same formats.
OneDayAgo=`date -d "$IncStart 1 days ago" +%Y%m%d`
OneDayAgoYear=${OneDayAgo:0:4}
OneDayAgoMonth=${OneDayAgo:4:2}
OneDayAgoDay=${OneDayAgo:6:2}
OneDayAgoAll="${OneDayAgoYear}-${OneDayAgoMonth}-${OneDayAgoDay} 00:00:00.0"
OneDayAgoAllFormat="${OneDayAgoYear}-${OneDayAgoMonth}-${OneDayAgoDay}"
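
As a quick illustration of the slicing and date arithmetic above (the sample date is hypothetical; "date -d" requires GNU date):

IncStart=20240301
echo ${IncStart:0:4}                                     # 2024
echo "${IncStart:0:4}-${IncStart:4:2}-${IncStart:6:2}"   # 2024-03-01
date -d "$IncStart 1 days ago" +%Y%m%d                   # 20240229 (2024 is a leap year)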


# Use the script name as the job name.
job_name=$0


# Refresh the local copy of the job jar from HDFS.
rm -f spark2phoenix.jar
hadoop fs -get ${hdfshostname}/apps/hduser0101/an_pafc_safe/lib/spark2phoenix.jar .

# Job configuration. Note: the Spark* sizing variables below are defined but
# unused; the spark-submit call further down passes its own literal values.
SparkTaskName='spark_cgi_pssp_group_info_education'
SparkExecutorMemory='1G'
SparkExecutorCores='1'
SparkJar='spark2phoenix.jar'
SparkDriverMemory='1G'

# Overrides the hdp_queue value read above.
queueName='queue_0101_01'
appName='spark_cgi_pssp_group_info_education'
# Double-quoted so the SQL string can carry the quoted literal '050108'
# (the original single-quote nesting dropped the quotes around the value).
sql="SELECT id_icm,ecif_no,agent_code,phone_number FROM an_pafc_safe.idld_lcdm_mit_client_group_info WHERE GROUP_CODE='050108' limit 1000000"
outputTable='cgi.pssp_group_info_education'


# ========= Development parameters =========
#kafka_metadata_broker_list="10.20.24.151:9092,10.20.24.159:9092,10.20.24.161:9092"
#zookeeper_quorum="10.20.24.151:2181,10.20.24.159:2181,10.20.24.161:2181"
#phoenix_jdbc_url="10.20.24.151,10.20.24.159,10.20.24.161:2181:/gbd2-hbase-kylin"

# ========= Test parameters =========
#kafka_metadata_broker_list="30.4.64.78:9092,30.4.64.76:9092,30.4.64.77:9092"
#zookeeper_quorum="30.4.64.78:2181,30.4.64.77:2181,30.4.64.76:2181"
#phoenix_jdbc_url="30.4.64.78,30.4.64.77,30.4.64.76:2181:/gbd2-hbase-kylin"

# ========= Production parameters =========
kafka_metadata_broker_list="30.4.32.71:9092,30.4.32.72:9092,30.4.32.73:9092"
zookeeper_quorum="30.4.32.71:2181,30.4.32.72:2181,30.4.32.73:2181"
# Phoenix connection string: ZooKeeper quorum, client port, and HBase znode parent.
phoenix_jdbc_url="30.16.16.29,30.16.16.33,30.16.16.30:2181:/gbd2-hbase02"


# Submit the job in YARN client mode ("yarn-client" is the Spark 1.x master URL;
# it is deprecated since Spark 2.0 in favor of "--master yarn --deploy-mode client").
spark-submit \
--class com.paic.spark.utils.Spark2Phoenix \
--master yarn-client \
--executor-memory 4G \
--driver-memory 2G \
--num-executors 5 \
--executor-cores 4 \
--queue ${queueName} \
--conf spark.sql.autoBroadcastJoinThreshold=20971520 \
--conf spark.default.parallelism=40 \
--conf spark.sql.shuffle.partitions=40 \
--conf spark.speculation=false \
--conf spark.task.maxFailures=40 \
--conf spark.akka.timeout=300 \
--conf spark.network.timeout=300 \
--conf spark.yarn.max.executor.failures=40 \
--conf spark.executor.extraJavaOptions="-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+ParallelRefProcEnabled -XX:+CMSClassUnloadingEnabled -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintHeapAtGC -XX:+HeapDumpOnOutOfMemoryError -verbose:gc" \
${SparkJar} "${appName}" "${sql}" "${outputTable}" "${phoenix_jdbc_url}"
exitCodeCheck $?
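
For reference, a sketch of how a scheduler might invoke this script; the script name and values are hypothetical, but the parameter names match the getparam calls above (jdbc_str, db_user, db_psw, and db_sid are read but unused by this job):

sh spark_cgi_pssp_group_info_education.sh \
-inc_start=20240301 \
-inc_end=20240302 \
-hdp_queue=queue_0101_01 \
-hdfs_host=hdfs://nameservice1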


 

 
