执行spark程序

该博客介绍了如何通过Shell脚本来执行Spark程序。脚本首先设置了环境变量,然后定义了删除HDFS上特定路径的函数,接着检查并删除目标路径的文件。如果任务未完成,则会启动Spark作业,该作业包含具体的配置参数,并使用`spark-submit`提交。如果任务在三次尝试后仍未完成,脚本将输出错误信息并退出。
摘要由CSDN通过智能技术生成

通过Shell脚本执行Spark任务
#!/bin/sh
# Daily driver script: clean previous outputs on AFS, then submit the
# MatrixSpark job (ComputeIconPvUv) with up to 3 retries.
# Required env: BASETIME - base date string understood by GNU `date -d`.
export LANG="en_US.UTF-8"

# Run date (YYYYMMDD) derived from the externally supplied base time.
# NOTE(review): original line lost its command substitution in transit
# (`day=date -d ...`); restored as $(...).
day=$(date -d "$BASETIME" +"%Y%m%d")

# Spark client install, xingtian cluster (alternatives kept for reference).
SPARK_HOME="/home/map/spark-client/xingtian"
#SPARK_HOME="/home/map/spark-client/wutai"
#SPARK_HOME="/home/map/rd/zhangni02/matrix_spark_scala/spark/spark-2.3.2.0"

# Hadoop client install.
HADOOP_HOME="/home/map/hadoop-client/hadoop"

# Input and output data paths on AFS, partitioned by event_day.
INPUT_PATH="afs://xingtian.afs.baidu.com:9902/app/insight/lbs/lbs_mobile_matrix_user_daily/event_day=${day}/event_type=matrix_stat_daily/*"
OUTPUT_PATH="afs://xingtian.afs.baidu.com:9902/app/insight/lbs/lbs_mobile_matrix_fact/event_day=${day}"

# Remove a previous output directory if it exists, so the Spark job can
# write fresh results without "path already exists" failures.
# Arguments: $1 - absolute AFS/HDFS path to delete.
remove_output_if_exists() {
  ${HADOOP_HOME}/bin/hadoop fs -test -e "$1"
  if [ $? -eq 0 ]; then
    ${HADOOP_HOME}/bin/hadoop fs -rmr "$1"
  fi
}

# The job writes six event_type partitions; clear each of them up front.
# (Original script repeated the same test-and-delete stanza six times.)
for event_type in nophoneinfo_normal nosv_phoneinfo_normal \
                  nophoneinfo_dims nosv_phoneinfo_dims dims nosv_dims; do
  remove_output_if_exists "${OUTPUT_PATH}/event_type=${event_type}"
done

#Starting && Running date
# Abort early if the run date could not be derived from BASETIME;
# a job submitted with an empty day would write to a bogus partition.
if [ -z "${day}" ]; then
  echo "running date is empty"
  exit 1
else
  echo "running date is ${day}"
fi

#Job Function
# Submit the MatrixSpark job to YARN. Reads globals: SPARK_HOME, day,
# INPUT_PATH, OUTPUT_PATH. Returns spark-submit's exit status.
# NOTE(review): the original text lost the backslash line-continuations
# and rendered `--` as en-dashes; both restored here.
function runSparkJob() {
  ${SPARK_HOME}/bin/spark-submit --master yarn --queue spark-map-client \
    --num-executors 50 \
    --executor-cores 8 \
    --executor-memory 16G \
    --driver-memory 16G \
    --conf spark.memory.useLegacyMode=true \
    --conf spark.shuffle.memoryFraction=0.7 \
    --conf spark.storage.memoryFraction=0.2 \
    --conf spark.default.parallelism=10000 \
    --conf spark.speculation=true \
    --conf spark.speculation.multiplier=1.5 \
    --conf spark.speculation.quantile=0.99 \
    --conf spark.speculation.interval=100 \
    --conf spark.scheduler.listenerbus.eventqueue.capacity=100000 \
    --files ./hive-site.xml \
    --class baidu.mapx.matrixspark.ComputeIconPvUv MatrixSpark.jar "${day}" "${INPUT_PATH}" "${OUTPUT_PATH}"
}

# Retry up to 3 times: if any of the six output partitions is missing its
# _SUCCESS marker, wipe partial outputs and resubmit the Spark job.
# FIX(review): the original nested-if structure only reran the job when the
# FIRST marker was missing; if a later marker was absent it silently did
# nothing for that attempt. Any missing marker now triggers a rerun.
for (( i=0; i<3; i++ )); do
  # Check whether every partition carries a _SUCCESS marker.
  all_done=1
  for event_type in nophoneinfo_normal nosv_phoneinfo_normal \
                    nophoneinfo_dims nosv_phoneinfo_dims dims nosv_dims; do
    ${HADOOP_HOME}/bin/hadoop fs -test -e "${OUTPUT_PATH}/event_type=${event_type}/_SUCCESS"
    if [ $? -ne 0 ]; then
      all_done=0
      break
    fi
  done

  if [ ${all_done} -eq 1 ]; then
    echo "task ${day} has already finished"
    break
  fi

  # Clear any partial partitions left by a failed attempt, then rerun.
  for event_type in nophoneinfo_normal nosv_phoneinfo_normal \
                    nophoneinfo_dims nosv_phoneinfo_dims dims nosv_dims; do
    ${HADOOP_HOME}/bin/hadoop fs -test -e "${OUTPUT_PATH}/event_type=${event_type}"
    if [ $? -eq 0 ]; then
      ${HADOOP_HOME}/bin/hadoop fs -rmr "${OUTPUT_PATH}/event_type=${event_type}"
    fi
  done
  runSparkJob
done

# Final verification: success only if ALL six partitions have a _SUCCESS
# marker; otherwise report failure.
# FIX(review): originally only a missing FIRST marker produced `exit 1`;
# a missing later marker fell off the end of the script with status 0.
# Any missing marker now fails explicitly.
all_ok=1
for event_type in nophoneinfo_normal nosv_phoneinfo_normal \
                  nophoneinfo_dims nosv_phoneinfo_dims dims nosv_dims; do
  ${HADOOP_HOME}/bin/hadoop fs -test -e "${OUTPUT_PATH}/event_type=${event_type}/_SUCCESS"
  if [ $? -ne 0 ]; then
    all_ok=0
    break
  fi
done

if [ ${all_ok} -eq 1 ]; then
  echo "**************************************************"
  echo "********** matrix_normal_fact completed **********"
  echo "**************************************************"
  exit 0
else
  echo "**************************************************"
  echo "************ matrix_normal_fact failed ***********"
  echo "**************************************************"
  exit 1
fi

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值