spark-submit提交的shell脚本_spark-submit shell脚本编写-CSDN博客

本文链接：https://blog.csdn.net/u011098327/article/details/54946565

使用 spark-submit 向 YARN 提交 application 的 shell 脚本示例，内容包括 Spark 参数设置、环境变量加载以及应用程序参数的传入。

#!/bin/bash

# Load the invoking user's login profile into this shell.
# NOTE(review): presumably this is what puts hive / spark-submit on PATH — confirm.
source ~/.bash_profile

# Root directory of the recommender deployment; the hql/, lib/ and conf/
# paths used later in this script are all resolved relative to it.
APP_HOME="/home/data_user/recommend"
printf '%s\n' "${APP_HOME}"

###################### ETL: candidate set and browsing data set ######################

# Partition date handed to the HQL scripts as the hiveconf variable `dt`:
# yesterday, formatted as YYYYMMDD. Relies on the -d option of GNU date.
dt=$(date -d "1 day ago" "+%Y%m%d")
if [[ -z "${dt}" ]]; then
  # Without a date the HQL scripts would run with an empty `dt`.
  echo "$(date "+%Y-%m-%d %H:%M:%S") Failed to compute the partition date" >&2
  exit 1
fi

#######################################
# Run one HQL script through the hive CLI with `dt` set, and abort the whole
# script if hive reports a failure, so the Spark job further down never runs
# after a failed ETL step.
# Globals:   dt (read)
# Arguments: $1 - path of the HQL script to execute
# Outputs:   an error line on stderr when hive fails
# Returns:   0 on success; exits the script with status 1 on failure
#######################################
run_hql() {
  local script=$1
  if ! hive -hiveconf dt="${dt}" -f "${script}"; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") Hive script failed: ${script}" >&2
    exit 1
  fi
}

# User-product candidate set
run_hql "${APP_HOME}/hql/tmp.rdm_user_product_candidate.sql"

# Products the users browsed historically
run_hql "${APP_HOME}/hql/tmp.rdm_user_rating_product.sql"


################################# spark job ###########################################

#######################################
# Build the comma-separated jar list expected by `spark-submit --jars`.
# Jars are emitted in reverse glob order (last match first), which is the
# order the earlier prepend-based loop produced.
# Arguments: $1 - directory to scan for *.jar files
# Outputs:   the joined list on stdout; nothing when no jar is found
# Returns:   0
#######################################
build_jar_list() {
  local dir=$1
  local jar
  local joined=""   # always starts empty; never inherits a value from the environment
  for jar in "${dir}"/*.jar; do
    # An unmatched glob stays as the literal pattern; skip it so that no
    # bogus path ends up in the list.
    [[ -e "${jar}" ]] || continue
    joined="${jar},${joined}"
  done
  # Drop the single trailing comma left by the prepend loop.
  printf '%s' "${joined%,}"
}

JAR_PATH=$(build_jar_list "${APP_HOME}/lib")                             # dependency jars

rawRatingSQLFile="${APP_HOME}/conf/user_product_rating.sql"              # user-product rating data
userProdSQLFile="${APP_HOME}/conf/user_product_candidate.sql"            # user-product candidate data
productType=phone                                                        # product type
saveTable=edw.rdm_cb_item_user_phone                                     # recommendation result table
partitionNum=256                                                         # number of dataset partitions
topN=10                                                                  # number of top-N results to keep

# spark-submit options are collected in an array so every value stays a
# single word regardless of its content.
submit_args=(
  --master yarn
  --deploy-mode client
  --num-executors 16
  --executor-cores 2
  --executor-memory 8g
  --class com.test.data.recommender.strategy.contentbase.ContentBasedI2URecommender
  --conf spark.default.parallelism=256
  --conf spark.sql.shuffle.partitions=400
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
  --driver-memory 2g
)

# Only pass --jars when there is something to pass; an empty value would make
# spark-submit consume the application jar as the --jars argument.
if [[ -n "${JAR_PATH}" ]]; then
  submit_args+=(--jars "${JAR_PATH}")
else
  echo "$(date "+%Y-%m-%d %H:%M:%S") No dependency jars found in ${APP_HOME}/lib; submitting without --jars" >&2
fi

# Application arguments, in order: rating SQL file, candidate SQL file,
# product type, result table, partition count, topN.
# Keep this as the last command of the section: the status check that follows
# reads $? and must see the exit status of spark-submit.
spark-submit "${submit_args[@]}" \
  "${APP_HOME}/recommender-1.0-SNAPSHOT.jar" \
  "${rawRatingSQLFile}" \
  "${userProdSQLFile}" \
  "${productType}" \
  "${saveTable}" \
  "${partitionNum}" \
  "${topN}"

###### Report success or failure based on the exit status of the preceding command
rc=$?
if (( rc == 0 )); then
  echo "$(date "+%Y-%m-%d %H:%M:%S") Spark job run successfully......."
else
  # Any non-zero status is reported and turned into exit code 1.
  echo "$(date "+%Y-%m-%d %H:%M:%S") Spark job run failed......"
  exit 1
fi